Install whatsapp bridge deps in container

Remove git submodules from container
apt -> apt-get
2026-03-29 14:30:09 +11:00 · 2026-03-29 14:29:29 +11:00 · 2026-03-29 13:55:18 +11:00 · 2026-03-29 13:55:18 +11:00 · 2026-03-29 13:55:18 +11:00 · 2026-03-29 13:55:18 +11:00
128 changed files with 5318 additions and 633 deletions
@@ -0,0 +1,13 @@
+# Git
+.git
+.gitignore
+.gitmodules
+
+# Dependencies (any depth: a root-only pattern would miss scripts/whatsapp-bridge/node_modules)
+**/node_modules
+
+# CI/CD
+.github
+
+# Environment files (any depth, so a nested .env never enters the build context)
+**/.env
@@ -74,6 +74,10 @@ HF_TOKEN=
 # TOOL API KEYS
 # =============================================================================

+# Exa API Key - AI-native web search and contents
+# Get at: https://exa.ai
+EXA_API_KEY=
+
 # Parallel API Key - AI-native web search and extract
 # Get at: https://parallel.ai
 PARALLEL_API_KEY=
@@ -0,0 +1,69 @@
+name: Docker Build and Publish
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Least privilege: the job only reads the repository; Docker Hub auth uses its own secrets.
+permissions:
+  contents: read
+
+# One run per ref; a newer run on the same ref cancels the one still in progress.
+concurrency:
+  group: docker-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # `load: true` places the built image in the local Docker engine so the next step can run it.
+      - name: Build image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile
+          load: true
+          tags: nousresearch/hermes-agent:test
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      # Smoke test: run the entrypoint with --help, with /tmp/hermes-test mounted as the data volume.
+      - name: Test image starts
+        run: |
+          docker run --rm \
+            -v /tmp/hermes-test:/opt/data \
+            --entrypoint /opt/hermes/docker/entrypoint.sh \
+            nousresearch/hermes-agent:test --help
+
+      # Login and push run only for pushes to main; pull requests stop after the smoke test.
+      - name: Log in to Docker Hub
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Push image
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile
+          push: true
+          tags: |
+            nousresearch/hermes-agent:latest
+            nousresearch/hermes-agent:${{ github.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
@@ -0,0 +1,20 @@
+FROM debian:13.4
+# Refresh the package index and install in ONE layer: a separate `apt-get update` layer gets cached and goes stale.
+RUN apt-get update && \
+    apt-get install -y nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev
+
+COPY . /opt/hermes
+WORKDIR /opt/hermes
+# Editable Python install with all extras, root JS deps, then Chromium together with its system dependencies.
+RUN pip install -e ".[all]" --break-system-packages
+RUN npm install
+RUN npx playwright install --with-deps chromium
+WORKDIR /opt/hermes/scripts/whatsapp-bridge
+RUN npm install
+
+WORKDIR /opt/hermes
+RUN chmod +x /opt/hermes/docker/entrypoint.sh
+# HERMES_HOME points at /opt/data, which is declared as a volume so it can be mounted from outside the container.
+ENV HERMES_HOME=/opt/data
+VOLUME [ "/opt/data" ]
+ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -0,0 +1,348 @@
+# Hermes Agent v0.5.0 (v2026.3.28)
+
+**Release Date:** March 28, 2026
+
+> The hardening release — Hugging Face provider, /model command overhaul, Telegram Private Chat Topics, native Modal SDK, plugin lifecycle hooks, tool-use enforcement for GPT models, Nix flake, 50+ security and reliability fixes, and a comprehensive supply chain audit.
+
+---
+
+## ✨ Highlights
+
+- **Nous Portal now supports 400+ models** — The Nous Research inference portal has expanded dramatically, giving Hermes Agent users access to over 400 models through a single provider endpoint
+
+- **Hugging Face as a first-class inference provider** — Full integration with HF Inference API including curated agentic model picker that maps to OpenRouter analogues, live `/models` endpoint probe, and setup wizard flow ([#3419](https://github.com/NousResearch/hermes-agent/pull/3419), [#3440](https://github.com/NousResearch/hermes-agent/pull/3440))
+
+- **Telegram Private Chat Topics** — Project-based conversations with functional skill binding per topic, enabling isolated workflows within a single Telegram chat ([#3163](https://github.com/NousResearch/hermes-agent/pull/3163))
+
+- **Native Modal SDK backend** — Replaced swe-rex dependency with native Modal SDK (`Sandbox.create.aio` + `exec.aio`), eliminating tunnels and simplifying the Modal terminal backend ([#3538](https://github.com/NousResearch/hermes-agent/pull/3538))
+
+- **Plugin lifecycle hooks activated** — `pre_llm_call`, `post_llm_call`, `on_session_start`, and `on_session_end` hooks now fire in the agent loop and CLI/gateway, completing the plugin hook system ([#3542](https://github.com/NousResearch/hermes-agent/pull/3542))
+
+- **Improved OpenAI model reliability** — Added `GPT_TOOL_USE_GUIDANCE` to prevent GPT models from describing intended actions instead of making tool calls, plus automatic stripping of stale budget warnings from conversation history that caused models to avoid tools across turns ([#3528](https://github.com/NousResearch/hermes-agent/pull/3528))
+
+- **Nix flake** — Full uv2nix build, NixOS module with persistent container mode, auto-generated config keys from Python source, and suffix PATHs for agent-friendliness ([#20](https://github.com/NousResearch/hermes-agent/pull/20), [#3274](https://github.com/NousResearch/hermes-agent/pull/3274), [#3061](https://github.com/NousResearch/hermes-agent/pull/3061)) by @alt-glitch
+
+- **Supply chain hardening** — Removed compromised `litellm` dependency, pinned all dependency version ranges, regenerated `uv.lock` with hashes, added CI workflow scanning PRs for supply chain attack patterns, and bumped deps to fix CVEs ([#2796](https://github.com/NousResearch/hermes-agent/pull/2796), [#2810](https://github.com/NousResearch/hermes-agent/pull/2810), [#2812](https://github.com/NousResearch/hermes-agent/pull/2812), [#2816](https://github.com/NousResearch/hermes-agent/pull/2816), [#3073](https://github.com/NousResearch/hermes-agent/pull/3073))
+
+- **Anthropic output limits fix** — Replaced hardcoded 16K `max_tokens` with per-model native output limits (128K for Opus 4.6, 64K for Sonnet 4.6), fixing "Response truncated" and thinking-budget exhaustion on direct Anthropic API ([#3426](https://github.com/NousResearch/hermes-agent/pull/3426), [#3444](https://github.com/NousResearch/hermes-agent/pull/3444))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### New Provider: Hugging Face
+- First-class Hugging Face Inference API integration with auth, setup wizard, and model picker ([#3419](https://github.com/NousResearch/hermes-agent/pull/3419))
+- Curated model list mapping OpenRouter agentic defaults to HF equivalents — providers with 8+ curated models skip live `/models` probe for speed ([#3440](https://github.com/NousResearch/hermes-agent/pull/3440))
+- Added glm-5-turbo to Z.AI provider model list ([#3095](https://github.com/NousResearch/hermes-agent/pull/3095))
+
+### Provider & Model Improvements
+- `/model` command overhaul — extracted shared `switch_model()` pipeline for CLI and gateway, custom endpoint support, provider-aware routing ([#2795](https://github.com/NousResearch/hermes-agent/pull/2795), [#2799](https://github.com/NousResearch/hermes-agent/pull/2799))
+- Removed `/model` slash command from CLI and gateway in favor of `hermes model` subcommand ([#3080](https://github.com/NousResearch/hermes-agent/pull/3080))
+- Preserve `custom` provider instead of silently remapping to `openrouter` ([#2792](https://github.com/NousResearch/hermes-agent/pull/2792))
+- Read root-level `provider` and `base_url` from config.yaml into model config ([#3112](https://github.com/NousResearch/hermes-agent/pull/3112))
+- Align Nous Portal model slugs with OpenRouter naming ([#3253](https://github.com/NousResearch/hermes-agent/pull/3253))
+- Fix Alibaba provider default endpoint and model list ([#3484](https://github.com/NousResearch/hermes-agent/pull/3484))
+- Allow MiniMax users to override `/v1` → `/anthropic` auto-correction ([#3553](https://github.com/NousResearch/hermes-agent/pull/3553))
+- Migrate OAuth token refresh to `platform.claude.com` with fallback ([#3246](https://github.com/NousResearch/hermes-agent/pull/3246))
+
+### Agent Loop & Conversation
+- **Improved OpenAI model reliability** — `GPT_TOOL_USE_GUIDANCE` prevents GPT models from describing actions instead of calling tools + automatic budget warning stripping from history ([#3528](https://github.com/NousResearch/hermes-agent/pull/3528))
+- **Surface lifecycle events** — All retry, fallback, and compression events now surface to the user as formatted messages ([#3153](https://github.com/NousResearch/hermes-agent/pull/3153))
+- **Anthropic output limits** — Per-model native output limits instead of hardcoded 16K `max_tokens` ([#3426](https://github.com/NousResearch/hermes-agent/pull/3426))
+- **Thinking-budget exhaustion detection** — Skip useless continuation retries when model uses all output tokens on reasoning ([#3444](https://github.com/NousResearch/hermes-agent/pull/3444))
+- Always prefer streaming for API calls to prevent hung subagents ([#3120](https://github.com/NousResearch/hermes-agent/pull/3120))
+- Restore safe non-streaming fallback after stream failures ([#3020](https://github.com/NousResearch/hermes-agent/pull/3020))
+- Give subagents independent iteration budgets ([#3004](https://github.com/NousResearch/hermes-agent/pull/3004))
+- Update `api_key` in `_try_activate_fallback` for subagent auth ([#3103](https://github.com/NousResearch/hermes-agent/pull/3103))
+- Graceful return on max retries instead of crashing thread ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Count compression restarts toward retry limit ([#3070](https://github.com/NousResearch/hermes-agent/pull/3070))
+- Include tool tokens in preflight estimate, guard context probe persistence ([#3164](https://github.com/NousResearch/hermes-agent/pull/3164))
+- Update context compressor limits after fallback activation ([#3305](https://github.com/NousResearch/hermes-agent/pull/3305))
+- Validate empty user messages to prevent Anthropic API 400 errors ([#3322](https://github.com/NousResearch/hermes-agent/pull/3322))
+- GLM reasoning-only and max-length handling ([#3010](https://github.com/NousResearch/hermes-agent/pull/3010))
+- Increase API timeout default from 900s to 1800s for slow-thinking models ([#3431](https://github.com/NousResearch/hermes-agent/pull/3431))
+- Send `max_tokens` for Claude/OpenRouter + retry SSE connection errors ([#3497](https://github.com/NousResearch/hermes-agent/pull/3497))
+- Prevent AsyncOpenAI/httpx cross-loop deadlock in gateway mode ([#2701](https://github.com/NousResearch/hermes-agent/pull/2701)) by @ctlst
+
+### Streaming & Reasoning
+- **Persist reasoning across gateway session turns** with new schema v6 columns (`reasoning`, `reasoning_details`, `codex_reasoning_items`) ([#2974](https://github.com/NousResearch/hermes-agent/pull/2974))
+- Detect and kill stale SSE connections ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Fix stale stream detector race causing spurious `RemoteProtocolError` ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Skip duplicate callback for `<think>`-extracted reasoning during streaming ([#3116](https://github.com/NousResearch/hermes-agent/pull/3116))
+- Preserve reasoning fields in `rewrite_transcript` ([#3311](https://github.com/NousResearch/hermes-agent/pull/3311))
+- Preserve Gemini thought signatures in streamed tool calls ([#2997](https://github.com/NousResearch/hermes-agent/pull/2997))
+- Ensure first delta is fired during reasoning updates ([untagged commit](https://github.com/NousResearch/hermes-agent))
+
+### Session & Memory
+- **Session search recent sessions mode** — Omit query to browse recent sessions with titles, previews, and timestamps ([#2533](https://github.com/NousResearch/hermes-agent/pull/2533))
+- **Session config surfacing** on `/new`, `/reset`, and auto-reset ([#3321](https://github.com/NousResearch/hermes-agent/pull/3321))
+- **Third-party session isolation** — `--source` flag for isolating sessions by origin ([#3255](https://github.com/NousResearch/hermes-agent/pull/3255))
+- Add `/resume` CLI handler, session log truncation guard, `reopen_session` API ([#3315](https://github.com/NousResearch/hermes-agent/pull/3315))
+- Clear compressor summary and turn counter on `/clear` and `/new` ([#3102](https://github.com/NousResearch/hermes-agent/pull/3102))
+- Surface silent SessionDB failures that cause session data loss ([#2999](https://github.com/NousResearch/hermes-agent/pull/2999))
+- Session search fallback preview on summarization failure ([#3478](https://github.com/NousResearch/hermes-agent/pull/3478))
+- Prevent stale memory overwrites by flush agent ([#2687](https://github.com/NousResearch/hermes-agent/pull/2687))
+
+### Context Compression
+- Replace dead `summary_target_tokens` with ratio-based scaling ([#2554](https://github.com/NousResearch/hermes-agent/pull/2554))
+- Expose `compression.target_ratio`, `protect_last_n`, and `threshold` in `DEFAULT_CONFIG` ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Restore sane defaults and cap summary at 12K tokens ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Preserve transcript on `/compress` and hygiene compression ([#3556](https://github.com/NousResearch/hermes-agent/pull/3556))
+- Update context pressure warnings and token estimates after compaction ([untagged commit](https://github.com/NousResearch/hermes-agent))
+
+### Architecture & Dependencies
+- **Remove mini-swe-agent dependency** — Inline Docker and Modal backends directly ([#2804](https://github.com/NousResearch/hermes-agent/pull/2804))
+- **Replace swe-rex with native Modal SDK** for Modal backend ([#3538](https://github.com/NousResearch/hermes-agent/pull/3538))
+- **Plugin lifecycle hooks** — `pre_llm_call`, `post_llm_call`, `on_session_start`, `on_session_end` now fire in the agent loop ([#3542](https://github.com/NousResearch/hermes-agent/pull/3542))
+- Fix plugin toolsets invisible in `hermes tools` and standalone processes ([#3457](https://github.com/NousResearch/hermes-agent/pull/3457))
+- Consolidate `get_hermes_home()` and `parse_reasoning_effort()` ([#3062](https://github.com/NousResearch/hermes-agent/pull/3062))
+- Remove unused Hermes-native PKCE OAuth flow ([#3107](https://github.com/NousResearch/hermes-agent/pull/3107))
+- Remove ~100 unused imports across 55 files ([#3016](https://github.com/NousResearch/hermes-agent/pull/3016))
+- Fix 154 f-strings, simplify getattr/URL patterns, remove dead code ([#3119](https://github.com/NousResearch/hermes-agent/pull/3119))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### Telegram
+- **Private Chat Topics** — Project-based conversations with functional skill binding per topic, enabling isolated workflows within a single Telegram chat ([#3163](https://github.com/NousResearch/hermes-agent/pull/3163))
+- **Auto-discover fallback IPs via DNS-over-HTTPS** when `api.telegram.org` is unreachable ([#3376](https://github.com/NousResearch/hermes-agent/pull/3376))
+- **Configurable reply threading mode** ([#2907](https://github.com/NousResearch/hermes-agent/pull/2907))
+- Fall back to no `thread_id` on "Message thread not found" BadRequest ([#3390](https://github.com/NousResearch/hermes-agent/pull/3390))
+- Self-reschedule reconnect when `start_polling` fails after 502 ([#3268](https://github.com/NousResearch/hermes-agent/pull/3268))
+
+### Discord
+- Stop phantom typing indicator after agent turn completes ([#3003](https://github.com/NousResearch/hermes-agent/pull/3003))
+
+### Slack
+- Send tool call progress messages to correct Slack thread ([#3063](https://github.com/NousResearch/hermes-agent/pull/3063))
+- Scope progress thread fallback to Slack only ([#3488](https://github.com/NousResearch/hermes-agent/pull/3488))
+
+### WhatsApp
+- Download documents, audio, and video media from messages ([#2978](https://github.com/NousResearch/hermes-agent/pull/2978))
+
+### Matrix
+- Add missing Matrix entry in `PLATFORMS` dict ([#3473](https://github.com/NousResearch/hermes-agent/pull/3473))
+- Harden e2ee access-token handling ([#3562](https://github.com/NousResearch/hermes-agent/pull/3562))
+- Add backoff for `SyncError` in sync loop ([#3280](https://github.com/NousResearch/hermes-agent/pull/3280))
+
+### Signal
+- Track SSE keepalive comments as connection activity ([#3316](https://github.com/NousResearch/hermes-agent/pull/3316))
+
+### Email
+- Prevent unbounded growth of `_seen_uids` in EmailAdapter ([#3490](https://github.com/NousResearch/hermes-agent/pull/3490))
+
+### Gateway Core
+- **Config-gated `/verbose` command** for messaging platforms — toggle tool output verbosity from chat ([#3262](https://github.com/NousResearch/hermes-agent/pull/3262))
+- **Background review notifications** delivered to user chat ([#3293](https://github.com/NousResearch/hermes-agent/pull/3293))
+- **Retry transient send failures** and notify user on exhaustion ([#3288](https://github.com/NousResearch/hermes-agent/pull/3288))
+- Recover from hung agents — `/stop` hard-kills session lock ([#3104](https://github.com/NousResearch/hermes-agent/pull/3104))
+- Thread-safe `SessionStore` — protect `_entries` with `threading.Lock` ([#3052](https://github.com/NousResearch/hermes-agent/pull/3052))
+- Fix gateway token double-counting with cached agents — use absolute set instead of increment ([#3306](https://github.com/NousResearch/hermes-agent/pull/3306), [#3317](https://github.com/NousResearch/hermes-agent/pull/3317))
+- Fingerprint full auth token in agent cache signature ([#3247](https://github.com/NousResearch/hermes-agent/pull/3247))
+- Silence background agent terminal output ([#3297](https://github.com/NousResearch/hermes-agent/pull/3297))
+- Include per-platform `ALLOW_ALL` and `SIGNAL_GROUP` in startup allowlist check ([#3313](https://github.com/NousResearch/hermes-agent/pull/3313))
+- Include user-local bin paths in systemd unit PATH ([#3527](https://github.com/NousResearch/hermes-agent/pull/3527))
+- Track background task references in `GatewayRunner` ([#3254](https://github.com/NousResearch/hermes-agent/pull/3254))
+- Add request timeouts to HA, Email, Mattermost, SMS adapters ([#3258](https://github.com/NousResearch/hermes-agent/pull/3258))
+- Add media download retry to Mattermost, Slack, and base cache ([#3323](https://github.com/NousResearch/hermes-agent/pull/3323))
+- Detect virtualenv path instead of hardcoding `venv/` ([#2797](https://github.com/NousResearch/hermes-agent/pull/2797))
+- Use `TERMINAL_CWD` for context file discovery, not process cwd ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Stop loading hermes repo AGENTS.md into gateway sessions (~10k wasted tokens) ([#2891](https://github.com/NousResearch/hermes-agent/pull/2891))
+
+---
+
+## 🖥️ CLI & User Experience
+
+### Interactive CLI
+- **Configurable busy input mode** + fix `/queue` always working ([#3298](https://github.com/NousResearch/hermes-agent/pull/3298))
+- **Preserve user input on multiline paste** ([#3065](https://github.com/NousResearch/hermes-agent/pull/3065))
+- **Tool generation callback** — streaming "preparing terminal…" updates during tool argument generation ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Show tool progress for substantive tools, not just "preparing" ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Buffer reasoning preview chunks and fix duplicate display ([#3013](https://github.com/NousResearch/hermes-agent/pull/3013))
+- Prevent reasoning box from rendering 3x during tool-calling loops ([#3405](https://github.com/NousResearch/hermes-agent/pull/3405))
+- Eliminate "Event loop is closed" / "Press ENTER to continue" during idle — three-layer fix with `neuter_async_httpx_del()`, custom exception handler, and stale client cleanup ([#3398](https://github.com/NousResearch/hermes-agent/pull/3398))
+- Fix status bar showing 26K instead of 260K for token counts with trailing zeros ([#3024](https://github.com/NousResearch/hermes-agent/pull/3024))
+- Fix status bar duplicating and degrading during long sessions ([#3291](https://github.com/NousResearch/hermes-agent/pull/3291))
+- Refresh TUI before background task output to prevent status bar overlap ([#3048](https://github.com/NousResearch/hermes-agent/pull/3048))
+- Suppress KawaiiSpinner animation under `patch_stdout` ([#2994](https://github.com/NousResearch/hermes-agent/pull/2994))
+- Skip KawaiiSpinner when TUI handles tool progress ([#2973](https://github.com/NousResearch/hermes-agent/pull/2973))
+- Guard `isatty()` against closed streams via `_is_tty` property ([#3056](https://github.com/NousResearch/hermes-agent/pull/3056))
+- Ensure single closure of streaming boxes during tool generation ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Cap context pressure percentage at 100% in display ([#3480](https://github.com/NousResearch/hermes-agent/pull/3480))
+- Clean up HTML error messages in CLI display ([#3069](https://github.com/NousResearch/hermes-agent/pull/3069))
+- Show HTTP status code and 400 body in API error output ([#3096](https://github.com/NousResearch/hermes-agent/pull/3096))
+- Extract useful info from HTML error pages, dump debug on max retries ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Prevent TypeError on startup when `base_url` is None ([#3068](https://github.com/NousResearch/hermes-agent/pull/3068))
+- Prevent update crash in non-TTY environments ([#3094](https://github.com/NousResearch/hermes-agent/pull/3094))
+- Handle EOFError in sessions delete/prune confirmation prompts ([#3101](https://github.com/NousResearch/hermes-agent/pull/3101))
+- Catch KeyboardInterrupt during `flush_memories` on exit and in exit cleanup handlers ([#3025](https://github.com/NousResearch/hermes-agent/pull/3025), [#3257](https://github.com/NousResearch/hermes-agent/pull/3257))
+- Guard `.strip()` against None values from YAML config ([#3552](https://github.com/NousResearch/hermes-agent/pull/3552))
+- Guard `config.get()` against YAML null values to prevent AttributeError ([#3377](https://github.com/NousResearch/hermes-agent/pull/3377))
+- Store asyncio task references to prevent GC mid-execution ([#3267](https://github.com/NousResearch/hermes-agent/pull/3267))
+
+### Setup & Configuration
+- Use explicit key mapping for returning-user menu dispatch instead of positional index ([#3083](https://github.com/NousResearch/hermes-agent/pull/3083))
+- Use `sys.executable` for pip in update commands to fix PEP 668 ([#3099](https://github.com/NousResearch/hermes-agent/pull/3099))
+- Harden `hermes update` against diverged history, non-main branches, and gateway edge cases ([#3492](https://github.com/NousResearch/hermes-agent/pull/3492))
+- Fix OpenClaw migration overwriting defaults and the setup wizard skipping imported sections ([#3282](https://github.com/NousResearch/hermes-agent/pull/3282))
+- Stop recursive AGENTS.md walk, load top-level only ([#3110](https://github.com/NousResearch/hermes-agent/pull/3110))
+- Add macOS Homebrew paths to browser and terminal PATH resolution ([#2713](https://github.com/NousResearch/hermes-agent/pull/2713))
+- YAML boolean handling for `tool_progress` config ([#3300](https://github.com/NousResearch/hermes-agent/pull/3300))
+- Reset default SOUL.md to baseline identity text ([#3159](https://github.com/NousResearch/hermes-agent/pull/3159))
+- Reject relative cwd paths for container terminal backends ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Add explicit `hermes-api-server` toolset for API server platform ([#3304](https://github.com/NousResearch/hermes-agent/pull/3304))
+- Reorder setup wizard providers — OpenRouter first ([untagged commit](https://github.com/NousResearch/hermes-agent))
+
+---
+
+## 🔧 Tool System
+
+### API Server
+- **Idempotency-Key support**, body size limit, and OpenAI error envelope ([#2903](https://github.com/NousResearch/hermes-agent/pull/2903))
+- Allow Idempotency-Key in CORS headers ([#3530](https://github.com/NousResearch/hermes-agent/pull/3530))
+- Cancel orphaned agent + true interrupt on SSE disconnect ([#3427](https://github.com/NousResearch/hermes-agent/pull/3427))
+- Fix streaming breaks when agent makes tool calls ([#2985](https://github.com/NousResearch/hermes-agent/pull/2985))
+
+### Terminal & File Operations
+- Handle addition-only hunks in V4A patch parser ([#3325](https://github.com/NousResearch/hermes-agent/pull/3325))
+- Exponential backoff for persistent shell polling ([#2996](https://github.com/NousResearch/hermes-agent/pull/2996))
+- Add timeout to subprocess calls in `context_references` ([#3469](https://github.com/NousResearch/hermes-agent/pull/3469))
+
+### Browser & Vision
+- Handle 402 insufficient credits error in vision tool ([#2802](https://github.com/NousResearch/hermes-agent/pull/2802))
+- Fix `browser_vision` ignoring the `auxiliary.vision.timeout` config ([#2901](https://github.com/NousResearch/hermes-agent/pull/2901))
+- Make browser command timeout configurable via config.yaml ([#2801](https://github.com/NousResearch/hermes-agent/pull/2801))
+
+### MCP
+- MCP toolset resolution for runtime and config ([#3252](https://github.com/NousResearch/hermes-agent/pull/3252))
+- Add MCP tool name collision protection ([#3077](https://github.com/NousResearch/hermes-agent/pull/3077))
+
+### Auxiliary LLM
+- Guard aux LLM calls against None content + reasoning fallback + retry ([#3449](https://github.com/NousResearch/hermes-agent/pull/3449))
+- Catch ImportError from `build_anthropic_client` in vision auto-detection ([#3312](https://github.com/NousResearch/hermes-agent/pull/3312))
+
+### Other Tools
+- Add request timeouts to `send_message_tool` HTTP calls ([#3162](https://github.com/NousResearch/hermes-agent/pull/3162)) by @memosr
+- Auto-repair `jobs.json` with invalid control characters ([#3537](https://github.com/NousResearch/hermes-agent/pull/3537))
+- Enable fine-grained tool streaming for Claude/OpenRouter ([#3497](https://github.com/NousResearch/hermes-agent/pull/3497))
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skills System
+- **Env var passthrough** for skills and user config — skills can declare environment variables to pass through ([#2807](https://github.com/NousResearch/hermes-agent/pull/2807))
+- Cache skills prompt with shared `skill_utils` module for faster TTFT ([#3421](https://github.com/NousResearch/hermes-agent/pull/3421))
+- Avoid redundant file re-read for skill conditions ([#2992](https://github.com/NousResearch/hermes-agent/pull/2992))
+- Use Git Trees API to prevent silent subdirectory loss during install ([#2995](https://github.com/NousResearch/hermes-agent/pull/2995))
+- Fix skills-sh install for deeply nested repo structures ([#2980](https://github.com/NousResearch/hermes-agent/pull/2980))
+- Handle null metadata in skill frontmatter ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Preserve trust for skills-sh identifiers + reduce resolution churn ([#3251](https://github.com/NousResearch/hermes-agent/pull/3251))
+- Agent-created skills were incorrectly treated as untrusted community content — fixed ([untagged commit](https://github.com/NousResearch/hermes-agent))
+
+### New Skills
+- **G0DM0D3 godmode jailbreaking skill** + docs ([#3157](https://github.com/NousResearch/hermes-agent/pull/3157))
+- **Docker management skill** added to optional-skills ([#3060](https://github.com/NousResearch/hermes-agent/pull/3060))
+- **OpenClaw migration v2** — 17 new modules, terminal recap for migrating from OpenClaw to Hermes ([#2906](https://github.com/NousResearch/hermes-agent/pull/2906))
+
+---
+
+## 🔒 Security & Reliability
+
+### Security Hardening
+- **SSRF protection** added to `browser_navigate` ([#3058](https://github.com/NousResearch/hermes-agent/pull/3058))
+- **SSRF protection** added to `vision_tools` and `web_tools` (hardened) ([#2679](https://github.com/NousResearch/hermes-agent/pull/2679))
+- **Restrict subagent toolsets** to parent's enabled set ([#3269](https://github.com/NousResearch/hermes-agent/pull/3269))
+- **Prevent zip-slip path traversal** in self-update ([#3250](https://github.com/NousResearch/hermes-agent/pull/3250))
+- **Prevent shell injection** in `_expand_path` via `~user` path suffix ([#2685](https://github.com/NousResearch/hermes-agent/pull/2685))
+- **Normalize input** before dangerous command detection ([#3260](https://github.com/NousResearch/hermes-agent/pull/3260))
+- Make tirith block verdicts approvable instead of hard-blocking ([#3428](https://github.com/NousResearch/hermes-agent/pull/3428))
+- Remove compromised `litellm`/`typer`/`platformdirs` from deps ([#2796](https://github.com/NousResearch/hermes-agent/pull/2796))
+- Pin all dependency version ranges ([#2810](https://github.com/NousResearch/hermes-agent/pull/2810))
+- Regenerate `uv.lock` with hashes, use lockfile in setup ([#2812](https://github.com/NousResearch/hermes-agent/pull/2812))
+- Bump dependencies to fix CVEs + regenerate `uv.lock` ([#3073](https://github.com/NousResearch/hermes-agent/pull/3073))
+- Supply chain audit CI workflow for PR scanning ([#2816](https://github.com/NousResearch/hermes-agent/pull/2816))
+
+### Reliability
+- **SQLite WAL write-lock contention** causing 15-20s TUI freeze — fixed ([#3385](https://github.com/NousResearch/hermes-agent/pull/3385))
+- **SQLite concurrency hardening** + session transcript integrity ([#3249](https://github.com/NousResearch/hermes-agent/pull/3249))
+- Prevent recurring cron job re-fire on gateway crash/restart loop ([#3396](https://github.com/NousResearch/hermes-agent/pull/3396))
+- Mark cron session as ended after job completes ([#2998](https://github.com/NousResearch/hermes-agent/pull/2998))
+
+---
+
+## ⚡ Performance
+
+- **TTFT startup optimizations** — salvaged easy-win startup improvements ([#3395](https://github.com/NousResearch/hermes-agent/pull/3395))
+- Cache skills prompt with shared `skill_utils` module ([#3421](https://github.com/NousResearch/hermes-agent/pull/3421))
+- Avoid redundant file re-read for skill conditions in prompt builder ([#2992](https://github.com/NousResearch/hermes-agent/pull/2992))
+
+---
+
+## 🐛 Notable Bug Fixes
+
+- Fix gateway token double-counting with cached agents ([#3306](https://github.com/NousResearch/hermes-agent/pull/3306), [#3317](https://github.com/NousResearch/hermes-agent/pull/3317))
+- Fix "Event loop is closed" / "Press ENTER to continue" during idle sessions ([#3398](https://github.com/NousResearch/hermes-agent/pull/3398))
+- Fix reasoning box rendering 3x during tool-calling loops ([#3405](https://github.com/NousResearch/hermes-agent/pull/3405))
+- Fix status bar showing 26K instead of 260K for token counts ([#3024](https://github.com/NousResearch/hermes-agent/pull/3024))
+- Fix `/queue` always working regardless of config ([#3298](https://github.com/NousResearch/hermes-agent/pull/3298))
+- Fix phantom Discord typing indicator after agent turn ([#3003](https://github.com/NousResearch/hermes-agent/pull/3003))
+- Fix Slack progress messages appearing in wrong thread ([#3063](https://github.com/NousResearch/hermes-agent/pull/3063))
+- Fix WhatsApp media downloads (documents, audio, video) ([#2978](https://github.com/NousResearch/hermes-agent/pull/2978))
+- Fix Telegram "Message thread not found" killing progress messages ([#3390](https://github.com/NousResearch/hermes-agent/pull/3390))
+- Fix OpenClaw migration overwriting defaults ([#3282](https://github.com/NousResearch/hermes-agent/pull/3282))
+- Fix returning-user setup menu dispatching wrong section ([#3083](https://github.com/NousResearch/hermes-agent/pull/3083))
+- Fix `hermes update` PEP 668 "externally-managed-environment" error ([#3099](https://github.com/NousResearch/hermes-agent/pull/3099))
+- Fix subagents hitting `max_iterations` prematurely via shared budget ([#3004](https://github.com/NousResearch/hermes-agent/pull/3004))
+- Fix YAML boolean handling for `tool_progress` config ([#3300](https://github.com/NousResearch/hermes-agent/pull/3300))
+- Fix `config.get()` crashes on YAML null values ([#3377](https://github.com/NousResearch/hermes-agent/pull/3377))
+- Fix `.strip()` crash on None values from YAML config ([#3552](https://github.com/NousResearch/hermes-agent/pull/3552))
+- Fix hung agents on gateway — `/stop` now hard-kills session lock ([#3104](https://github.com/NousResearch/hermes-agent/pull/3104))
+- Fix `_custom` provider silently remapped to `openrouter` ([#2792](https://github.com/NousResearch/hermes-agent/pull/2792))
+- Fix Matrix missing from `PLATFORMS` dict ([#3473](https://github.com/NousResearch/hermes-agent/pull/3473))
+- Fix Email adapter unbounded `_seen_uids` growth ([#3490](https://github.com/NousResearch/hermes-agent/pull/3490))
+
+---
+
+## 🧪 Testing
+
+- Pin `agent-client-protocol` < 0.9 to handle breaking upstream release ([#3320](https://github.com/NousResearch/hermes-agent/pull/3320))
+- Catch anthropic ImportError in vision auto-detection tests ([#3312](https://github.com/NousResearch/hermes-agent/pull/3312))
+- Update retry-exhaust test for new graceful return behavior ([#3320](https://github.com/NousResearch/hermes-agent/pull/3320))
+- Add regression tests for null metadata frontmatter ([untagged commit](https://github.com/NousResearch/hermes-agent))
+
+---
+
+## 📚 Documentation
+
+- Update all docs for `/model` command overhaul and custom provider support ([#2800](https://github.com/NousResearch/hermes-agent/pull/2800))
+- Fix stale and incorrect documentation across 18 files ([#2805](https://github.com/NousResearch/hermes-agent/pull/2805))
+- Document 9 previously undocumented features ([#2814](https://github.com/NousResearch/hermes-agent/pull/2814))
+- Add missing skills, CLI commands, and messaging env vars to docs ([#2809](https://github.com/NousResearch/hermes-agent/pull/2809))
+- Fix api-server response storage documentation — SQLite, not in-memory ([#2819](https://github.com/NousResearch/hermes-agent/pull/2819))
+- Quote pip install extras to fix zsh glob errors ([#2815](https://github.com/NousResearch/hermes-agent/pull/2815))
+- Unify hooks documentation — add plugin hooks to hooks page, add `session:end` event ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Clarify two-mode behavior in `session_search` schema description ([untagged commit](https://github.com/NousResearch/hermes-agent))
+- Fix Discord Public Bot setting for Discord-provided invite link ([#3519](https://github.com/NousResearch/hermes-agent/pull/3519)) by @mehmoodosman
+- Revise v0.4.0 changelog — fix feature attribution, reorder sections ([untagged commit](https://github.com/NousResearch/hermes-agent))
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** — 157 PRs covering the full scope of this release
+
+### Community Contributors
+- **@alt-glitch** (Siddharth Balyan) — 2 PRs: Nix flake with uv2nix build, NixOS module, and persistent container mode ([#20](https://github.com/NousResearch/hermes-agent/pull/20)); auto-generated config keys and suffix PATHs for Nix builds ([#3061](https://github.com/NousResearch/hermes-agent/pull/3061), [#3274](https://github.com/NousResearch/hermes-agent/pull/3274))
+- **@ctlst** — 1 PR: Prevent AsyncOpenAI/httpx cross-loop deadlock in gateway mode ([#2701](https://github.com/NousResearch/hermes-agent/pull/2701))
+- **@memosr** (memosr.eth) — 1 PR: Add request timeouts to `send_message_tool` HTTP calls ([#3162](https://github.com/NousResearch/hermes-agent/pull/3162))
+- **@mehmoodosman** (Osman Mehmood) — 1 PR: Fix Discord docs for Public Bot setting ([#3519](https://github.com/NousResearch/hermes-agent/pull/3519))
+
+### All Contributors
+@alt-glitch, @ctlst, @mehmoodosman, @memosr, @teknium1
+
+---
+
+**Full Changelog**: [v2026.3.23...v2026.3.28](https://github.com/NousResearch/hermes-agent/compare/v2026.3.23...v2026.3.28)
@@ -1458,6 +1458,29 @@ def _resolve_task_provider_model(
    return "auto", resolved_model, None, None


+_DEFAULT_AUX_TIMEOUT = 30.0
+
+
+def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
+    """Return the request timeout (seconds) configured for an auxiliary task.
+
+    Looks up ``auxiliary.{task}.timeout`` in the Hermes config and falls back
+    to *default* when the task name is empty, the config module cannot be
+    imported, any level of the lookup is missing or is not a mapping (for
+    example a YAML ``null``), or the value cannot be converted to ``float``.
+
+    Args:
+        task: Auxiliary task name used as the key under ``auxiliary``.
+        default: Timeout returned when nothing usable is configured.
+
+    Returns:
+        The configured timeout as a float, or *default*.
+    """
+    if not task:
+        return default
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+    except ImportError:
+        return default
+    aux = config.get("auxiliary") if isinstance(config, dict) else None
+    task_config = aux.get(task) if isinstance(aux, dict) else None
+    # A key that is present but empty (``auxiliary: {compression: }``) parses
+    # as None, and dict.get()'s default is NOT used for present keys -- so
+    # check the type explicitly instead of calling .get() on None.
+    if not isinstance(task_config, dict):
+        return default
+    raw = task_config.get("timeout")
+    if raw is None:
+        return default
+    try:
+        return float(raw)
+    except (ValueError, TypeError):
+        return default
+
+
 def _build_call_kwargs(
    provider: str,
    model: str,
@@ -1515,7 +1538,7 @@ def call_llm(
    temperature: float = None,
    max_tokens: int = None,
    tools: list = None,
-    timeout: float = 30.0,
+    timeout: float = None,
    extra_body: dict = None,
 ) -> Any:
    """Centralized synchronous LLM call.
@@ -1533,7 +1556,7 @@ def call_llm(
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
        tools: Tool definitions (for function calling).
-        timeout: Request timeout in seconds.
+        timeout: Request timeout in seconds (None = read from auxiliary.{task}.timeout config).
        extra_body: Additional request body fields.

    Returns:
@@ -1598,10 +1621,12 @@ def call_llm(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
                f"Run: hermes setup")

+    effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
+
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body,
+        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

    # Handle max_tokens vs max_completion_tokens retry
@@ -1683,7 +1708,7 @@ async def async_call_llm(
    temperature: float = None,
    max_tokens: int = None,
    tools: list = None,
-    timeout: float = 30.0,
+    timeout: float = None,
    extra_body: dict = None,
 ) -> Any:
    """Centralized asynchronous LLM call.
@@ -1744,10 +1769,12 @@ async def async_call_llm(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
                f"Run: hermes setup")

+    effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
+
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body,
+        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

    try:
@@ -141,7 +141,7 @@ class ContextCompressor:
            "last_prompt_tokens": self.last_prompt_tokens,
            "threshold_tokens": self.threshold_tokens,
            "context_length": self.context_length,
-            "usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
+            "usage_percent": min(100, (self.last_prompt_tokens / self.context_length * 100)) if self.context_length else 0,
            "compression_count": self.compression_count,
        }

@@ -347,7 +347,7 @@ Write only the summary body. Do not include any preamble or prefix."""
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.3,
                "max_tokens": summary_budget * 2,
-                "timeout": 45.0,
+                # timeout resolved from auxiliary.compression.timeout config by call_llm
            }
            if self.summary_model:
                call_kwargs["model"] = self.summary_model
@@ -284,11 +284,11 @@ class KawaiiSpinner:
        The CLI already drives a TUI widget (_spinner_text) for spinner display,
        so KawaiiSpinner's \\r-based animation is redundant under StdoutProxy.
        """
-        out = self._out
-        # StdoutProxy has a 'raw' attribute (bool) that plain file objects lack.
-        if hasattr(out, 'raw') and type(out).__name__ == 'StdoutProxy':
-            return True
-        return False
+        try:
+            from prompt_toolkit.patch_stdout import StdoutProxy
+            return isinstance(self._out, StdoutProxy)
+        except ImportError:
+            return False

    def _animate(self):
        # When stdout is not a real terminal (e.g. Docker, systemd, pipe),
@@ -699,7 +699,7 @@ def format_context_pressure(
        threshold_percent: Compaction threshold as a fraction of context window.
        compression_enabled: Whether auto-compression is active.
    """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)

@@ -729,7 +729,7 @@ def format_context_pressure_gateway(
    No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
    The percentage shows progress toward the compaction threshold.
    """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)

@@ -15,6 +15,8 @@ import time
 from pathlib import Path
 from typing import Any, Dict, Optional

+from utils import atomic_json_write
+
 import requests

 logger = logging.getLogger(__name__)
@@ -64,12 +66,10 @@ def _load_disk_cache() -> Dict[str, Any]:


 def _save_disk_cache(data: Dict[str, Any]) -> None:
-    """Save models.dev data to disk cache."""
+    """Save models.dev data to disk cache atomically."""
    try:
        cache_path = _get_cache_path()
-        cache_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(cache_path, "w", encoding="utf-8") as f:
-            json.dump(data, f, separators=(",", ":"))
+        atomic_json_write(cache_path, data, indent=None, separators=(",", ":"))
    except Exception as e:
        logger.debug("Failed to save models.dev disk cache: %s", e)

@@ -169,6 +169,25 @@ SKILLS_GUIDANCE = (
    "Skills that aren't maintained become liabilities."
 )

+TOOL_USE_ENFORCEMENT_GUIDANCE = (
+    "# Tool-use enforcement\n"
+    "You MUST use your tools to take action — do not describe what you would do "
+    "or plan to do without actually doing it. When you say you will perform an "
+    "action (e.g. 'I will run the tests', 'Let me check the file', 'I will create "
+    "the project'), you MUST immediately make the corresponding tool call in the same "
+    "response. Never end your turn with a promise of future action — execute it now.\n"
+    "Keep working until the task is actually complete. Do not stop with a summary of "
+    "what you plan to do next time. If you have tools available that can accomplish "
+    "the task, use them instead of telling the user what you would do.\n"
+    "Every response should either (a) contain tool calls that make progress, or "
+    "(b) deliver a final result to the user. Responses that only describe intentions "
+    "without acting are not acceptable."
+)
+
+# Model name substrings that trigger tool-use enforcement guidance.
+# Add new patterns here when a model family needs explicit steering.
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")
+
 PLATFORM_HINTS = {
    "whatsapp": (
        "You are on a text messaging communication platform, WhatsApp. "
@@ -19,7 +19,7 @@ _TITLE_PROMPT = (
 )


-def generate_title(user_message: str, assistant_response: str, timeout: float = 15.0) -> Optional[str]:
+def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
    """Generate a session title from the first exchange.

    Uses the auxiliary LLM client (cheapest/fastest available model).
@@ -7,6 +7,7 @@
 # =============================================================================
 model:
  # Default model to use (can be overridden with --model flag)
+  # Both "default" and "model" work as the key name here.
  default: "anthropic/claude-opus-4.6"
  
  # Inference provider selection:
@@ -1078,12 +1078,12 @@ class HermesCLI:
        # authoritative.  This avoids conflicts in multi-agent setups where
        # env vars would stomp each other.
        _model_config = CLI_CONFIG.get("model", {})
-        _config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "")
+        _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
        _FALLBACK_MODEL = "anthropic/claude-opus-4.6"
        self.model = model or _config_model or _FALLBACK_MODEL
        # Auto-detect model from local server if still on fallback
        if self.model == _FALLBACK_MODEL:
-            _base_url = _model_config.get("base_url", "") if isinstance(_model_config, dict) else ""
+            _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
            if "localhost" in _base_url or "127.0.0.1" in _base_url:
                from hermes_cli.runtime_provider import _auto_detect_local_model
                _detected = _auto_detect_local_model(_base_url)
@@ -4034,6 +4034,17 @@ class HermesCLI:
                    provider_data_collection=self._provider_data_collection,
                    fallback_model=self._fallback_model,
                )
+                # Silence raw spinner; route thinking through TUI widget when no foreground agent is active.
+                bg_agent._print_fn = lambda *_a, **_kw: None
+
+                def _bg_thinking(text: str) -> None:
+                    # Concurrent bg tasks may race on _spinner_text; acceptable for best-effort UI.
+                    if not self._agent_running:
+                        self._spinner_text = text
+                        if self._app:
+                            self._app.invalidate()
+
+                bg_agent.thinking_callback = _bg_thinking

                result = bg_agent.run_conversation(
                    user_message=prompt,
@@ -4096,6 +4107,9 @@ class HermesCLI:
                _cprint(f"  ❌ Background task #{task_num} failed: {e}")
            finally:
                self._background_tasks.pop(task_id, None)
+                # Clear spinner only if no foreground agent owns it
+                if not self._agent_running:
+                    self._spinner_text = ""
                if self._app:
                    self._invalidate(min_interval=0)

@@ -4506,7 +4520,7 @@ class HermesCLI:
        compressor = agent.context_compressor
        last_prompt = compressor.last_prompt_tokens
        ctx_len = compressor.context_length
-        pct = (last_prompt / ctx_len * 100) if ctx_len else 0
+        pct = min(100, (last_prompt / ctx_len * 100)) if ctx_len else 0
        compressions = compressor.compression_count

        msg_count = len(self.conversation_history)
@@ -5534,6 +5548,13 @@ class HermesCLI:
            except Exception as e:
                logging.debug("@ context reference expansion failed: %s", e)

+        # Sanitize surrogate characters that can arrive via clipboard paste from
+        # rich-text editors (Google Docs, Word, etc.).  Lone surrogates are invalid
+        # UTF-8 and crash JSON serialization in the OpenAI SDK.
+        if isinstance(message, str):
+            from run_agent import _sanitize_surrogates
+            message = _sanitize_surrogates(message)
+
        # Add user message to history
        self.conversation_history.append({"role": "user", "content": message})

@@ -5891,10 +5912,22 @@ class HermesCLI:
            else:
                duration_str = f"{seconds}s"
            
+            # Look up session title for resume-by-name hint
+            session_title = None
+            if self._session_db:
+                try:
+                    session_title = self._session_db.get_session_title(self.session_id)
+                except Exception:
+                    pass
+
            print("Resume this session with:")
            print(f"  hermes --resume {self.session_id}")
+            if session_title:
+                print(f"  hermes -c \"{session_title}\"")
            print()
            print(f"Session:        {self.session_id}")
+            if session_title:
+                print(f"Title:          {session_title}")
            print(f"Duration:       {duration_str}")
            print(f"Messages:       {msg_count} ({user_msgs} user, {tool_calls} tool calls)")
        else:
@@ -6070,7 +6103,7 @@ class HermesCLI:
            from honcho_integration.client import HonchoClientConfig
            from agent.display import honcho_session_line, write_tty
            hcfg = HonchoClientConfig.from_global_config()
-            if hcfg.enabled and hcfg.api_key and hcfg.explicitly_configured:
+            if hcfg.enabled and (hcfg.api_key or hcfg.base_url) and hcfg.explicitly_configured:
                sname = hcfg.resolve_session_name(session_id=self.session_id)
                if sname:
                    write_tty(honcho_session_line(hcfg.workspace_id, sname) + "\n")
@@ -6644,6 +6677,7 @@ class HermesCLI:
        # Paste collapsing: detect large pastes and save to temp file
        _paste_counter = [0]
        _prev_text_len = [0]
+        _prev_newline_count = [0]
        _paste_just_collapsed = [False]

        def _on_text_changed(buf):
@@ -6652,18 +6686,27 @@ class HermesCLI:
            When bracketed paste is available, handle_paste collapses
            large pastes directly.  This handler is a fallback for
            terminals without bracketed paste support.
+
+            Two heuristics (either triggers collapse):
+            1. Many characters added at once (chars_added > 1) — works
+               when the terminal delivers the paste in one event-loop tick.
+            2. Newline count jumped by 4+ in a single text-change event —
+               catches terminals that feed characters individually but
+               still batch newlines.  Alt+Enter only adds 1 newline per
+               event so it never triggers this.
            """
            text = buf.text
            chars_added = len(text) - _prev_text_len[0]
            _prev_text_len[0] = len(text)
            if _paste_just_collapsed[0]:
                _paste_just_collapsed[0] = False
+                _prev_newline_count[0] = text.count('\n')
                return
            line_count = text.count('\n')
-            # Heuristic: a real paste adds many characters at once (not just a
-            # single newline from Alt+Enter) AND the result has 5+ lines.
-            # Fallback for terminals without bracketed paste support.
-            if line_count >= 5 and chars_added > 1 and not text.startswith('/'):
+            newlines_added = line_count - _prev_newline_count[0]
+            _prev_newline_count[0] = line_count
+            is_paste = chars_added > 1 or newlines_added >= 4
+            if line_count >= 5 and is_paste and not text.startswith('/'):
                _paste_counter[0] += 1
                # Save to temp file
                paste_dir = _hermes_home / "pastes"
@@ -6671,6 +6714,7 @@ class HermesCLI:
                paste_file = paste_dir / f"paste_{_paste_counter[0]}_{datetime.now().strftime('%H%M%S')}.txt"
                paste_file.write_text(text, encoding="utf-8")
                # Replace buffer with compact reference
+                _paste_just_collapsed[0] = True
                buf.text = f"[Pasted text #{_paste_counter[0]}: {line_count + 1} lines \u2192 {paste_file}]"
                buf.cursor_position = len(buf.text)

@@ -327,7 +327,20 @@ def load_jobs() -> List[Dict[str, Any]]:
        with open(JOBS_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
            return data.get("jobs", [])
-    except (json.JSONDecodeError, IOError):
+    except json.JSONDecodeError:
+        # Retry with strict=False to handle bare control chars in string values
+        try:
+            with open(JOBS_FILE, 'r', encoding='utf-8') as f:
+                data = json.loads(f.read(), strict=False)
+                jobs = data.get("jobs", [])
+                if jobs:
+                    # Auto-repair: rewrite with proper escaping
+                    save_jobs(jobs)
+                    logger.warning("Auto-repaired jobs.json (had invalid control characters)")
+                return jobs
+        except Exception:
+            return []
+    except IOError:
        return []


@@ -0,0 +1,15 @@
+# Hermes Agent Persona
+
+<!--
+This file defines the agent's personality and tone.
+The agent will embody whatever you write here.
+Edit this to customize how Hermes communicates with you.
+
+Examples:
+  - "You are a warm, playful assistant who uses kaomoji occasionally."
+  - "You are a concise technical expert. No fluff, just facts."
+  - "You speak like a friendly coworker who happens to know everything."
+
+This file is loaded fresh each message -- no restart needed.
+Delete the contents (or this file) to use the default personality.
+-->
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Docker entrypoint: bootstrap config files into the mounted volume, then run hermes.
+#
+# Each seeding step below only runs when its target file is missing, so files
+# already present in the volume are never overwritten on container restart.
+# `set -e` aborts the container start on the first failing command.
+set -e
+
+# Data directory (the mounted volume). Honour an externally supplied
+# HERMES_HOME and export it so child processes see the same directory that is
+# bootstrapped here.
+# NOTE(review): assumes skills_sync.py and hermes resolve their data directory
+# from the HERMES_HOME environment variable -- confirm against the Dockerfile.
+HERMES_HOME="${HERMES_HOME:-/opt/data}"
+export HERMES_HOME
+INSTALL_DIR="/opt/hermes"
+
+# Create directory structure
+mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills,whatsapp/session}
+
+# .env
+if [ ! -f "$HERMES_HOME/.env" ]; then
+    cp "$INSTALL_DIR/.env.example" "$HERMES_HOME/.env"
+fi
+
+# config.yaml
+if [ ! -f "$HERMES_HOME/config.yaml" ]; then
+    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
+fi
+
+# SOUL.md
+if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
+    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
+fi
+
+# Sync bundled skills (manifest-based so user edits are preserved)
+if [ -d "$INSTALL_DIR/skills" ]; then
+    python3 "$INSTALL_DIR/tools/skills_sync.py"
+fi
+
+# exec replaces this shell with hermes, so container signals (e.g. from
+# `docker stop`) are delivered to hermes directly.
+exec hermes "$@"
@@ -0,0 +1,56 @@
+# Hermes Agent — Docker
+
+Want to run Hermes Agent, but without installing packages on your host? This'll sort you out.
+
+This will let you run the agent in a container, with the most relevant modes outlined below.
+
+The container stores all user data (config, API keys, sessions, skills, memories) in a single directory mounted from the host at `/opt/data`. The image itself is stateless and can be upgraded by pulling a new version without losing any configuration.
+
+## Quick start
+
+If this is your first time running Hermes Agent, create a data directory on the host and start the container interactively to run the setup wizard:
+
+```sh
+mkdir -p ~/.hermes
+docker run -it --rm \
+  -v ~/.hermes:/opt/data \
+  nousresearch/hermes-agent
+```
+
+This drops you into the setup wizard, which will prompt you for your API keys and write them to `~/.hermes/.env`. You only need to do this once. It is highly recommended to also configure at least one messaging platform (Telegram, Discord, Slack, WhatsApp, etc.) at this point, so the gateway has something to connect to.
+
+## Running in gateway mode
+
+Once configured, run the container in the background as a persistent gateway (Telegram, Discord, Slack, WhatsApp, etc.):
+
+```sh
+docker run -d \
+  --name hermes \
+  --restart unless-stopped \
+  -v ~/.hermes:/opt/data \
+  nousresearch/hermes-agent gateway run
+```
+
+## Running interactively (CLI chat)
+
+To open an interactive chat session against an existing data directory:
+
+```sh
+docker run -it --rm \
+  -v ~/.hermes:/opt/data \
+  nousresearch/hermes-agent
+```
+
+## Upgrading
+
+Pull the latest image and recreate the container. Your data directory is untouched.
+
+```sh
+docker pull nousresearch/hermes-agent:latest
+docker rm -f hermes
+docker run -d \
+  --name hermes \
+  --restart unless-stopped \
+  -v ~/.hermes:/opt/data \
+  nousresearch/hermes-agent gateway run
+```
@@ -101,21 +101,11 @@ Available methods:

 ### Patches (`patches.py`)

-**Problem**: Some hermes-agent tools use `asyncio.run()` internally (e.g., the Modal backend via SWE-ReX). This crashes when called from inside Atropos's event loop because `asyncio.run()` cannot be nested.
+**Problem**: Some hermes-agent tools use `asyncio.run()` internally (e.g., the Modal backend). This crashes when called from inside Atropos's event loop because `asyncio.run()` cannot be nested.

-**Solution**: `patches.py` monkey-patches `SwerexModalEnvironment` to use a dedicated background thread (`_AsyncWorker`) with its own event loop. The calling code sees the same sync interface, but internally the async work happens on a separate thread that doesn't conflict with Atropos's loop.
+**Solution**: `ModalEnvironment` uses a dedicated `_AsyncWorker` background thread with its own event loop. The calling code sees a sync interface, but internally all async Modal SDK calls happen on the worker thread so they don't conflict with Atropos's loop. This is built directly into `tools/environments/modal.py` — no monkey-patching required.

-What gets patched:
- `SwerexModalEnvironment.__init__` -- creates Modal deployment on a background thread
- `SwerexModalEnvironment.execute` -- runs commands on the same background thread
- `SwerexModalEnvironment.stop` -- stops deployment on the background thread
-
-The patches are:
- **Idempotent** -- calling `apply_patches()` multiple times is safe
- **Transparent** -- same interface and behavior, only the internal async execution changes
- **Universal** -- works identically in normal CLI use (no running event loop)
-
-Applied automatically at import time by `hermes_base_env.py`.
+`patches.py` is now a no-op (kept for backward compatibility with imports).

 ### Tool Call Parsers (`tool_call_parsers/`)

@@ -25,7 +25,7 @@ import time
 from pathlib import Path
 from typing import Optional

-from hermes_cli.config import get_hermes_home
+from hermes_constants import get_hermes_dir


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@@ -41,7 +41,7 @@ LOCKOUT_SECONDS = 3600              # Lockout duration after too many failures
 MAX_PENDING_PER_PLATFORM = 3        # Max pending codes per platform
 MAX_FAILED_ATTEMPTS = 5             # Failed approvals before lockout

-PAIRING_DIR = get_hermes_home() / "pairing"
+PAIRING_DIR = get_hermes_dir("platforms/pairing", "pairing")


 def _secure_write(path: Path, data: str) -> None:
@@ -166,7 +166,7 @@ class ResponseStore:

 _CORS_HEADERS = {
    "Access-Control-Allow-Methods": "GET, POST, DELETE, OPTIONS",
-    "Access-Control-Allow-Headers": "Authorization, Content-Type",
+    "Access-Control-Allow-Headers": "Authorization, Content-Type, Idempotency-Key",
 }


@@ -223,6 +223,23 @@ if AIOHTTP_AVAILABLE:
 else:
    body_limit_middleware = None  # type: ignore[assignment]

+_SECURITY_HEADERS = {
+    "X-Content-Type-Options": "nosniff",
+    "Referrer-Policy": "no-referrer",
+}
+
+
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def security_headers_middleware(request, handler):
+        """Add the ``_SECURITY_HEADERS`` to every response, including errors.
+
+        aiohttp reports many errors (router 404/405, handlers that raise
+        ``web.HTTPException`` subclasses) by raising instead of returning a
+        response, so the headers are applied on that path too before the
+        exception is re-raised.  ``setdefault`` keeps any value that was
+        already set explicitly.
+
+        NOTE(review): this only covers responses produced by the handler and
+        by middlewares registered after this one; responses short-circuited
+        by earlier middlewares bypass it -- check the registration order.
+        """
+        try:
+            response = await handler(request)
+        except web.HTTPException as exc:
+            # The raised HTTPException carries the headers that will be sent.
+            for k, v in _SECURITY_HEADERS.items():
+                exc.headers.setdefault(k, v)
+            raise
+        for k, v in _SECURITY_HEADERS.items():
+            response.headers.setdefault(k, v)
+        return response
+else:
+    security_headers_middleware = None  # type: ignore[assignment]
+

 class _IdempotencyCache:
    """In-memory idempotency cache with TTL and basic LRU semantics."""
@@ -307,6 +324,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if "*" in self._cors_origins:
            headers = dict(_CORS_HEADERS)
            headers["Access-Control-Allow-Origin"] = "*"
+            headers["Access-Control-Max-Age"] = "600"
            return headers

        if origin not in self._cors_origins:
@@ -315,6 +333,7 @@ class APIServerAdapter(BasePlatformAdapter):
        headers = dict(_CORS_HEADERS)
        headers["Access-Control-Allow-Origin"] = origin
        headers["Vary"] = "Origin"
+        headers["Access-Control-Max-Age"] = "600"
        return headers

    def _origin_allowed(self, origin: str) -> bool:
@@ -582,10 +601,14 @@ class APIServerAdapter(BasePlatformAdapter):
        """
        import queue as _q

-        response = web.StreamResponse(
-            status=200,
-            headers={"Content-Type": "text/event-stream", "Cache-Control": "no-cache"},
-        )
+        sse_headers = {"Content-Type": "text/event-stream", "Cache-Control": "no-cache"}
+        # CORS middleware can't inject headers into StreamResponse after
+        # prepare() flushes them, so resolve CORS headers up front.
+        origin = request.headers.get("Origin", "")
+        cors = self._cors_headers_for_origin(origin) if origin else None
+        if cors:
+            sse_headers.update(cors)
+        response = web.StreamResponse(status=200, headers=sse_headers)
        await response.prepare(request)

        try:
@@ -1218,10 +1241,11 @@ class APIServerAdapter(BasePlatformAdapter):
            return False

        try:
-            mws = [mw for mw in (cors_middleware, body_limit_middleware) if mw is not None]
+            mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
            self._app = web.Application(middlewares=mws)
            self._app["api_server_adapter"] = self
            self._app.router.add_get("/health", self._handle_health)
+            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
            self._app.router.add_post("/v1/responses", self._handle_responses)
@@ -27,6 +27,7 @@ sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
 from gateway.config import Platform, PlatformConfig
 from gateway.session import SessionSource, build_session_key
 from hermes_cli.config import get_hermes_home
+from hermes_constants import get_hermes_dir


 GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
@@ -44,8 +45,8 @@ GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
 # (e.g. Telegram file URLs expire after ~1 hour).
 # ---------------------------------------------------------------------------

-# Default location: {HERMES_HOME}/image_cache/
-IMAGE_CACHE_DIR = get_hermes_home() / "image_cache"
+# Default location: {HERMES_HOME}/cache/images/ (legacy: image_cache/)
+IMAGE_CACHE_DIR = get_hermes_dir("cache/images", "image_cache")


 def get_image_cache_dir() -> Path:
@@ -147,7 +148,7 @@ def cleanup_image_cache(max_age_hours: int = 24) -> int:
 # here so the STT tool (OpenAI Whisper) can transcribe them from local files.
 # ---------------------------------------------------------------------------

-AUDIO_CACHE_DIR = get_hermes_home() / "audio_cache"
+AUDIO_CACHE_DIR = get_hermes_dir("cache/audio", "audio_cache")


 def get_audio_cache_dir() -> Path:
@@ -174,29 +175,51 @@ def cache_audio_from_bytes(data: bytes, ext: str = ".ogg") -> str:
    return str(filepath)


-async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
+async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> str:
    """
    Download an audio file from a URL and save it to the local cache.

+    Retries on transient failures (timeouts, 429, 5xx) with linearly
+    growing delays so a single slow CDN response doesn't lose the media.
+
    Args:
        url: The HTTP/HTTPS URL to download from.
        ext: File extension including the dot (e.g. ".ogg", ".mp3").
+        retries: Number of retry attempts on transient failures (negative counts as 0).

    Returns:
        Absolute path to the cached audio file as a string.
    """
+    import asyncio
    import httpx
+    import logging as _logging
+    _log = _logging.getLogger(__name__)

+    last_exc = None
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
-        response = await client.get(
-            url,
-            headers={
-                "User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
-                "Accept": "audio/*,*/*;q=0.8",
-            },
-        )
-        response.raise_for_status()
-        return cache_audio_from_bytes(response.content, ext)
+        for attempt in range(max(retries, 0) + 1):
+            try:
+                response = await client.get(
+                    url,
+                    headers={
+                        "User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
+                        "Accept": "audio/*,*/*;q=0.8",
+                    },
+                )
+                response.raise_for_status()
+                return cache_audio_from_bytes(response.content, ext)
+            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                last_exc = exc
+                status = exc.response.status_code if isinstance(exc, httpx.HTTPStatusError) else None
+                # Permanent HTTP errors (anything but 429/5xx) and the final attempt propagate.
+                if attempt >= retries or (status is not None and status != 429 and status < 500):
+                    raise
+                wait = 1.5 * (attempt + 1)
+                _log.debug("Audio cache retry %d/%d for %s (%.1fs): %s",
+                           attempt + 1, retries, url[:80], wait, exc)
+                await asyncio.sleep(wait)
+    # Defensive only: every loop iteration above returns, raises, or retries.
+    raise last_exc


 # ---------------------------------------------------------------------------
@@ -206,7 +229,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
 # here so the agent can reference them by local file path.
 # ---------------------------------------------------------------------------

-DOCUMENT_CACHE_DIR = get_hermes_home() / "document_cache"
+DOCUMENT_CACHE_DIR = get_hermes_dir("cache/documents", "document_cache")

 SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
@@ -333,7 +356,10 @@ class MessageEvent:
            return None
        # Split on space and get first word, strip the /
        parts = self.text.split(maxsplit=1)
-        return parts[0][1:].lower() if parts else None
+        raw = parts[0][1:].lower() if parts else None
+        if raw and "@" in raw:
+            raw = raw.split("@", 1)[0]
+        return raw
    
    def get_command_args(self) -> str:
        """Get the arguments after a command."""
@@ -550,6 +550,22 @@ class DiscordAdapter(BasePlatformAdapter):
                            return
                    # "all" falls through to handle_message
                
+                # If the message @mentions other users but NOT the bot, the
+                # sender is talking to someone else — stay silent.  Only
+                # applies in server channels; in DMs the user is always
+                # talking to the bot (mentions are just references).
+                # Controlled by DISCORD_IGNORE_NO_MENTION (default: true).
+                _ignore_no_mention = os.getenv(
+                    "DISCORD_IGNORE_NO_MENTION", "true"
+                ).lower() in ("true", "1", "yes")
+                if _ignore_no_mention and message.mentions and not isinstance(message.channel, discord.DMChannel):
+                    _bot_mentioned = (
+                        self._client.user is not None
+                        and self._client.user in message.mentions
+                    )
+                    if not _bot_mentioned:
+                        return  # Talking to someone else, don't interrupt
+
                await self._handle_message(message)

            @self._client.event
@@ -43,6 +43,20 @@ from gateway.platforms.base import (
 from gateway.config import Platform, PlatformConfig

 logger = logging.getLogger(__name__)
+# Automated sender patterns — emails from these are silently ignored
+_NOREPLY_PATTERNS = (
+    "noreply", "no-reply", "no_reply", "donotreply", "do-not-reply",
+    "mailer-daemon", "postmaster", "bounce", "notifications@",
+    "automated@", "auto-confirm", "auto-reply", "automailer",
+)
+
+# RFC headers that indicate bulk/automated mail
+_AUTOMATED_HEADERS = {
+    "Auto-Submitted": lambda v: v.lower() != "no",
+    "Precedence": lambda v: v.lower() in ("bulk", "list", "junk"),
+    "X-Auto-Response-Suppress": lambda v: bool(v),
+    "List-Unsubscribe": lambda v: bool(v),
+}

 # Gmail-safe max length per email body
 MAX_MESSAGE_LENGTH = 50_000
@@ -50,7 +64,17 @@ MAX_MESSAGE_LENGTH = 50_000
 # Supported image extensions for inline detection
 _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp"}

-
+def _is_automated_sender(address: str, headers: dict) -> bool:
+    """Return True if this email is from an automated/noreply source."""
+    addr = address.lower()
+    if any(pattern in addr for pattern in _NOREPLY_PATTERNS):
+        return True
+    # Header field names are case-insensitive, but dict lookups are not:
+    # fold the names first so e.g. "precedence: bulk" is still recognised.
+    present = {str(name).lower(): value for name, value in headers.items()}
+    hits = ((present.get(name.lower(), ""), check) for name, check in _AUTOMATED_HEADERS.items())
+    return any(bool(value) and check(value) for value, check in hits)
+    
 def check_email_requirements() -> bool:
    """Check if email platform dependencies are available."""
    addr = os.getenv("EMAIL_ADDRESS")
@@ -213,6 +237,7 @@ class EmailAdapter(BasePlatformAdapter):

        # Track message IDs we've already processed to avoid duplicates
        self._seen_uids: set = set()
+        self._seen_uids_max: int = 2000   # cap to prevent unbounded memory growth
        self._poll_task: Optional[asyncio.Task] = None

        # Map chat_id (sender email) -> last subject + message-id for threading
@@ -220,6 +245,26 @@ class EmailAdapter(BasePlatformAdapter):

        logger.info("[Email] Adapter initialized for %s", self._address)

+    def _trim_seen_uids(self) -> None:
+        """Keep only the most recent UIDs to prevent unbounded memory growth.
+
+        IMAP UIDs are monotonically increasing integers. When the set grows
+        beyond the cap, we keep only the highest half — old UIDs are safe to
+        drop because new messages always have higher UIDs and IMAP's UNSEEN
+        flag prevents re-delivery regardless.
+        """
+        if len(self._seen_uids) <= self._seen_uids_max:
+            return
+        keep = max(1, self._seen_uids_max // 2)  # a [-0:] slice would keep everything
+        try:
+            # UIDs are bytes like b'1234' — order numerically and keep the top half
+            self._seen_uids = set(sorted(self._seen_uids, key=int)[-keep:])
+            logger.debug("[Email] Trimmed seen UIDs to %d entries", len(self._seen_uids))
+        except (ValueError, TypeError):
+            # Fallback: UIDs did not parse as integers, so keep an arbitrary
+            # subset of the same size instead of a numerically ordered one.
+            self._seen_uids = set(list(self._seen_uids)[-keep:])
+
    async def connect(self) -> bool:
        """Connect to the IMAP server and start polling for new messages."""
        try:
@@ -232,6 +277,8 @@ class EmailAdapter(BasePlatformAdapter):
            if status == "OK" and data and data[0]:
                for uid in data[0].split():
                    self._seen_uids.add(uid)
+            # Keep only the most recent UIDs to prevent unbounded growth
+            self._trim_seen_uids()
            imap.logout()
            logger.info("[Email] IMAP connection test passed. %d existing messages skipped.", len(self._seen_uids))
        except Exception as e:
@@ -302,6 +349,9 @@ class EmailAdapter(BasePlatformAdapter):
                if uid in self._seen_uids:
                    continue
                self._seen_uids.add(uid)
+                # Trim periodically to prevent unbounded memory growth
+                if len(self._seen_uids) > self._seen_uids_max:
+                    self._trim_seen_uids()

                status, msg_data = imap.uid("fetch", uid, "(RFC822)")
                if status != "OK":
@@ -320,6 +370,11 @@ class EmailAdapter(BasePlatformAdapter):
                subject = _decode_header_value(msg.get("Subject", "(no subject)"))
                message_id = msg.get("Message-ID", "")
                in_reply_to = msg.get("In-Reply-To", "")
+                # Skip automated/noreply senders before any processing
+                msg_headers = dict(msg.items())
+                if _is_automated_sender(sender_addr, msg_headers):
+                    logger.debug("[Email] Skipping automated sender: %s", sender_addr)
+                    continue
                body = _extract_text_body(msg)
                attachments = _extract_attachments(msg, skip_attachments=self._skip_attachments)

@@ -348,6 +403,11 @@ class EmailAdapter(BasePlatformAdapter):
        if sender_addr == self._address.lower():
            return

+        # Never reply to automated senders
+        if _is_automated_sender(sender_addr, {}):
+            logger.debug("[Email] Dropping automated sender at dispatch: %s", sender_addr)
+            return
+
        subject = msg_data["subject"]
        body = msg_data["body"].strip()
        attachments = msg_data["attachments"]
@@ -40,7 +40,9 @@ logger = logging.getLogger(__name__)
 MAX_MESSAGE_LENGTH = 4000

 # Store directory for E2EE keys and sync state.
-_STORE_DIR = Path.home() / ".hermes" / "matrix" / "store"
+# Uses get_hermes_dir() so each profile gets its own Matrix store.
+from hermes_constants import get_hermes_dir as _get_hermes_dir
+_STORE_DIR = _get_hermes_dir("platforms/matrix/store", "matrix/store")

 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5
@@ -161,22 +163,49 @@ class MatrixAdapter(BasePlatformAdapter):
        # Authenticate.
        if self._access_token:
            client.access_token = self._access_token
-            # Resolve user_id if not set.
-            if not self._user_id:
-                resp = await client.whoami()
-                if isinstance(resp, nio.WhoamiResponse):
-                    self._user_id = resp.user_id
-                    client.user_id = resp.user_id
-                    logger.info("Matrix: authenticated as %s", self._user_id)
-                else:
-                    logger.error(
-                        "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER"
+
+            # With access-token auth, always resolve whoami so we validate the
+            # token and learn the device_id. The device_id matters for E2EE:
+            # without it, matrix-nio can send plain messages but may fail to
+            # decrypt inbound encrypted events or encrypt outbound room sends.
+            resp = await client.whoami()
+            if isinstance(resp, nio.WhoamiResponse):
+                resolved_user_id = getattr(resp, "user_id", "") or self._user_id
+                resolved_device_id = getattr(resp, "device_id", "")
+                if resolved_user_id:
+                    self._user_id = resolved_user_id
+
+                # restore_login() is the matrix-nio path that binds the access
+                # token to a specific device and loads the crypto store.
+                if resolved_device_id and hasattr(client, "restore_login"):
+                    client.restore_login(
+                        self._user_id or resolved_user_id,
+                        resolved_device_id,
+                        self._access_token,
                    )
-                    await client.close()
-                    return False
+                else:
+                    if self._user_id:
+                        client.user_id = self._user_id
+                    if resolved_device_id:
+                        client.device_id = resolved_device_id
+                    client.access_token = self._access_token
+                    if self._encryption:
+                        logger.warning(
+                            "Matrix: access-token login did not restore E2EE state; "
+                            "encrypted rooms may fail until a device_id is available"
+                        )
+
+                logger.info(
+                    "Matrix: using access token for %s%s",
+                    self._user_id or "(unknown user)",
+                    f" (device {resolved_device_id})" if resolved_device_id else "",
+                )
            else:
-                client.user_id = self._user_id
-                logger.info("Matrix: using access token for %s", self._user_id)
+                logger.error(
+                    "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER"
+                )
+                await client.close()
+                return False
        elif self._password and self._user_id:
            resp = await client.login(
                self._password,
@@ -194,13 +223,18 @@ class MatrixAdapter(BasePlatformAdapter):
            return False

        # If E2EE is enabled, load the crypto store.
-        if self._encryption and hasattr(client, "olm"):
+        if self._encryption and getattr(client, "olm", None):
            try:
                if client.should_upload_keys:
                    await client.keys_upload()
                logger.info("Matrix: E2EE crypto initialized")
            except Exception as exc:
                logger.warning("Matrix: crypto init issue: %s", exc)
+        elif self._encryption:
+            logger.warning(
+                "Matrix: E2EE requested but crypto store is not loaded; "
+                "encrypted rooms may fail"
+            )

        # Register event callbacks.
        client.add_event_callback(self._on_room_message, nio.RoomMessageText)
@@ -230,6 +264,7 @@ class MatrixAdapter(BasePlatformAdapter):
            )
            # Build DM room cache from m.direct account data.
            await self._refresh_dm_cache()
+            await self._run_e2ee_maintenance()
        else:
            logger.warning("Matrix: initial sync returned %s", type(resp).__name__)

@@ -301,13 +336,48 @@ class MatrixAdapter(BasePlatformAdapter):
                    relates_to["m.in_reply_to"] = {"event_id": reply_to}
                msg_content["m.relates_to"] = relates_to

-            resp = await self._client.room_send(
-                chat_id,
-                "m.room.message",
-                msg_content,
-            )
+            async def _room_send_once(*, ignore_unverified_devices: bool = False):
+                return await asyncio.wait_for(
+                    self._client.room_send(
+                        chat_id,
+                        "m.room.message",
+                        msg_content,
+                        ignore_unverified_devices=ignore_unverified_devices,
+                    ),
+                    timeout=45,
+                )
+
+            try:
+                resp = await _room_send_once(ignore_unverified_devices=False)
+            except Exception as exc:
+                retryable = isinstance(exc, asyncio.TimeoutError)
+                olm_unverified = getattr(nio, "OlmUnverifiedDeviceError", None)
+                send_retry = getattr(nio, "SendRetryError", None)
+                if isinstance(olm_unverified, type) and isinstance(exc, olm_unverified):
+                    retryable = True
+                if isinstance(send_retry, type) and isinstance(exc, send_retry):
+                    retryable = True
+
+                if not retryable:
+                    logger.error("Matrix: failed to send to %s: %s", chat_id, exc)
+                    return SendResult(success=False, error=str(exc))
+
+                logger.warning(
+                    "Matrix: initial encrypted send to %s failed (%s); "
+                    "retrying after E2EE maintenance with ignored unverified devices",
+                    chat_id,
+                    exc,
+                )
+                await self._run_e2ee_maintenance()
+                try:
+                    resp = await _room_send_once(ignore_unverified_devices=True)
+                except Exception as retry_exc:
+                    logger.error("Matrix: failed to send to %s after retry: %s", chat_id, retry_exc)
+                    return SendResult(success=False, error=str(retry_exc))
+
            if isinstance(resp, nio.RoomSendResponse):
                last_event_id = resp.event_id
+                logger.info("Matrix: sent event %s to %s", last_event_id, chat_id)
            else:
                err = getattr(resp, "message", str(resp))
                logger.error("Matrix: failed to send to %s: %s", chat_id, err)
@@ -565,6 +635,9 @@ class MatrixAdapter(BasePlatformAdapter):
                        getattr(resp, "message", resp),
                    )
                    await asyncio.sleep(5)
+                    continue
+
+                await self._run_e2ee_maintenance()
            except asyncio.CancelledError:
                return
            except Exception as exc:
@@ -573,6 +646,38 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.warning("Matrix: sync error: %s — retrying in 5s", exc)
                await asyncio.sleep(5)

+    async def _run_e2ee_maintenance(self) -> None:
+        """Run matrix-nio E2EE housekeeping between syncs.
+
+        Hermes uses a custom sync loop instead of matrix-nio's sync_forever(),
+        so we need to explicitly drive the key management work that sync_forever()
+        normally handles for encrypted rooms.
+        """
+        client = self._client
+        if not client or not self._encryption or not getattr(client, "olm", None):
+            return
+        # send_to_device_messages() is scheduled on every run; the key calls below only when flagged.
+        tasks = [asyncio.create_task(client.send_to_device_messages())]
+
+        if client.should_upload_keys:
+            tasks.append(asyncio.create_task(client.keys_upload()))
+
+        if client.should_query_keys:
+            tasks.append(asyncio.create_task(client.keys_query()))
+
+        if client.should_claim_keys:
+            users = client.get_users_for_key_claiming()
+            if users:
+                tasks.append(asyncio.create_task(client.keys_claim(users)))
+        # Await every task: a failure is logged and the rest still run; cancellation propagates.
+        for task in asyncio.as_completed(tasks):
+            try:
+                await task
+            except asyncio.CancelledError:
+                raise
+            except Exception as exc:
+                logger.warning("Matrix: E2EE maintenance task failed: %s", exc)
+
    # ------------------------------------------------------------------
    # Event callbacks
    # ------------------------------------------------------------------
@@ -345,7 +345,8 @@ class TelegramAdapter(BasePlatformAdapter):
    def _persist_dm_topic_thread_id(self, chat_id: int, topic_name: str, thread_id: int) -> None:
        """Save a newly created thread_id back into config.yaml so it persists across restarts."""
        try:
-            config_path = _Path.home() / ".hermes" / "config.yaml"
+            from hermes_constants import get_hermes_home
+            config_path = get_hermes_home() / "config.yaml"
            if not config_path.exists():
                logger.warning("[%s] Config file not found at %s, cannot persist thread_id", self.name, config_path)
                return
@@ -1757,7 +1758,8 @@ class TelegramAdapter(BasePlatformAdapter):
        recognized without a gateway restart.
        """
        try:
-            config_path = _Path.home() / ".hermes" / "config.yaml"
+            from hermes_constants import get_hermes_home
+            config_path = get_hermes_home() / "config.yaml"
            if not config_path.exists():
                return

@@ -12,6 +12,7 @@ from __future__ import annotations
 import asyncio
 import ipaddress
 import logging
+import os
 import socket
 from typing import Iterable, Optional

@@ -43,6 +44,14 @@ _DOH_PROVIDERS: list[dict] = [
 _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]


+def _resolve_proxy_url() -> str | None:
+    """Return the first non-blank proxy URL from the proxy environment variables, or None."""
+    names = ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy")
+    candidates = ((os.environ.get(name) or "").strip() for name in names)
+    # The generator is lazy, so lookups stop at the first variable that is set.
+    return next((url for url in candidates if url), None)
+
+
 class TelegramFallbackTransport(httpx.AsyncBaseTransport):
    """Retry Telegram Bot API requests via fallback IPs while preserving TLS/SNI.

@@ -54,6 +63,9 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):

    def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
        self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
+        proxy_url = _resolve_proxy_url()
+        if proxy_url and "proxy" not in transport_kwargs:
+            transport_kwargs["proxy"] = proxy_url
        self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
        self._fallbacks = {
            ip: httpx.AsyncHTTPTransport(**transport_kwargs) for ip in self._fallback_ips
@@ -27,6 +27,7 @@ import hashlib
 import hmac
 import json
 import logging
+import os
 import re
 import subprocess
 import time
@@ -53,6 +54,7 @@ logger = logging.getLogger(__name__)
 DEFAULT_HOST = "0.0.0.0"
 DEFAULT_PORT = 8644
 _INSECURE_NO_AUTH = "INSECURE_NO_AUTH"
+_DYNAMIC_ROUTES_FILENAME = "webhook_subscriptions.json"


 def check_webhook_requirements() -> bool:
@@ -68,7 +70,10 @@ class WebhookAdapter(BasePlatformAdapter):
        self._host: str = config.extra.get("host", DEFAULT_HOST)
        self._port: int = int(config.extra.get("port", DEFAULT_PORT))
        self._global_secret: str = config.extra.get("secret", "")
-        self._routes: Dict[str, dict] = config.extra.get("routes", {})
+        self._static_routes: Dict[str, dict] = config.extra.get("routes", {})
+        self._dynamic_routes: Dict[str, dict] = {}
+        self._dynamic_routes_mtime: float = 0.0
+        self._routes: Dict[str, dict] = dict(self._static_routes)
        self._runner = None

        # Delivery info keyed by session chat_id — consumed by send()
@@ -96,6 +101,9 @@ class WebhookAdapter(BasePlatformAdapter):
    # ------------------------------------------------------------------

    async def connect(self) -> bool:
+        # Load agent-created subscriptions before validating
+        self._reload_dynamic_routes()
+
        # Validate routes at startup — secret is required per route
        for name, route in self._routes.items():
            secret = route.get("secret", self._global_secret)
@@ -182,8 +190,46 @@ class WebhookAdapter(BasePlatformAdapter):
        """GET /health — simple health check."""
        return web.json_response({"status": "ok", "platform": "webhook"})

+    def _reload_dynamic_routes(self) -> None:
+        """Reload agent-created subscriptions from disk when the file's mtime changes.
+
+        Static (config) routes always win over dynamic ones with the same name.
+        """
+        from pathlib import Path as _Path
+        hermes_home = _Path(os.getenv("HERMES_HOME", str(_Path.home() / ".hermes"))).expanduser()
+        subs_path = hermes_home / _DYNAMIC_ROUTES_FILENAME
+        if not subs_path.exists():
+            # Forget the old mtime too, so a re-created file is always picked up.
+            self._dynamic_routes_mtime = 0.0
+            if self._dynamic_routes:
+                self._dynamic_routes = {}
+                self._routes = dict(self._static_routes)
+                logger.debug("[webhook] Dynamic subscriptions file removed, cleared dynamic routes")
+            return
+        try:
+            mtime = subs_path.stat().st_mtime
+            if mtime == self._dynamic_routes_mtime:
+                return  # No change
+            data = json.loads(subs_path.read_text(encoding="utf-8"))
+            if not isinstance(data, dict):
+                return
+            # Keep only dict-shaped entries; static routes take precedence
+            self._dynamic_routes = {
+                k: v for k, v in data.items()
+                if isinstance(v, dict) and k not in self._static_routes
+            }
+            self._routes = {**self._dynamic_routes, **self._static_routes}
+            self._dynamic_routes_mtime = mtime
+            logger.info("[webhook] Reloaded %d dynamic route(s): %s", len(self._dynamic_routes),
+                        ", ".join(self._dynamic_routes.keys()) or "(none)")
+        except Exception as e:
+            logger.warning("[webhook] Failed to reload dynamic routes: %s", e)
+
    async def _handle_webhook(self, request: "web.Request") -> "web.Response":
        """POST /webhooks/{route_name} — receive and process a webhook event."""
+        # Hot-reload dynamic subscriptions on each request (mtime-gated, cheap)
+        self._reload_dynamic_routes()
+
        route_name = request.match_info.get("route_name", "")
        route_config = self._routes.get(route_name)

@@ -26,6 +26,7 @@ from pathlib import Path
 from typing import Dict, Optional, Any

 from hermes_cli.config import get_hermes_home
+from hermes_constants import get_hermes_dir

 logger = logging.getLogger(__name__)

@@ -134,7 +135,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
        )
        self._session_path: Path = Path(config.extra.get(
            "session_path",
-            get_hermes_home() / "whatsapp" / "session"
+            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
        self._message_queue: asyncio.Queue = asyncio.Queue()
@@ -526,6 +527,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
        image_path: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
+        **kwargs,
    ) -> SendResult:
        """Send a local image file natively via bridge."""
        return await self._send_media_to_bridge(chat_id, image_path, "image", caption)
@@ -536,6 +538,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
        video_path: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
+        **kwargs,
    ) -> SendResult:
        """Send a video natively via bridge — plays inline in WhatsApp."""
        return await self._send_media_to_bridge(chat_id, video_path, "video", caption)
@@ -547,6 +550,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
+        **kwargs,
    ) -> SendResult:
        """Send a document/file as a downloadable attachment via bridge."""
        return await self._send_media_to_bridge(
@@ -288,7 +288,7 @@ def _resolve_gateway_model(config: dict | None = None) -> str:
    if isinstance(model_cfg, str):
        model = model_cfg
    elif isinstance(model_cfg, dict):
-        model = model_cfg.get("default", model)
+        model = model_cfg.get("default") or model_cfg.get("model") or model
    return model


@@ -432,7 +432,7 @@ class GatewayRunner:
            from honcho_integration.session import HonchoSessionManager

            hcfg = HonchoClientConfig.from_global_config()
-            if not hcfg.enabled or not hcfg.api_key:
+            if not hcfg.enabled or not (hcfg.api_key or hcfg.base_url):
                return None, hcfg

            client = get_honcho_client(hcfg)
@@ -745,10 +745,22 @@ class GatewayRunner:
                logger.error("No connected messaging platforms remain. Shutting down gateway cleanly.")
            await self.stop()
        elif not self.adapters and self._failed_platforms:
-            logger.warning(
-                "No connected messaging platforms remain, but %d platform(s) queued for reconnection",
-                len(self._failed_platforms),
-            )
+            # All platforms are down and queued for background reconnection.
+            # If the error is retryable, exit with failure so systemd Restart=on-failure
+            # can restart the process. Otherwise stay alive and keep retrying in background.
+            if adapter.fatal_error_retryable:
+                self._exit_reason = adapter.fatal_error_message or "All messaging platforms failed with retryable errors"
+                self._exit_with_failure = True
+                logger.error(
+                    "All messaging platforms failed with retryable errors. "
+                    "Shutting down gateway for service restart (systemd will retry)."
+                )
+                await self.stop()
+            else:
+                logger.warning(
+                    "No connected messaging platforms remain, but %d platform(s) queued for reconnection",
+                    len(self._failed_platforms),
+                )

    def _request_clean_exit(self, reason: str) -> None:
        self._exit_cleanly = True
@@ -2081,7 +2093,7 @@ class GatewayRunner:
                    if isinstance(_model_cfg, str):
                        _hyg_model = _model_cfg
                    elif isinstance(_model_cfg, dict):
-                        _hyg_model = _model_cfg.get("default", _hyg_model)
+                        _hyg_model = _model_cfg.get("default") or _model_cfg.get("model") or _hyg_model
                        # Read explicit context_length override from model config
                        # (same as run_agent.py lines 995-1005)
                        _raw_ctx = _model_cfg.get("context_length")
@@ -2204,6 +2216,15 @@ class GatewayRunner:
                                    ),
                                )

+                                # _compress_context ends the old session and creates
+                                # a new session_id.  Write compressed messages into
+                                # the NEW session so the old transcript stays intact
+                                # and searchable via session_search.
+                                _hyg_new_sid = _hyg_agent.session_id
+                                if _hyg_new_sid != session_entry.session_id:
+                                    session_entry.session_id = _hyg_new_sid
+                                    self.session_store._save()
+
                                self.session_store.rewrite_transcript(
                                    session_entry.session_id, _compressed
                                )
@@ -3998,13 +4019,22 @@ class GatewayRunner:
            loop = asyncio.get_event_loop()
            compressed, _ = await loop.run_in_executor(
                None,
-                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens),
+                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens)
            )

-            self.session_store.rewrite_transcript(session_entry.session_id, compressed)
+            # _compress_context already calls end_session() on the old session
+            # (preserving its full transcript in SQLite) and creates a new
+            # session_id for the continuation.  Write the compressed messages
+            # into the NEW session so the original history stays searchable.
+            new_session_id = tmp_agent.session_id
+            if new_session_id != session_entry.session_id:
+                session_entry.session_id = new_session_id
+                self.session_store._save()
+
+            self.session_store.rewrite_transcript(new_session_id, compressed)
            # Reset stored token count — transcript changed, old value is stale
            self.session_store.update_session(
-                session_entry.session_key, last_prompt_tokens=0,
+                session_entry.session_key, last_prompt_tokens=0
            )
            new_count = len(compressed)
            new_tokens = estimate_messages_tokens_rough(compressed)
@@ -4160,7 +4190,7 @@ class GatewayRunner:
            ]
            ctx = agent.context_compressor
            if ctx.last_prompt_tokens:
-                pct = ctx.last_prompt_tokens / ctx.context_length * 100 if ctx.context_length else 0
+                pct = min(100, ctx.last_prompt_tokens / ctx.context_length * 100) if ctx.context_length else 0
                lines.append(f"Context: {ctx.last_prompt_tokens:,} / {ctx.context_length:,} ({pct:.0f}%)")
            if ctx.compression_count:
                lines.append(f"Compressions: {ctx.compression_count}")
@@ -4974,12 +5004,17 @@ class GatewayRunner:
            progress_queue.put(msg)
        
        # Background task to send progress messages
-        # Accumulates tool lines into a single message that gets edited
-        # For DM top-level Slack messages, source.thread_id is None but the
-        # final reply will be threaded under the original message via reply_to.
-        # Use event_message_id as fallback so progress messages land in the
-        # same thread as the final response instead of going to the DM root.
-        _progress_thread_id = source.thread_id or event_message_id
+        # Accumulates tool lines into a single message that gets edited.
+        #
+        # Threading metadata is platform-specific:
+        # - Slack DM threading needs event_message_id fallback (reply thread)
+        # - Telegram uses message_thread_id only for forum topics; passing a
+        #   normal DM/group message id as thread_id causes send failures
+        # - Other platforms should use explicit source.thread_id only
+        if source.platform == Platform.SLACK:
+            _progress_thread_id = source.thread_id or event_message_id
+        else:
+            _progress_thread_id = source.thread_id
        _progress_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None

        async def send_progress_messages():
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.4.0"
-__release_date__ = "2026.3.23"
+__version__ = "0.5.0"
+__release_date__ = "2026.3.28"
@@ -160,7 +160,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="alibaba",
        name="Alibaba Cloud (DashScope)",
        auth_type="api_key",
-        inference_base_url="https://dashscope-intl.aliyuncs.com/apps/anthropic",
+        inference_base_url="https://coding-intl.dashscope.aliyuncs.com/v1",
        api_key_env_vars=("DASHSCOPE_API_KEY",),
        base_url_env_var="DASHSCOPE_BASE_URL",
    ),
@@ -138,6 +138,12 @@ DEFAULT_CONFIG = {
    "toolsets": ["hermes-cli"],
    "agent": {
        "max_turns": 90,
+        # Tool-use enforcement: injects system prompt guidance that tells the
+        # model to actually call tools instead of describing intended actions.
+        # Values: "auto" (default — applies to gpt/codex models), true/false
+        # (force on/off for all models), or a list of model-name substrings
+        # to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
+        "tool_use_enforcement": "auto",
    },
    
    "terminal": {
@@ -221,42 +227,49 @@ DEFAULT_CONFIG = {
            "model": "",
            "base_url": "",
            "api_key": "",
+            "timeout": 30,         # seconds — increase for slow local models
        },
        "compression": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
+            "timeout": 120,        # seconds — compression summarises large contexts; increase for local models
        },
        "session_search": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
+            "timeout": 30,
        },
        "skills_hub": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
+            "timeout": 30,
        },
        "approval": {
            "provider": "auto",
            "model": "",           # fast/cheap model recommended (e.g. gemini-flash, haiku)
            "base_url": "",
            "api_key": "",
+            "timeout": 30,
        },
        "mcp": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
+            "timeout": 30,
        },
        "flush_memories": {
            "provider": "auto",
            "model": "",
            "base_url": "",
            "api_key": "",
+            "timeout": 30,
        },
    },
    
@@ -547,14 +560,14 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
    },
    "DASHSCOPE_API_KEY": {
-        "description": "Alibaba Cloud DashScope API key for Qwen models",
+        "description": "Alibaba Cloud DashScope API key (Qwen + multi-provider models)",
        "prompt": "DashScope API Key",
        "url": "https://modelstudio.console.alibabacloud.com/",
        "password": True,
        "category": "provider",
    },
    "DASHSCOPE_BASE_URL": {
-        "description": "Custom DashScope base URL (default: international endpoint)",
+        "description": "Custom DashScope base URL (default: coding-intl OpenAI-compat endpoint)",
        "prompt": "DashScope Base URL",
        "url": "",
        "password": False,
@@ -610,6 +623,14 @@ OPTIONAL_ENV_VARS = {
    },

    # ── Tool API keys ──
+    "EXA_API_KEY": {
+        "description": "Exa API key for AI-native web search and contents",
+        "prompt": "Exa API key",
+        "url": "https://exa.ai/",
+        "tools": ["web_search", "web_extract"],
+        "password": True,
+        "category": "tool",
+    },
    "PARALLEL_API_KEY": {
        "description": "Parallel API key for AI-native web search and extract",
        "prompt": "Parallel API key",
@@ -1666,6 +1687,7 @@ def show_config():
    keys = [
        ("OPENROUTER_API_KEY", "OpenRouter"),
        ("VOICE_TOOLS_OPENAI_KEY", "OpenAI (STT/TTS)"),
+        ("EXA_API_KEY", "Exa"),
        ("PARALLEL_API_KEY", "Parallel"),
        ("FIRECRAWL_API_KEY", "Firecrawl"),
        ("TAVILY_API_KEY", "Tavily"),
@@ -1825,7 +1847,7 @@ def set_config_value(key: str, value: str):
    # Check if it's an API key (goes to .env)
    api_keys = [
        'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
-        'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'TAVILY_API_KEY',
+        'EXA_API_KEY', 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'TAVILY_API_KEY',
        'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
        'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
        'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
@@ -56,7 +56,7 @@ def _honcho_is_configured_for_doctor() -> bool:
        from honcho_integration.client import HonchoClientConfig

        cfg = HonchoClientConfig.from_global_config()
-        return bool(cfg.enabled and cfg.api_key)
+        return bool(cfg.enabled and (cfg.api_key or cfg.base_url))
    except Exception:
        return False

@@ -708,8 +708,8 @@ def run_doctor(args):
            check_warn("Honcho config not found", "run: hermes honcho setup")
        elif not hcfg.enabled:
            check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
-        elif not hcfg.api_key:
-            check_fail("Honcho API key not set", "run: hermes honcho setup")
+        elif not (hcfg.api_key or hcfg.base_url):
+            check_fail("Honcho API key or base URL not set", "run: hermes honcho setup")
            issues.append("No Honcho API key — run 'hermes honcho setup'")
        else:
            from honcho_integration.client import get_honcho_client, reset_honcho_client
@@ -125,20 +125,43 @@ _SERVICE_BASE = "hermes-gateway"
 SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"


+def _profile_suffix() -> str:
+    """Derive a service-name suffix from the current HERMES_HOME.
+
+    Returns ``""`` for the default ``~/.hermes``, the profile name for
+    ``~/.hermes/profiles/<name>``, or the first 8 hex digits of the SHA-256
+    of the resolved path for any other HERMES_HOME.
+    """
+    import hashlib
+    import re
+    from pathlib import Path as _Path
+    home = get_hermes_home().resolve()
+    default = (_Path.home() / ".hermes").resolve()
+    if home == default:
+        return ""
+    # Direct child of ~/.hermes/profiles with a safe name → use that name.
+    # fullmatch (not match + "$") so a name ending in "\n" is rejected.
+    profiles_root = (default / "profiles").resolve()
+    try:
+        parts = home.relative_to(profiles_root).parts
+        if len(parts) == 1 and re.fullmatch(r"[a-z0-9][a-z0-9_-]{0,63}", parts[0]):
+            return parts[0]
+    except ValueError:
+        pass  # not under the profiles directory; fall through to the hash
+    # Fallback: short hash for arbitrary HERMES_HOME paths
+    return hashlib.sha256(str(home).encode()).hexdigest()[:8]
+
+
 def get_service_name() -> str:
    """Derive a systemd service name scoped to this HERMES_HOME.

    Default ``~/.hermes`` returns ``hermes-gateway`` (backward compatible).
-    Any other HERMES_HOME appends a short hash so multiple installations
-    can each have their own systemd service without conflicting.
+    Profile ``~/.hermes/profiles/coder`` returns ``hermes-gateway-coder``.
+    Any other HERMES_HOME appends a short hash for uniqueness.
    """
-    import hashlib
-    from pathlib import Path as _Path  # local import to avoid monkeypatch interference
-    home = get_hermes_home().resolve()
-    default = (_Path.home() / ".hermes").resolve()
-    if home == default:
+    suffix = _profile_suffix()
+    if not suffix:
        return _SERVICE_BASE
-    suffix = hashlib.sha256(str(home).encode()).hexdigest()[:8]
    return f"{_SERVICE_BASE}-{suffix}"


@@ -369,7 +392,14 @@ def print_systemd_linger_guidance() -> None:
        print("  sudo loginctl enable-linger $USER")

 def get_launchd_plist_path() -> Path:
-    return Path.home() / "Library" / "LaunchAgents" / "ai.hermes.gateway.plist"
+    """Return the launchd plist path, scoped per profile.
+
+    Default ``~/.hermes`` → ``ai.hermes.gateway.plist`` (backward compatible).
+    Profile ``~/.hermes/profiles/coder`` → ``ai.hermes.gateway-coder.plist``.
+    """
+    # The file name is the launchd label plus ".plist"; reuse get_launchd_label()
+    # so the label and the plist name cannot drift apart.
+    return Path.home() / "Library" / "LaunchAgents" / f"{get_launchd_label()}.plist"

 def _detect_venv_dir() -> Path | None:
    """Detect the active virtualenv directory.
@@ -420,6 +450,17 @@ def get_hermes_cli_path() -> str:
 # Systemd (Linux)
 # =============================================================================

+def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
+    """List per-user bin dirs under *home* that exist and are absent from *path_entries*."""
+    relative_dirs = (
+        (".local", "bin"),       # uv, uvx, pip-installed CLIs
+        (".cargo", "bin"),       # Rust/cargo tools
+        ("go", "bin"),           # Go tools
+        (".npm-global", "bin"),  # npm global packages
+    )
+    return [d for d in (str(home.joinpath(*rel)) for rel in relative_dirs) if d not in path_entries and Path(d).exists()]
+
+
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
@@ -434,13 +475,16 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
        resolved_node_dir = str(Path(resolved_node).resolve().parent)
        if resolved_node_dir not in path_entries:
            path_entries.append(resolved_node_dir)
-    path_entries.extend(["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"])
-    sane_path = ":".join(path_entries)

    hermes_home = str(get_hermes_home().resolve())

+    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
+
    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
+        path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
+        path_entries.extend(common_bin_paths)
+        sane_path = ":".join(path_entries)
        return f"""[Unit]
 Description={SERVICE_DESCRIPTION}
 After=network-online.target
@@ -472,6 +516,9 @@ StandardError=journal
 WantedBy=multi-user.target
 """

+    path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
+    path_entries.extend(common_bin_paths)
+    sane_path = ":".join(path_entries)
    return f"""[Unit]
 Description={SERVICE_DESCRIPTION}
 After=network.target
@@ -752,18 +799,46 @@ def systemd_status(deep: bool = False, system: bool = False):
 # Launchd (macOS)
 # =============================================================================

+def get_launchd_label() -> str:
+    """Return the launchd job label; a non-empty profile suffix is appended as ``-<suffix>``."""
+    profile = _profile_suffix()
+    return "ai.hermes.gateway" + (f"-{profile}" if profile else "")
+
+
 def generate_launchd_plist() -> str:
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
+    hermes_home = str(get_hermes_home().resolve())
    log_dir = get_hermes_home() / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
-    
+    label = get_launchd_label()
+    # Build a sane PATH for the launchd plist.  launchd provides only a
+    # minimal default (/usr/bin:/bin:/usr/sbin:/sbin) which misses Homebrew,
+    # nvm, cargo, etc.  We prepend venv/bin and node_modules/.bin (matching
+    # the systemd unit), then capture the user's full shell PATH so every
+    # user-installed tool (node, ffmpeg, …) is reachable.
+    detected_venv = _detect_venv_dir()
+    venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin")
+    venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
+    node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")
+    # Resolve the directory containing the node binary (e.g. Homebrew, nvm)
+    # so it's explicitly in PATH even if the user's shell PATH changes later.
+    priority_dirs = [venv_bin, node_bin]
+    resolved_node = shutil.which("node")
+    if resolved_node:
+        resolved_node_dir = str(Path(resolved_node).resolve().parent)
+        if resolved_node_dir not in priority_dirs:
+            priority_dirs.append(resolved_node_dir)
+    sane_path = ":".join(
+        dict.fromkeys(priority_dirs + [p for p in os.environ.get("PATH", "").split(":") if p])
+    )
+
    return f"""<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
 <dict>
    <key>Label</key>
-    <string>ai.hermes.gateway</string>
+    <string>{label}</string>
    
    <key>ProgramArguments</key>
    <array>
@@ -778,6 +853,16 @@ def generate_launchd_plist() -> str:
    <key>WorkingDirectory</key>
    <string>{working_dir}</string>
    
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>{sane_path}</string>
+        <key>VIRTUAL_ENV</key>
+        <string>{venv_dir}</string>
+        <key>HERMES_HOME</key>
+        <string>{hermes_home}</string>
+    </dict>
+    
    <key>RunAtLoad</key>
    <true/>
    
@@ -863,20 +948,33 @@ def launchd_uninstall():
    print("✓ Service uninstalled")

 def launchd_start():
-    refresh_launchd_plist_if_needed()
    plist_path = get_launchd_plist_path()
+    label = get_launchd_label()
+
+    # Self-heal if the plist is missing entirely (e.g., manual cleanup, failed upgrade)
+    if not plist_path.exists():
+        print("↻ launchd plist missing; regenerating service definition")
+        plist_path.parent.mkdir(parents=True, exist_ok=True)
+        plist_path.write_text(generate_launchd_plist(), encoding="utf-8")
+        subprocess.run(["launchctl", "load", str(plist_path)], check=True)
+        subprocess.run(["launchctl", "start", label], check=True)
+        print("✓ Service started")
+        return
+
+    refresh_launchd_plist_if_needed()
    try:
-        subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
+        subprocess.run(["launchctl", "start", label], check=True)
    except subprocess.CalledProcessError as e:
-        if e.returncode != 3 or not plist_path.exists():
+        if e.returncode != 3:
            raise
        print("↻ launchd job was unloaded; reloading service definition")
        subprocess.run(["launchctl", "load", str(plist_path)], check=True)
-        subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
+        subprocess.run(["launchctl", "start", label], check=True)
    print("✓ Service started")

 def launchd_stop():
-    subprocess.run(["launchctl", "stop", "ai.hermes.gateway"], check=True)
+    label = get_launchd_label()
+    subprocess.run(["launchctl", "stop", label], check=True)
    print("✓ Service stopped")

 def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
@@ -931,8 +1029,9 @@ def launchd_restart():

 def launchd_status(deep: bool = False):
    plist_path = get_launchd_plist_path()
+    label = get_launchd_label()
    result = subprocess.run(
-        ["launchctl", "list", "ai.hermes.gateway"],
+        ["launchctl", "list", label],
        capture_output=True,
        text=True
    )
@@ -1437,7 +1536,7 @@ def _is_service_running() -> bool:
        return False
    elif is_macos() and get_launchd_plist_path().exists():
        result = subprocess.run(
-            ["launchctl", "list", "ai.hermes.gateway"],
+            ["launchctl", "list", get_launchd_label()],
            capture_output=True, text=True
        )
        return result.returncode == 0
@@ -821,7 +821,7 @@ def cmd_model(args):
        ("opencode-zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
        ("opencode-go", "OpenCode Go (open models, $10/month subscription)"),
        ("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"),
-        ("alibaba", "Alibaba Cloud / DashScope (Qwen models, Anthropic-compatible)"),
+        ("alibaba", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
        ("huggingface", "Hugging Face Inference Providers (20+ open models)"),
    ]

@@ -832,8 +832,8 @@ def cmd_model(args):
        for entry in custom_providers_cfg:
            if not isinstance(entry, dict):
                continue
-            name = entry.get("name", "").strip()
-            base_url = entry.get("base_url", "").strip()
+            name = (entry.get("name") or "").strip()
+            base_url = (entry.get("base_url") or "").strip()
            if not name or not base_url:
                continue
            # Generate a stable key from the name
@@ -2339,6 +2339,12 @@ def cmd_cron(args):
    cron_command(args)


+def cmd_webhook(args):
+    """Manage webhook subscriptions by handing *args* to the webhook CLI handler."""
+    from hermes_cli import webhook as _webhook_cli
+    _webhook_cli.webhook_command(args)
+
+
 def cmd_doctor(args):
    """Check configuration and dependencies."""
    from hermes_cli.doctor import run_doctor
@@ -2470,8 +2476,18 @@ def _update_via_zip(args):
            )
    else:
        # Use sys.executable to explicitly call the venv's pip module,
-        # avoiding PEP 668 'externally-managed-environment' errors on Debian/Ubuntu
+        # avoiding PEP 668 'externally-managed-environment' errors on Debian/Ubuntu.
+        # Some environments lose pip inside the venv; bootstrap it back with
+        # ensurepip before trying the editable install.
        pip_cmd = [sys.executable, "-m", "pip"]
+        try:
+            subprocess.run(pip_cmd + ["--version"], cwd=PROJECT_ROOT, check=True, capture_output=True)
+        except subprocess.CalledProcessError:
+            subprocess.run(
+                [sys.executable, "-m", "ensurepip", "--upgrade", "--default-pip"],
+                cwd=PROJECT_ROOT,
+                check=True,
+            )
        try:
            subprocess.run(pip_cmd + ["install", "-e", ".[all]", "--quiet"], cwd=PROJECT_ROOT, check=True)
        except subprocess.CalledProcessError:
@@ -2632,7 +2648,12 @@ def _restore_stashed_changes(
            print("Resolve conflicts manually, then run: git stash drop")

        print(f"Restore your changes with: git stash apply {stash_ref}")
-        sys.exit(1)
+        # In non-interactive mode (gateway /update), don't abort — the code
+        # update itself succeeded, only the stash restore had conflicts.
+        # Aborting would report the entire update as failed.
+        if prompt_user:
+            sys.exit(1)
+        return False

    stash_selector = _resolve_stash_selector(git_cmd, cwd, stash_ref)
    if stash_selector is None:
@@ -2706,30 +2727,60 @@ def cmd_update(args):

    # Fetch and pull
    try:
-        print("→ Fetching updates...")
        git_cmd = ["git"]
        if sys.platform == "win32":
            git_cmd = ["git", "-c", "windows.appendAtomically=false"]
-        
-        subprocess.run(git_cmd + ["fetch", "origin"], cwd=PROJECT_ROOT, check=True)
-        
-        # Get current branch
+
+        print("→ Fetching updates...")
+        fetch_result = subprocess.run(
+            git_cmd + ["fetch", "origin"],
+            cwd=PROJECT_ROOT,
+            capture_output=True,
+            text=True,
+        )
+        if fetch_result.returncode != 0:
+            stderr = fetch_result.stderr.strip()
+            if "Could not resolve host" in stderr or "unable to access" in stderr:
+                print("✗ Network error — cannot reach the remote repository.")
+                print(f"  {stderr.splitlines()[0]}" if stderr else "")
+            elif "Authentication failed" in stderr or "could not read Username" in stderr:
+                print("✗ Authentication failed — check your git credentials or SSH key.")
+            else:
+                print(f"✗ Failed to fetch updates from origin.")
+                if stderr:
+                    print(f"  {stderr.splitlines()[0]}")
+            sys.exit(1)
+
+        # Get current branch (returns literal "HEAD" when detached)
        result = subprocess.run(
            git_cmd + ["rev-parse", "--abbrev-ref", "HEAD"],
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
-            check=True
+            check=True,
        )
-        branch = result.stdout.strip()
+        current_branch = result.stdout.strip()

-        # Fall back to main if the current branch doesn't exist on the remote
-        verify = subprocess.run(
-            git_cmd + ["rev-parse", "--verify", f"origin/{branch}"],
-            cwd=PROJECT_ROOT, capture_output=True, text=True,
-        )
-        if verify.returncode != 0:
-            branch = "main"
+        # Always update against main
+        branch = "main"
+
+        # If user is on a non-main branch or detached HEAD, switch to main
+        if current_branch != "main":
+            label = "detached HEAD" if current_branch == "HEAD" else f"branch '{current_branch}'"
+            print(f"  ⚠ Currently on {label} — switching to main for update...")
+            # Stash before checkout so uncommitted work isn't lost
+            auto_stash_ref = _stash_local_changes_if_needed(git_cmd, PROJECT_ROOT)
+            subprocess.run(
+                git_cmd + ["checkout", "main"],
+                cwd=PROJECT_ROOT,
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+        else:
+            auto_stash_ref = _stash_local_changes_if_needed(git_cmd, PROJECT_ROOT)
+
+        prompt_for_restore = auto_stash_ref is not None and sys.stdin.isatty() and sys.stdout.isatty()

        # Check if there are updates
        result = subprocess.run(
@@ -2737,31 +2788,69 @@ def cmd_update(args):
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
-            check=True
+            check=True,
        )
        commit_count = int(result.stdout.strip())
-        
+
        if commit_count == 0:
            _invalidate_update_cache()
-            print("✓ Already up to date!")
-            return
-        
-        print(f"→ Found {commit_count} new commit(s)")
-
-        auto_stash_ref = _stash_local_changes_if_needed(git_cmd, PROJECT_ROOT)
-        prompt_for_restore = auto_stash_ref is not None and sys.stdin.isatty() and sys.stdout.isatty()
-
-        print("→ Pulling updates...")
-        try:
-            subprocess.run(git_cmd + ["pull", "--ff-only", "origin", branch], cwd=PROJECT_ROOT, check=True)
-        finally:
+            # Restore stash and switch back to original branch if we moved
            if auto_stash_ref is not None:
                _restore_stashed_changes(
-                    git_cmd,
-                    PROJECT_ROOT,
-                    auto_stash_ref,
+                    git_cmd, PROJECT_ROOT, auto_stash_ref,
                    prompt_user=prompt_for_restore,
                )
+            if current_branch not in ("main", "HEAD"):
+                subprocess.run(
+                    git_cmd + ["checkout", current_branch],
+                    cwd=PROJECT_ROOT, capture_output=True, text=True, check=False,
+                )
+            print("✓ Already up to date!")
+            return
+
+        print(f"→ Found {commit_count} new commit(s)")
+
+        print("→ Pulling updates...")
+        update_succeeded = False
+        try:
+            pull_result = subprocess.run(
+                git_cmd + ["pull", "--ff-only", "origin", branch],
+                cwd=PROJECT_ROOT,
+                capture_output=True,
+                text=True,
+            )
+            if pull_result.returncode != 0:
+                # ff-only failed — local and remote have diverged (e.g. upstream
+                # force-pushed or rebased).  Since local changes are already
+                # stashed, reset to match the remote exactly.
+                print("  ⚠ Fast-forward not possible (history diverged), resetting to match remote...")
+                reset_result = subprocess.run(
+                    git_cmd + ["reset", "--hard", f"origin/{branch}"],
+                    cwd=PROJECT_ROOT,
+                    capture_output=True,
+                    text=True,
+                )
+                if reset_result.returncode != 0:
+                    print(f"✗ Failed to reset to origin/{branch}.")
+                    if reset_result.stderr.strip():
+                        print(f"  {reset_result.stderr.strip()}")
+                    print("  Try manually: git fetch origin && git reset --hard origin/main")
+                    sys.exit(1)
+            update_succeeded = True
+        finally:
+            if auto_stash_ref is not None:
+                # Don't attempt stash restore if the code update itself failed —
+                # working tree is in an unknown state.
+                if not update_succeeded:
+                    print(f"  ℹ️  Local changes preserved in stash (ref: {auto_stash_ref})")
+                    print(f"  Restore manually with: git stash apply")
+                else:
+                    _restore_stashed_changes(
+                        git_cmd,
+                        PROJECT_ROOT,
+                        auto_stash_ref,
+                        prompt_user=prompt_for_restore,
+                    )
        
        _invalidate_update_cache()
        
@@ -2784,8 +2873,18 @@ def cmd_update(args):
                )
        else:
            # Use sys.executable to explicitly call the venv's pip module,
-            # avoiding PEP 668 'externally-managed-environment' errors on Debian/Ubuntu
+            # avoiding PEP 668 'externally-managed-environment' errors on Debian/Ubuntu.
+            # Some environments lose pip inside the venv; bootstrap it back with
+            # ensurepip before trying the editable install.
            pip_cmd = [sys.executable, "-m", "pip"]
+            try:
+                subprocess.run(pip_cmd + ["--version"], cwd=PROJECT_ROOT, check=True, capture_output=True)
+            except subprocess.CalledProcessError:
+                subprocess.run(
+                    [sys.executable, "-m", "ensurepip", "--upgrade", "--default-pip"],
+                    cwd=PROJECT_ROOT,
+                    check=True,
+                )
            try:
                subprocess.run(pip_cmd + ["install", "-e", ".[all]", "--quiet"], cwd=PROJECT_ROOT, check=True)
            except subprocess.CalledProcessError:
@@ -2844,10 +2943,15 @@ def cmd_update(args):
                print(f"  ℹ️  {len(missing_config)} new config option(s) available")
            
            print()
-            if sys.stdin.isatty():
-                response = input("Would you like to configure them now? [Y/n]: ").strip().lower()
-            else:
+            if not (sys.stdin.isatty() and sys.stdout.isatty()):
+                print("  ℹ Non-interactive session — skipping config migration prompt.")
+                print("    Run 'hermes config migrate' later to apply any new config/env options.")
                response = "n"
+            else:
+                try:
+                    response = input("Would you like to configure them now? [Y/n]: ").strip().lower()
+                except EOFError:
+                    response = "n"
            
            if response in ('', 'y', 'yes'):
                print()
@@ -2895,10 +2999,11 @@ def cmd_update(args):
            # Check for macOS launchd service
            if is_macos():
                try:
+                    from hermes_cli.gateway import get_launchd_label
                    plist_path = get_launchd_plist_path()
                    if plist_path.exists():
                        check = subprocess.run(
-                            ["launchctl", "list", "ai.hermes.gateway"],
+                            ["launchctl", "list", get_launchd_label()],
                            capture_output=True, text=True, timeout=5,
                        )
                        has_launchd_service = check.returncode == 0
@@ -2954,12 +3059,13 @@ def cmd_update(args):
                    # after a manual SIGTERM, which would race with the
                    # PID file cleanup.
                    print("→ Restarting gateway service...")
+                    _launchd_label = get_launchd_label()
                    stop = subprocess.run(
-                        ["launchctl", "stop", "ai.hermes.gateway"],
+                        ["launchctl", "stop", _launchd_label],
                        capture_output=True, text=True, timeout=10,
                    )
                    start = subprocess.run(
-                        ["launchctl", "start", "ai.hermes.gateway"],
+                        ["launchctl", "start", _launchd_label],
                        capture_output=True, text=True, timeout=10,
                    )
                    if start.returncode == 0:
@@ -3443,7 +3549,38 @@ For more help on a command:
    cron_subparsers.add_parser("tick", help="Run due jobs once and exit")

    cron_parser.set_defaults(func=cmd_cron)
-    
+
+    # =========================================================================
+    # webhook command
+    # =========================================================================
+    webhook_parser = subparsers.add_parser(
+        "webhook",
+        help="Manage dynamic webhook subscriptions",
+        description="Create, list, and remove webhook subscriptions for event-driven agent activation",
+    )
+    webhook_subparsers = webhook_parser.add_subparsers(dest="webhook_action")
+
+    wh_sub = webhook_subparsers.add_parser("subscribe", aliases=["add"], help="Create a webhook subscription")
+    wh_sub.add_argument("name", help="Route name (used in URL: /webhooks/<name>)")
+    wh_sub.add_argument("--prompt", default="", help="Prompt template with {dot.notation} payload refs")
+    wh_sub.add_argument("--events", default="", help="Comma-separated event types to accept")
+    wh_sub.add_argument("--description", default="", help="What this subscription does")
+    wh_sub.add_argument("--skills", default="", help="Comma-separated skill names to load")
+    wh_sub.add_argument("--deliver", default="log", help="Delivery target: log, telegram, discord, slack, etc.")
+    wh_sub.add_argument("--deliver-chat-id", default="", help="Target chat ID for cross-platform delivery")
+    wh_sub.add_argument("--secret", default="", help="HMAC secret (auto-generated if omitted)")
+
+    webhook_subparsers.add_parser("list", aliases=["ls"], help="List all dynamic subscriptions")
+
+    wh_rm = webhook_subparsers.add_parser("remove", aliases=["rm"], help="Remove a subscription")
+    wh_rm.add_argument("name", help="Subscription name to remove")
+
+    wh_test = webhook_subparsers.add_parser("test", help="Send a test POST to a webhook route")
+    wh_test.add_argument("name", help="Subscription name to test")
+    wh_test.add_argument("--payload", default="", help="JSON payload to send (default: test payload)")
+
+    webhook_parser.set_defaults(func=cmd_webhook)
+
    # =========================================================================
    # doctor command
    # =========================================================================
@@ -3576,7 +3713,7 @@ For more help on a command:
    skills_snapshot = skills_subparsers.add_parser("snapshot", help="Export/import skill configurations")
    snapshot_subparsers = skills_snapshot.add_subparsers(dest="snapshot_action")
    snap_export = snapshot_subparsers.add_parser("export", help="Export installed skills to a file")
-    snap_export.add_argument("output", help="Output JSON file path")
+    snap_export.add_argument("output", help="Output JSON file path (use - for stdout)")
    snap_import = snapshot_subparsers.add_parser("import", help="Import and install skills from a file")
    snap_import.add_argument("input", help="Input JSON file path")
    snap_import.add_argument("--force", action="store_true", help="Force install despite caution verdict")
@@ -3853,7 +3990,7 @@ For more help on a command:
    sessions_list.add_argument("--limit", type=int, default=20, help="Max sessions to show")

    sessions_export = sessions_subparsers.add_parser("export", help="Export sessions to a JSONL file")
-    sessions_export.add_argument("output", help="Output JSONL file path")
+    sessions_export.add_argument("output", help="Output JSONL file path (use - for stdout)")
    sessions_export.add_argument("--source", help="Filter by source")
    sessions_export.add_argument("--session-id", help="Export a specific session")

@@ -3934,15 +4071,25 @@ For more help on a command:
                if not data:
                    print(f"Session '{args.session_id}' not found.")
                    return
-                with open(args.output, "w", encoding="utf-8") as f:
-                    f.write(_json.dumps(data, ensure_ascii=False) + "\n")
-                print(f"Exported 1 session to {args.output}")
+                line = _json.dumps(data, ensure_ascii=False) + "\n"
+                if args.output == "-":
+                    import sys
+                    sys.stdout.write(line)
+                else:
+                    with open(args.output, "w", encoding="utf-8") as f:
+                        f.write(line)
+                    print(f"Exported 1 session to {args.output}")
            else:
                sessions = db.export_all(source=args.source)
-                with open(args.output, "w", encoding="utf-8") as f:
+                if args.output == "-":
+                    import sys
                    for s in sessions:
-                        f.write(_json.dumps(s, ensure_ascii=False) + "\n")
-                print(f"Exported {len(sessions)} sessions to {args.output}")
+                        sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n")
+                else:
+                    with open(args.output, "w", encoding="utf-8") as f:
+                        for s in sessions:
+                            f.write(_json.dumps(s, ensure_ascii=False) + "\n")
+                    print(f"Exported {len(sessions)} sessions to {args.output}")

        elif action == "delete":
            resolved_session_id = db.resolve_session_id(args.session_id)
@@ -208,14 +208,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
    ],
+    # Alibaba DashScope Coding platform (coding-intl) — default endpoint.
+    # Supports Qwen models + third-party providers (GLM, Kimi, MiniMax).
+    # Users with classic DashScope keys should override DASHSCOPE_BASE_URL
+    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
+    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
        "qwen3.5-plus",
-        "qwen3-max",
        "qwen3-coder-plus",
        "qwen3-coder-next",
-        "qwen-plus-latest",
-        "qwen3.5-flash",
-        "qwen-vl-max",
+        # Third-party models available on coding-intl
+        "glm-5",
+        "glm-4.7",
+        "kimi-k2.5",
+        "MiniMax-M2.5",
    ],
    # Curated HF model list — only agentic models that map to OpenRouter defaults.
    "huggingface": [
@@ -385,16 +385,23 @@ class PluginManager:
    # Hook invocation
    # -----------------------------------------------------------------------

-    def invoke_hook(self, hook_name: str, **kwargs: Any) -> None:
+    def invoke_hook(self, hook_name: str, **kwargs: Any) -> List[Any]:
        """Call all registered callbacks for *hook_name*.

        Each callback is wrapped in its own try/except so a misbehaving
        plugin cannot break the core agent loop.
+
+        Returns a list of non-``None`` return values from callbacks.
+        This allows hooks like ``pre_llm_call`` to contribute context
+        that the agent core can collect and inject.
        """
        callbacks = self._hooks.get(hook_name, [])
+        results: List[Any] = []
        for cb in callbacks:
            try:
-                cb(**kwargs)
+                ret = cb(**kwargs)
+                if ret is not None:
+                    results.append(ret)
            except Exception as exc:
                logger.warning(
                    "Hook '%s' callback %s raised: %s",
@@ -402,6 +409,7 @@ class PluginManager:
                    getattr(cb, "__name__", repr(cb)),
                    exc,
                )
+        return results

    # -----------------------------------------------------------------------
    # Introspection
@@ -446,9 +454,12 @@ def discover_plugins() -> None:
    get_plugin_manager().discover_and_load()


-def invoke_hook(hook_name: str, **kwargs: Any) -> None:
-    """Invoke a lifecycle hook on all loaded plugins."""
-    get_plugin_manager().invoke_hook(hook_name, **kwargs)
+def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
+    """Invoke a lifecycle hook on all loaded plugins.
+
+    Returns a list of non-``None`` return values from plugin callbacks.
+    """
+    return get_plugin_manager().invoke_hook(hook_name, **kwargs)


 def get_plugin_tool_names() -> Set[str]:
@@ -63,8 +63,11 @@ def _get_model_config() -> Dict[str, Any]:
    model_cfg = config.get("model")
    if isinstance(model_cfg, dict):
        cfg = dict(model_cfg)
-        default = cfg.get("default", "").strip()
-        base_url = cfg.get("base_url", "").strip()
+        # Accept "model" as alias for "default" (users intuitively write model.model)
+        if not cfg.get("default") and cfg.get("model"):
+            cfg["default"] = cfg["model"]
+        default = (cfg.get("default") or "").strip()
+        base_url = (cfg.get("base_url") or "").strip()
        is_local = "localhost" in base_url or "127.0.0.1" in base_url
        is_fallback = not default or default == "anthropic/claude-opus-4.6"
        if is_local and is_fallback and base_url:
@@ -203,7 +206,7 @@ def _resolve_named_custom_runtime(
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
        "base_url": base_url,
-        "api_key": api_key,
+        "api_key": api_key or "no-key-required",
        "source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
    }

@@ -407,12 +410,6 @@ def resolve_runtime_provider(
            # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
            elif base_url.rstrip("/").endswith("/anthropic"):
                api_mode = "anthropic_messages"
-            # MiniMax providers always use Anthropic Messages API.
-            # Auto-correct stale /v1 URLs (from old .env or config) to /anthropic.
-            elif provider in ("minimax", "minimax-cn"):
-                api_mode = "anthropic_messages"
-                if base_url.rstrip("/").endswith("/v1"):
-                    base_url = base_url.rstrip("/")[:-3] + "/anthropic"
        return {
            "provider": provider,
            "api_mode": api_mode,
@@ -585,11 +585,11 @@ def _print_setup_summary(config: dict, hermes_home):
    else:
        tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))

-    # Web tools (Parallel, Firecrawl, or Tavily)
-    if get_env_value("PARALLEL_API_KEY") or get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL") or get_env_value("TAVILY_API_KEY"):
+    # Web tools (Exa, Parallel, Firecrawl, or Tavily)
+    if get_env_value("EXA_API_KEY") or get_env_value("PARALLEL_API_KEY") or get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL") or get_env_value("TAVILY_API_KEY"):
        tool_status.append(("Web Search & Extract", True, None))
    else:
-        tool_status.append(("Web Search & Extract", False, "PARALLEL_API_KEY, FIRECRAWL_API_KEY, or TAVILY_API_KEY"))
+        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY, or TAVILY_API_KEY"))

    # Browser tools (local Chromium or Browserbase cloud)
    import shutil
@@ -2092,11 +2092,11 @@ def setup_terminal_backend(config: dict):
        print_info("Serverless cloud sandboxes. Each session gets its own container.")
        print_info("Requires a Modal account: https://modal.com")

-        # Check if swe-rex[modal] is installed
+        # Check if modal SDK is installed
        try:
-            __import__("swe_rex")
+            __import__("modal")
        except ImportError:
-            print_info("Installing swe-rex[modal]...")
+            print_info("Installing modal SDK...")
            import subprocess

            uv_bin = shutil.which("uv")
@@ -2108,22 +2108,22 @@ def setup_terminal_backend(config: dict):
                        "install",
                        "--python",
                        sys.executable,
-                        "swe-rex[modal]",
+                        "modal",
                    ],
                    capture_output=True,
                    text=True,
                )
            else:
                result = subprocess.run(
-                    [sys.executable, "-m", "pip", "install", "swe-rex[modal]"],
+                    [sys.executable, "-m", "pip", "install", "modal"],
                    capture_output=True,
                    text=True,
                )
            if result.returncode == 0:
-                print_success("swe-rex[modal] installed")
+                print_success("modal SDK installed")
            else:
                print_warning(
-                    "Install failed — run manually: pip install 'swe-rex[modal]'"
+                    "Install failed — run manually: pip install modal"
                )

        # Modal token
@@ -24,6 +24,10 @@ PLATFORMS = {
    "whatsapp": "📱 WhatsApp",
    "signal":   "📡 Signal",
    "email":    "📧 Email",
+    "homeassistant": "🏠 Home Assistant",
+    "mattermost": "💬 Mattermost",
+    "matrix":   "💬 Matrix",
+    "dingtalk": "💬 DingTalk",
 }

 # ─── Config Helpers ───────────────────────────────────────────────────────────
@@ -304,7 +304,8 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",


 def do_install(identifier: str, category: str = "", force: bool = False,
-               console: Optional[Console] = None, skip_confirm: bool = False) -> None:
+               console: Optional[Console] = None, skip_confirm: bool = False,
+               invalidate_cache: bool = True) -> None:
    """Fetch, quarantine, scan, confirm, and install a skill."""
    from tools.skills_hub import (
        GitHubAuth, create_source_router, ensure_hub_dirs,
@@ -417,12 +418,16 @@ def do_install(identifier: str, category: str = "", force: bool = False,
    c.print(f"[bold green]Installed:[/] {install_dir.relative_to(SKILLS_DIR)}")
    c.print(f"[dim]Files: {', '.join(bundle.files.keys())}[/]\n")

-    # Invalidate the skills prompt cache so the new skill appears immediately
-    try:
-        from agent.prompt_builder import clear_skills_system_prompt_cache
-        clear_skills_system_prompt_cache(clear_snapshot=True)
-    except Exception:
-        pass
+    if invalidate_cache:
+        # Invalidate the skills prompt cache so the new skill appears immediately
+        try:
+            from agent.prompt_builder import clear_skills_system_prompt_cache
+            clear_skills_system_prompt_cache(clear_snapshot=True)
+        except Exception:
+            pass
+    else:
+        c.print("[dim]Skill will be available in your next session.[/]")
+        c.print("[dim]Use /reset to start a new session now, or --now to activate immediately (invalidates prompt cache).[/]\n")


 def do_inspect(identifier: str, console: Optional[Console] = None) -> None:
@@ -610,7 +615,8 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N


 def do_uninstall(name: str, console: Optional[Console] = None,
-                 skip_confirm: bool = False) -> None:
+                 skip_confirm: bool = False,
+                 invalidate_cache: bool = True) -> None:
    """Remove a hub-installed skill with confirmation."""
    from tools.skills_hub import uninstall_skill

@@ -630,11 +636,15 @@ def do_uninstall(name: str, console: Optional[Console] = None,
    success, msg = uninstall_skill(name)
    if success:
        c.print(f"[bold green]{msg}[/]\n")
-        try:
-            from agent.prompt_builder import clear_skills_system_prompt_cache
-            clear_skills_system_prompt_cache(clear_snapshot=True)
-        except Exception:
-            pass
+        if invalidate_cache:
+            try:
+                from agent.prompt_builder import clear_skills_system_prompt_cache
+                clear_skills_system_prompt_cache(clear_snapshot=True)
+            except Exception:
+                pass
+        else:
+            c.print("[dim]Change will take effect in your next session.[/]")
+            c.print("[dim]Use /reset to start a new session now, or --now to apply immediately (invalidates prompt cache).[/]\n")
    else:
        c.print(f"[bold red]Error:[/] {msg}\n")

@@ -877,10 +887,15 @@ def do_snapshot_export(output_path: str, console: Optional[Console] = None) -> N
        "taps": tap_list,
    }

-    out = Path(output_path)
-    out.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False) + "\n")
-    c.print(f"[bold green]Snapshot exported:[/] {out}")
-    c.print(f"[dim]{len(installed)} skill(s), {len(tap_list)} tap(s)[/]\n")
+    payload = json.dumps(snapshot, indent=2, ensure_ascii=False) + "\n"
+    if output_path == "-":
+        import sys
+        sys.stdout.write(payload)
+    else:
+        out = Path(output_path)
+        out.write_text(payload)
+        c.print(f"[bold green]Snapshot exported:[/] {out}")
+        c.print(f"[dim]{len(installed)} skill(s), {len(tap_list)} tap(s)[/]\n")


 def do_snapshot_import(input_path: str, force: bool = False,
@@ -1071,19 +1086,23 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

    elif action == "install":
        if not args:
-            c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force|--yes]\n")
+            c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force] [--now]\n")
            return
        identifier = args[0]
        category = ""
-        # --yes / -y bypasses confirmation prompt (needed in TUI mode)
-        # --force handles reinstall override
-        skip_confirm = any(flag in args for flag in ("--yes", "-y"))
+        # Slash commands run inside prompt_toolkit where input() hangs.
+        # Always skip confirmation — the user typing the command is implicit consent.
+        skip_confirm = True
        force = "--force" in args
+        # --now invalidates prompt cache immediately (costs more money).
+        # Default: defer to next session to preserve cache.
+        invalidate_cache = "--now" in args
        for i, a in enumerate(args):
            if a == "--category" and i + 1 < len(args):
                category = args[i + 1]
        do_install(identifier, category=category, force=force,
-                   skip_confirm=skip_confirm, console=c)
+                   skip_confirm=skip_confirm, invalidate_cache=invalidate_cache,
+                   console=c)

    elif action == "inspect":
        if not args:
@@ -1113,10 +1132,13 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

    elif action == "uninstall":
        if not args:
-            c.print("[bold red]Usage:[/] /skills uninstall <name> [--yes]\n")
+            c.print("[bold red]Usage:[/] /skills uninstall <name> [--now]\n")
            return
-        skip_confirm = any(flag in args for flag in ("--yes", "-y"))
-        do_uninstall(args[0], console=c, skip_confirm=skip_confirm)
+        # Slash commands run inside prompt_toolkit where input() hangs.
+        skip_confirm = True
+        invalidate_cache = "--now" in args
+        do_uninstall(args[0], console=c, skip_confirm=skip_confirm,
+                     invalidate_cache=invalidate_cache)

    elif action == "publish":
        if not args:
@@ -292,8 +292,9 @@ def show_status(args):
        print("  Manager:      systemd (user)")
        
    elif sys.platform == 'darwin':
+        from hermes_cli.gateway import get_launchd_label
        result = subprocess.run(
-            ["launchctl", "list", "ai.hermes.gateway"],
+            ["launchctl", "list", get_launchd_label()],
            capture_output=True,
            text=True
        )
@@ -138,6 +138,7 @@ PLATFORMS = {
    "matrix":   {"label": "💬 Matrix",     "default_toolset": "hermes-matrix"},
    "dingtalk": {"label": "💬 DingTalk",   "default_toolset": "hermes-dingtalk"},
    "api_server": {"label": "🌐 API Server", "default_toolset": "hermes-api-server"},
+    "mattermost": {"label": "💬 Mattermost", "default_toolset": "hermes-mattermost"},
 }


@@ -189,6 +190,14 @@ TOOL_CATEGORIES = {
                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
                ],
            },
+            {
+                "name": "Exa",
+                "tag": "AI-native search and contents",
+                "web_backend": "exa",
+                "env_vars": [
+                    {"key": "EXA_API_KEY", "prompt": "Exa API key", "url": "https://exa.ai"},
+                ],
+            },
            {
                "name": "Parallel",
                "tag": "AI-native search and extract",
@@ -0,0 +1,256 @@
+"""hermes webhook — manage dynamic webhook subscriptions from the CLI.
+
+Usage:
+    hermes webhook subscribe <name> [options]
+    hermes webhook list
+    hermes webhook remove <name>
+    hermes webhook test <name> [--payload '{"key": "value"}']
+
+Subscriptions persist to ~/.hermes/webhook_subscriptions.json and are
+hot-reloaded by the webhook adapter without a gateway restart.
+"""
+
+import json
+import os
+import re
+import secrets
+import time
+from pathlib import Path
+from typing import Dict, Optional
+
+
+_SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
+
+
+def _hermes_home() -> Path:
+    """Return the Hermes home directory.
+
+    Uses the ``HERMES_HOME`` environment variable when it is set and falls
+    back to ``~/.hermes`` otherwise.  A leading ``~`` is expanded.
+    """
+    fallback = str(Path.home() / ".hermes")
+    configured = os.getenv("HERMES_HOME", fallback)
+    return Path(configured).expanduser()
+
+
+def _subscriptions_path() -> Path:
+    """Return the path of the JSON file that stores dynamic subscriptions."""
+    home = _hermes_home()
+    return home / _SUBSCRIPTIONS_FILENAME
+
+
+def _load_subscriptions() -> Dict[str, dict]:
+    """Read the dynamic subscriptions from disk.
+
+    Returns the parsed JSON object.  A missing file, an unreadable or
+    malformed file, or a top-level value that is not a JSON object all
+    yield an empty dict.
+    """
+    store = _subscriptions_path()
+    if store.exists():
+        try:
+            parsed = json.loads(store.read_text(encoding="utf-8"))
+        except Exception:
+            # Best effort: a store that cannot be read is treated as empty.
+            parsed = None
+        if isinstance(parsed, dict):
+            return parsed
+    return {}
+
+
+def _save_subscriptions(subs: Dict[str, dict]) -> None:
+    """Write *subs* to the subscriptions file via a temp file and rename.
+
+    The JSON is written to a sibling ``.tmp`` file which is then moved over
+    the real file with ``os.replace``, so the target is swapped in one step.
+    Every route carries its HMAC secret, so the file is created without
+    world-readable permission bits (``0o640`` before the umask is applied).
+
+    Args:
+        subs: Mapping of route name to route definition.
+    """
+    path = _subscriptions_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    tmp_path = path.with_suffix(".tmp")
+    payload = json.dumps(subs, indent=2, ensure_ascii=False)
+    # Remove any stale temp file first: os.open() applies the mode only when
+    # it creates the file, so a leftover with looser permissions would keep
+    # them.
+    tmp_path.unlink(missing_ok=True)
+    fd = os.open(str(tmp_path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o640)
+    with os.fdopen(fd, "w", encoding="utf-8") as fh:
+        fh.write(payload)
+    os.replace(str(tmp_path), str(path))
+
+
+def _get_webhook_config() -> dict:
+    """Load webhook platform config. Returns {} if not configured.
+
+    Reads ``platforms.webhook`` from the Hermes config.  Any failure while
+    importing or loading the config, and any value that is not a mapping
+    (for example a ``webhook:`` key left empty), yields an empty dict so
+    callers can always call ``.get()`` on the result.
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        webhook_cfg = cfg.get("platforms", {}).get("webhook", {})
+    except Exception:
+        return {}
+    # A key that is present but null would otherwise leak None to callers.
+    return webhook_cfg if isinstance(webhook_cfg, dict) else {}
+
+
+def _is_webhook_enabled() -> bool:
+    """Return True when the webhook config has a truthy ``enabled`` value."""
+    enabled_flag = _get_webhook_config().get("enabled")
+    return True if enabled_flag else False
+
+
+def _get_webhook_base_url() -> str:
+    """Return the ``http://host:port`` base URL shown to the user.
+
+    Host and port are read from the ``extra`` section of the webhook config
+    (defaults ``0.0.0.0`` and ``8644``).  Wildcard bind addresses are not
+    usable as request targets, so they are displayed as ``localhost``.
+    """
+    # ``extra`` may be present but empty (None) in the config file.
+    extra = _get_webhook_config().get("extra") or {}
+    host = extra.get("host") or "0.0.0.0"
+    port = extra.get("port", 8644)
+    display_host = "localhost" if host in ("0.0.0.0", "::") else host
+    return f"http://{display_host}:{port}"
+
+
+_SETUP_HINT = """
+  Webhook platform is not enabled. To set it up:
+
+  1. Run the gateway setup wizard:
+     hermes gateway setup
+
+  2. Or manually add to ~/.hermes/config.yaml:
+     platforms:
+       webhook:
+         enabled: true
+         extra:
+           host: "0.0.0.0"
+           port: 8644
+           secret: "your-global-hmac-secret"
+
+  3. Or set environment variables in ~/.hermes/.env:
+     WEBHOOK_ENABLED=true
+     WEBHOOK_PORT=8644
+     WEBHOOK_SECRET=your-global-secret
+
+  Then start the gateway: hermes gateway run
+"""
+
+
+def _require_webhook_enabled() -> bool:
+    """Report whether the webhook platform is enabled.
+
+    When it is not, the setup guide is printed before returning False.
+    """
+    enabled = _is_webhook_enabled()
+    if not enabled:
+        print(_SETUP_HINT)
+    return enabled
+
+
+def webhook_command(args):
+    """Dispatch ``hermes webhook <action>`` to the matching handler.
+
+    Prints a short usage message when no action was given, and the setup
+    guide when the webhook platform is not enabled.
+    """
+    action = getattr(args, "webhook_action", None)
+    if not action:
+        print("Usage: hermes webhook {subscribe|list|remove|test}")
+        print("Run 'hermes webhook --help' for details.")
+        return
+
+    if not _require_webhook_enabled():
+        return
+
+    # Aliases share the handler of their canonical action.
+    handlers = {
+        "subscribe": _cmd_subscribe,
+        "add": _cmd_subscribe,
+        "list": _cmd_list,
+        "ls": _cmd_list,
+        "remove": _cmd_remove,
+        "rm": _cmd_remove,
+        "test": _cmd_test,
+    }
+    handler = handlers.get(action)
+    if handler is not None:
+        handler(args)
+
+
+def _cmd_subscribe(args):
+    """Create or update a dynamic webhook subscription and print its details.
+
+    The name is normalised (trimmed, lower-cased, spaces turned into
+    hyphens) and must match ``[a-z0-9][a-z0-9_-]*``.  An existing entry with
+    the same name is replaced.  A secret is generated when none is supplied.
+    """
+    name = args.name.strip().lower().replace(" ", "-")
+    if not re.match(r'^[a-z0-9][a-z0-9_-]*$', name):
+        print(f"Error: Invalid name '{name}'. Use lowercase alphanumeric with hyphens/underscores.")
+        return
+
+    subs = _load_subscriptions()
+    is_update = name in subs
+
+    secret = args.secret or secrets.token_urlsafe(32)
+    # Drop empty items so input such as "push,,issues", a trailing comma or
+    # whitespace-only text does not store "" as an event type or skill name.
+    events = [e.strip() for e in (args.events or "").split(",") if e.strip()]
+    skills = [s.strip() for s in (args.skills or "").split(",") if s.strip()]
+
+    route = {
+        "description": args.description or f"Agent-created subscription: {name}",
+        "events": events,
+        "secret": secret,
+        "prompt": args.prompt or "",
+        "skills": skills,
+        "deliver": args.deliver or "log",
+        "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+    }
+
+    if args.deliver_chat_id:
+        route["deliver_extra"] = {"chat_id": args.deliver_chat_id}
+
+    subs[name] = route
+    _save_subscriptions(subs)
+
+    base_url = _get_webhook_base_url()
+    status = "Updated" if is_update else "Created"
+
+    print(f"\n  {status} webhook subscription: {name}")
+    print(f"  URL:    {base_url}/webhooks/{name}")
+    print(f"  Secret: {secret}")
+    if events:
+        print(f"  Events: {', '.join(events)}")
+    else:
+        print("  Events: (all)")
+    print(f"  Deliver: {route['deliver']}")
+    if route.get("prompt"):
+        # Keep the summary compact: show at most the first 80 characters.
+        prompt_preview = route["prompt"][:80] + ("..." if len(route["prompt"]) > 80 else "")
+        print(f"  Prompt: {prompt_preview}")
+    print("\n  Configure your service to POST to the URL above.")
+    print("  Use the secret for HMAC-SHA256 signature validation.")
+    print("  The gateway must be running to receive events (hermes gateway run).\n")
+
+
+def _cmd_list(args):
+    """Print every dynamic subscription with its URL, events and delivery target."""
+    subscriptions = _load_subscriptions()
+    if not subscriptions:
+        print("  No dynamic webhook subscriptions.")
+        print("  Create one with: hermes webhook subscribe <name>")
+        return
+
+    root = _get_webhook_base_url()
+    print(f"\n  {len(subscriptions)} webhook subscription(s):\n")
+    for route_name in subscriptions:
+        entry = subscriptions[route_name]
+        accepted = ", ".join(entry.get("events", [])) or "(all)"
+        target = entry.get("deliver", "log")
+        summary = entry.get("description", "")
+
+        # Build the entry's lines first, then emit them in one go.
+        lines = [f"  ◆ {route_name}"]
+        if summary:
+            lines.append(f"    {summary}")
+        lines.append(f"    URL:     {root}/webhooks/{route_name}")
+        lines.append(f"    Events:  {accepted}")
+        lines.append(f"    Deliver: {target}")
+        print("\n".join(lines))
+        print()
+
+
+def _cmd_remove(args):
+    """Delete a dynamic subscription by name and persist the change.
+
+    Prints a notice and leaves the store untouched when no subscription
+    with that name exists.
+    """
+    # Normalise exactly as ``subscribe`` does so a name that was accepted
+    # there (e.g. "my hook" stored as "my-hook") can be removed as typed.
+    name = args.name.strip().lower().replace(" ", "-")
+    subs = _load_subscriptions()
+
+    if name not in subs:
+        print(f"  No subscription named '{name}'.")
+        print("  Note: Static routes from config.yaml cannot be removed here.")
+        return
+
+    del subs[name]
+    _save_subscriptions(subs)
+    print(f"  Removed webhook subscription: {name}")
+
+
+def _cmd_test(args):
+    """Send a signed test POST to a subscription's webhook route.
+
+    The body is ``--payload`` (or a built-in test payload) and is signed
+    with the route's secret using HMAC-SHA256; the signature is sent in the
+    ``X-Hub-Signature-256`` header.  The response, or the failure, is
+    printed.
+    """
+    import hashlib
+    import hmac
+    import urllib.error
+    import urllib.request
+
+    # Normalise exactly as ``subscribe`` does so the stored key is found.
+    name = args.name.strip().lower().replace(" ", "-")
+    subs = _load_subscriptions()
+
+    if name not in subs:
+        print(f"  No subscription named '{name}'.")
+        return
+
+    route = subs[name]
+    # A hand-edited store may hold ``"secret": null``; sign with "" then.
+    secret = route.get("secret") or ""
+    base_url = _get_webhook_base_url()
+    url = f"{base_url}/webhooks/{name}"
+
+    payload = args.payload or '{"test": true, "event_type": "test", "message": "Hello from hermes webhook test"}'
+    body = payload.encode()
+
+    sig = "sha256=" + hmac.new(
+        secret.encode(), body, hashlib.sha256
+    ).hexdigest()
+
+    print(f"  Sending test POST to {url}")
+    try:
+        req = urllib.request.Request(
+            url,
+            data=body,
+            headers={
+                "Content-Type": "application/json",
+                "X-Hub-Signature-256": sig,
+                "X-GitHub-Event": "test",
+            },
+            method="POST",
+        )
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            text = resp.read().decode()
+            print(f"  Response ({resp.status}): {text}")
+    except urllib.error.HTTPError as e:
+        # The server answered with an error status, so it is reachable;
+        # show its reply instead of suggesting the gateway is down.
+        detail = e.read().decode(errors="replace")
+        print(f"  Response ({e.code}): {detail}")
+    except Exception as e:
+        print(f"  Error: {e}")
+        print("  Is the gateway running? (hermes gateway run)")
@@ -17,6 +17,27 @@ def get_hermes_home() -> Path:
    return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))


+def get_hermes_dir(new_subpath: str, old_name: str) -> Path:
+    """Pick the on-disk location for a Hermes subdirectory.
+
+    Installs that already contain the legacy directory keep using it, so no
+    migration is needed; everything else gets the consolidated layout.
+
+    Args:
+        new_subpath: Preferred location relative to HERMES_HOME
+            (e.g. ``"cache/images"``).
+        old_name: Legacy location relative to HERMES_HOME
+            (e.g. ``"image_cache"``).
+
+    Returns:
+        The legacy path when it exists on disk, otherwise the new path.
+        Both are built on top of ``get_hermes_home()``.
+    """
+    root = get_hermes_home()
+    legacy = root / old_name
+    return legacy if legacy.exists() else root / new_subpath
+
+
 VALID_REASONING_EFFORTS = ("xhigh", "high", "medium", "low", "minimal")


@@ -270,7 +270,7 @@ def cmd_status(args) -> None:
            print(f"    {peer}: {mode}")
    print(f"  Write freq:     {hcfg.write_frequency}")

-    if hcfg.enabled and hcfg.api_key:
+    if hcfg.enabled and (hcfg.api_key or hcfg.base_url):
        print("\n  Connection... ", end="", flush=True)
        try:
            get_honcho_client(hcfg)
@@ -278,7 +278,7 @@ def cmd_status(args) -> None:
        except Exception as e:
            print(f"FAILED ({e})\n")
    else:
-        reason = "disabled" if not hcfg.enabled else "no API key"
+        reason = "disabled" if not hcfg.enabled else "no API key or base URL"
        print(f"\n  Not connected ({reason})\n")


@@ -417,9 +417,18 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
    else:
        logger.info("Initializing Honcho client (host: %s, workspace: %s)", config.host, config.workspace_id)

+    # Local Honcho instances don't require an API key, but the SDK
+    # expects a non-empty string.  Use a placeholder for local URLs.
+    _is_local = resolved_base_url and (
+        "localhost" in resolved_base_url
+        or "127.0.0.1" in resolved_base_url
+        or "::1" in resolved_base_url
+    )
+    effective_api_key = config.api_key or ("local" if _is_local else None)
+
    kwargs: dict = {
        "workspace_id": config.workspace_id,
-        "api_key": config.api_key,
+        "api_key": effective_api_key,
        "environment": config.environment,
    }
    if resolved_base_url:
@@ -111,6 +111,7 @@
      fi
      mkdir -p "$TARGET_HOME"
      chown "$HERMES_UID:$HERMES_GID" "$TARGET_HOME"
+      chmod 0750 "$TARGET_HOME"

      # Ensure HERMES_HOME is owned by the target user
      if [ -n "''${HERMES_HOME:-}" ] && [ -d "$HERMES_HOME" ]; then
@@ -551,8 +552,8 @@
      # ── Directories ───────────────────────────────────────────────────
      {
        systemd.tmpfiles.rules = [
-          "d ${cfg.stateDir}                0755 ${cfg.user} ${cfg.group} - -"
-          "d ${cfg.stateDir}/.hermes        0755 ${cfg.user} ${cfg.group} - -"
+          "d ${cfg.stateDir}                0750 ${cfg.user} ${cfg.group} - -"
+          "d ${cfg.stateDir}/.hermes        0750 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/home           0750 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.workingDirectory}         0750 ${cfg.user} ${cfg.group} - -"
        ];
@@ -566,21 +567,23 @@
          mkdir -p ${cfg.stateDir}/home
          mkdir -p ${cfg.workingDirectory}
          chown ${cfg.user}:${cfg.group} ${cfg.stateDir} ${cfg.stateDir}/.hermes ${cfg.stateDir}/home ${cfg.workingDirectory}
+          chmod 0750 ${cfg.stateDir} ${cfg.stateDir}/.hermes ${cfg.stateDir}/home ${cfg.workingDirectory}

          # Merge Nix settings into existing config.yaml.
          # Preserves user-added keys (skills, streaming, etc.); Nix keys win.
          # If configFile is user-provided (not generated), overwrite instead of merge.
          ${if cfg.configFile != null then ''
-            install -o ${cfg.user} -g ${cfg.group} -m 0644 -D ${configFile} ${cfg.stateDir}/.hermes/config.yaml
+            install -o ${cfg.user} -g ${cfg.group} -m 0640 -D ${configFile} ${cfg.stateDir}/.hermes/config.yaml
          '' else ''
            ${configMergeScript} ${generatedConfigFile} ${cfg.stateDir}/.hermes/config.yaml
            chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/config.yaml
-            chmod 0644 ${cfg.stateDir}/.hermes/config.yaml
+            chmod 0640 ${cfg.stateDir}/.hermes/config.yaml
          ''}

          # Managed mode marker (so interactive shells also detect NixOS management)
          touch ${cfg.stateDir}/.hermes/.managed
          chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/.managed
+          chmod 0644 ${cfg.stateDir}/.hermes/.managed

          # Seed auth file if provided
          ${lib.optionalString (cfg.authFile != null) ''
@@ -612,7 +615,7 @@ HERMES_NIX_ENV_EOF

          # Link documents into workspace
          ${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
-            install -o ${cfg.user} -g ${cfg.group} -m 0644 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
+            install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
          '') cfg.documents)}
        '';
      }
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hermes-agent"
-version = "0.4.0"
+version = "0.5.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -26,6 +26,7 @@ dependencies = [
  # Interactive CLI (prompt_toolkit is used directly by cli.py)
  "prompt_toolkit>=3.0.52,<4",
  # Tools
+  "exa-py>=2.9.0,<3",
  "firecrawl-py>=4.16.0,<5",
  "parallel-web>=0.4.2,<1",
  "fal-client>=0.13.1,<1",
@@ -37,7 +38,7 @@ dependencies = [
 ]

 [project.optional-dependencies]
-modal = ["swe-rex[modal]>=1.4.0,<2"]
+modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
 dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
 messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
@@ -88,7 +88,7 @@ from agent.model_metadata import (
 )
 from agent.context_compressor import ContextCompressor
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@@ -361,6 +361,85 @@ def _inject_honcho_turn_context(content, turn_context: str):
    return f"{text}\n\n{note}"


+# Plain-text budget warnings ("[BUDGET: ...]" / "[BUDGET WARNING: ...]") injected by _get_budget_warning().
+_BUDGET_WARNING_RE = re.compile(
+    r"\[BUDGET(?:\s+WARNING)?:\s+Iteration\s+\d+/\d+\..*?\]",
+    re.DOTALL,  # let ``.*?`` run across newlines up to the closing bracket
+)
+
+
+# Regex to match lone surrogate code points (U+D800..U+DFFF).
+# These are invalid in UTF-8 and cause UnicodeEncodeError when the OpenAI SDK
+# serialises messages to JSON.  Common source: clipboard paste from Google Docs
+# or other rich-text editors on some platforms.
+_SURROGATE_RE = re.compile(r'[\ud800-\udfff]')
+
+
+def _sanitize_surrogates(text: str) -> str:
+    """Return *text* with every lone surrogate swapped for U+FFFD.
+
+    Code points U+D800..U+DFFF cannot be encoded as UTF-8.  Text without any
+    of them is handed back untouched after a single regex search.
+    """
+    if _SURROGATE_RE.search(text) is None:
+        return text
+    return _SURROGATE_RE.sub('\ufffd', text)
+
+
+def _sanitize_messages_surrogates(messages: list) -> bool:
+    """Replace lone surrogates in the textual content of *messages*, in place.
+
+    Covers a string ``content`` and the ``text`` field of dict parts inside a
+    list ``content``.  Returns True when at least one value was rewritten.
+    """
+    changed = False
+    for message in messages:
+        if not isinstance(message, dict):
+            continue
+        body = message.get("content")
+        if isinstance(body, str):
+            if _SURROGATE_RE.search(body):
+                message["content"] = _SURROGATE_RE.sub('\ufffd', body)
+                changed = True
+            continue
+        for chunk in body if isinstance(body, list) else ():
+            piece = chunk.get("text") if isinstance(chunk, dict) else None
+            if isinstance(piece, str) and _SURROGATE_RE.search(piece):
+                chunk["text"] = _SURROGATE_RE.sub('\ufffd', piece)
+                changed = True
+    return changed
+
+
+def _strip_budget_warnings_from_history(messages: list) -> None:
+    """Remove budget pressure warnings from tool-result messages in-place.
+
+    Budget warnings are turn-scoped signals that must not leak into replayed
+    history.  They live in tool-result ``content`` either as a JSON key
+    (``_budget_warning``) or appended plain text.  Other content is untouched.
+    """
+    for msg in messages:
+        if not isinstance(msg, dict) or msg.get("role") != "tool":
+            continue
+        content = msg.get("content")
+        if not isinstance(content, str) or ("_budget_warning" not in content and "[BUDGET" not in content):
+            continue
+
+        # Try JSON first (the common case: _budget_warning key in a dict)
+        try:
+            parsed = json.loads(content)
+        except (json.JSONDecodeError, TypeError):
+            parsed = None
+        if isinstance(parsed, dict) and "_budget_warning" in parsed:
+            del parsed["_budget_warning"]
+            msg["content"] = json.dumps(parsed, ensure_ascii=False)
+            continue
+
+        # Fallback: strip the text pattern; rewrite (and trim) only on a match
+        cleaned, removed = _BUDGET_WARNING_RE.subn("", content)
+        if removed:
+            msg["content"] = cleaned.strip()
+
+
 class AIAgent:
    """
    AI Agent with tool calling capabilities.
@@ -782,6 +861,25 @@ class AIAgent:
                    }
            
            self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
+
+            # Enable fine-grained tool streaming for Claude on OpenRouter.
+            # Without this, Anthropic buffers the entire tool call and goes
+            # silent for minutes while thinking — OpenRouter's upstream proxy
+            # times out during the silence.  The beta header makes Anthropic
+            # stream tool call arguments token-by-token, keeping the
+            # connection alive.
+            _effective_base = str(client_kwargs.get("base_url", "")).lower()
+            if "openrouter" in _effective_base and "claude" in (self.model or "").lower():
+                headers = client_kwargs.get("default_headers") or {}
+                existing_beta = headers.get("x-anthropic-beta", "")
+                _FINE_GRAINED = "fine-grained-tool-streaming-2025-05-14"
+                if _FINE_GRAINED not in existing_beta:
+                    if existing_beta:
+                        headers["x-anthropic-beta"] = f"{existing_beta},{_FINE_GRAINED}"
+                    else:
+                        headers["x-anthropic-beta"] = _FINE_GRAINED
+                    client_kwargs["default_headers"] = headers
+
            self.api_key = client_kwargs.get("api_key", "")
            try:
                self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True)
@@ -986,8 +1084,8 @@ class AIAgent:
                    else:
                        if not hcfg.enabled:
                            logger.debug("Honcho disabled in global config")
-                        elif not hcfg.api_key:
-                            logger.debug("Honcho enabled but no API key configured")
+                        elif not (hcfg.api_key or hcfg.base_url):
+                            logger.debug("Honcho enabled but no API key or base URL configured")
                        else:
                            logger.debug("Honcho enabled but missing API key or disabled in config")
            except Exception as e:
@@ -1024,6 +1122,13 @@ class AIAgent:
        except Exception:
            pass

+        # Tool-use enforcement config: "auto" (default — matches hardcoded
+        # model list), true (always), false (never), or list of substrings.
+        _agent_section = _agent_cfg.get("agent", {})
+        if not isinstance(_agent_section, dict):
+            _agent_section = {}
+        self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
+
        # Initialize context compressor for automatic context management
        # Compresses conversation when approaching model's context limit
        # Configuration via config.yaml (compression section)
@@ -2187,8 +2292,14 @@ class AIAgent:
    # ── Honcho integration helpers ──

    def _honcho_should_activate(self, hcfg) -> bool:
-        """Return True when remote Honcho should be active."""
-        if not hcfg or not hcfg.enabled or not hcfg.api_key:
+        """Return True when Honcho should be active.
+
+        Requires a truthy ``hcfg`` with ``enabled`` set plus an ``api_key`` or a
+        ``base_url``; self-hosted Honcho may have a base_url and no API key.
+        """
+        if not hcfg or not hcfg.enabled:
+            return False
+        if not (hcfg.api_key or hcfg.base_url):
            return False
        return True

@@ -2454,6 +2565,30 @@ class AIAgent:
        if tool_guidance:
            prompt_parts.append(" ".join(tool_guidance))

+        # Tool-use enforcement: tells the model to actually call tools instead
+        # of describing intended actions.  Controlled by config.yaml
+        # agent.tool_use_enforcement:
+        #   "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
+        #   true  — always inject (all models)
+        #   false — never inject
+        #   list  — custom model-name substrings to match
+        if self.valid_tool_names:
+            _enforce = self._tool_use_enforcement
+            _inject = False
+            if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in ("true", "always", "yes", "on")):
+                _inject = True
+            elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in ("false", "never", "no", "off")):
+                _inject = False
+            elif isinstance(_enforce, list):
+                model_lower = (self.model or "").lower()
+                _inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
+            else:
+                # "auto" or any unrecognised value — use hardcoded defaults
+                model_lower = (self.model or "").lower()
+                _inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
+            if _inject:
+                prompt_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
+
        # Honcho CLI awareness: tell Hermes about its own management commands
        # so it can refer the user to them rather than reinventing answers.
        if self._honcho and self._honcho_session_key:
@@ -3796,6 +3931,12 @@ class AIAgent:
            content_parts: list = []
            tool_calls_acc: dict = {}
            tool_gen_notified: set = set()
+            # Ollama-compatible endpoints reuse index 0 for every tool call
+            # in a parallel batch, distinguishing them only by id.  Track
+            # the last seen id per raw index so we can detect a new tool
+            # call starting at the same index and redirect it to a fresh slot.
+            _last_id_at_idx: dict = {}      # raw_index -> last seen non-empty id
+            _active_slot_by_idx: dict = {}  # raw_index -> current slot in tool_calls_acc
            finish_reason = None
            model_name = None
            role = "assistant"
@@ -3837,11 +3978,45 @@ class AIAgent:
                        _fire_first_delta()
                        self._fire_stream_delta(delta.content)
                        deltas_were_sent["yes"] = True
+                    else:
+                        # Tool calls suppress regular content streaming (avoids
+                        # displaying chatty "I'll use the tool..." text alongside
+                        # tool calls).  But reasoning tags embedded in suppressed
+                        # content should still reach the display — otherwise the
+                        # reasoning box only appears as a post-response fallback,
+                        # rendering it confusingly after the already-streamed
+                        # response.  Route suppressed content through the stream
+                        # delta callback so its tag extraction can fire the
+                        # reasoning display.  Non-reasoning text is harmlessly
+                        # suppressed by the CLI's _stream_delta when the stream
+                        # box is already closed (tool boundary flush).
+                        if self.stream_delta_callback:
+                            try:
+                                self.stream_delta_callback(delta.content)
+                            except Exception:
+                                pass

                # Accumulate tool call deltas — notify display on first name
                if delta and delta.tool_calls:
                    for tc_delta in delta.tool_calls:
-                        idx = tc_delta.index if tc_delta.index is not None else 0
+                        raw_idx = tc_delta.index if tc_delta.index is not None else 0
+                        delta_id = tc_delta.id or ""
+
+                        # Ollama fix: detect a new tool call reusing the same
+                        # raw index (different id) and redirect to a fresh slot.
+                        if raw_idx not in _active_slot_by_idx:
+                            _active_slot_by_idx[raw_idx] = raw_idx
+                        if (
+                            delta_id
+                            and raw_idx in _last_id_at_idx
+                            and delta_id != _last_id_at_idx[raw_idx]
+                        ):
+                            new_slot = max(tool_calls_acc, default=-1) + 1
+                            _active_slot_by_idx[raw_idx] = new_slot
+                        if delta_id:
+                            _last_id_at_idx[raw_idx] = delta_id
+                        idx = _active_slot_by_idx[raw_idx]
+
                        if idx not in tool_calls_acc:
                            tool_calls_acc[idx] = {
                                "id": tc_delta.id or "",
@@ -3994,7 +4169,37 @@ class AIAgent:
                            e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
                        )

-                        if _is_timeout or _is_conn_err:
+                        # SSE error events from proxies (e.g. OpenRouter sends
+                        # {"error":{"message":"Network connection lost."}}) are
+                        # raised as APIError by the OpenAI SDK.  These are
+                        # semantically identical to httpx connection drops —
+                        # the upstream stream died — and should be retried with
+                        # a fresh connection.  Distinguish from HTTP errors:
+                        # APIError from SSE has no status_code, while
+                        # APIStatusError (4xx/5xx) always has one.
+                        _is_sse_conn_err = False
+                        if not _is_timeout and not _is_conn_err:
+                            from openai import APIError as _APIError
+                            if isinstance(e, _APIError) and not getattr(e, "status_code", None):
+                                _err_lower_sse = str(e).lower()
+                                _SSE_CONN_PHRASES = (
+                                    "connection lost",
+                                    "connection reset",
+                                    "connection closed",
+                                    "connection terminated",
+                                    "network error",
+                                    "network connection",
+                                    "terminated",
+                                    "peer closed",
+                                    "broken pipe",
+                                    "upstream connect error",
+                                )
+                                _is_sse_conn_err = any(
+                                    phrase in _err_lower_sse
+                                    for phrase in _SSE_CONN_PHRASES
+                                )
+
+                        if _is_timeout or _is_conn_err or _is_sse_conn_err:
                            # Transient network / timeout error. Retry the
                            # streaming request with a fresh connection first.
                            if _stream_attempt < _max_stream_retries:
@@ -4507,6 +4712,20 @@ class AIAgent:

        if self.max_tokens is not None:
            api_kwargs.update(self._max_tokens_param(self.max_tokens))
+        elif self._is_openrouter_url() and "claude" in (self.model or "").lower():
+            # OpenRouter translates requests to Anthropic's Messages API,
+            # which requires max_tokens as a mandatory field.  When we omit
+            # it, OpenRouter picks a default that can be too low — the model
+            # spends its output budget on thinking and has almost nothing
+            # left for the actual response (especially large tool calls like
+            # write_file).  Sending the model's real output limit ensures
+            # full capacity.  Other providers handle the default fine.
+            try:
+                from agent.anthropic_adapter import _get_anthropic_max_output
+                _model_output_limit = _get_anthropic_max_output(self.model)
+                api_kwargs["max_tokens"] = _model_output_limit
+            except Exception:
+                pass  # fail open — let OpenRouter pick its default

        extra_body = {}

@@ -5788,6 +6007,14 @@ class AIAgent:
        # Installed once, transparent when streams are healthy, prevents crash on write.
        _install_safe_stdio()

+        # Sanitize surrogate characters from user input.  Clipboard paste from
+        # rich-text editors (Google Docs, Word, etc.) can inject lone surrogates
+        # that are invalid UTF-8 and crash JSON serialization in the OpenAI SDK.
+        if isinstance(user_message, str):
+            user_message = _sanitize_surrogates(user_message)
+        if isinstance(persist_user_message, str):
+            persist_user_message = _sanitize_surrogates(persist_user_message)
+
        # Store stream callback for _interruptible_api_call to pick up
        self._stream_callback = stream_callback
        self._persist_user_message_idx = None
@@ -5804,6 +6031,7 @@ class AIAgent:
        self._codex_incomplete_retries = 0
        self._last_content_with_tools = None
        self._mute_post_response = False
+        self._surrogate_sanitized = False
        # NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
        # They are initialized in __init__ and must persist across run_conversation
        # calls so that nudge logic accumulates correctly in CLI mode.
@@ -5811,6 +6039,14 @@ class AIAgent:
        
        # Initialize conversation (copy to avoid mutating the caller's list)
        messages = list(conversation_history) if conversation_history else []
+
+        # Strip budget pressure warnings from previous turns.  These are
+        # turn-scoped signals injected by _get_budget_warning() into tool
+        # result content.  If left in the replayed history, models (especially
+        # GPT-family) interpret them as still-active instructions and avoid
+        # making tool calls in ALL subsequent turns.
+        if messages:
+            _strip_budget_warnings_from_history(messages)
        
        # Hydrate todo store from conversation history (gateway creates a fresh
        # AIAgent per message, so the in-memory store is empty -- we need to
@@ -5906,6 +6142,22 @@ class AIAgent:
                    self._cached_system_prompt = (
                        self._cached_system_prompt + "\n\n" + self._honcho_context
                    ).strip()
+
+                # Plugin hook: on_session_start
+                # Fired once when a brand-new session is created (not on
+                # continuation).  Plugins can use this to initialise
+                # session-scoped state (e.g. warm a memory cache).
+                try:
+                    from hermes_cli.plugins import invoke_hook as _invoke_hook
+                    _invoke_hook(
+                        "on_session_start",
+                        session_id=self.session_id,
+                        model=self.model,
+                        platform=getattr(self, "platform", None) or "",
+                    )
+                except Exception as exc:
+                    logger.warning("on_session_start hook failed: %s", exc)
+
                # Store the system prompt snapshot in SQLite
                if self._session_db:
                    try:
@@ -5967,6 +6219,34 @@ class AIAgent:
                    if _preflight_tokens < self.context_compressor.threshold_tokens:
                        break  # Under threshold

+        # Plugin hook: pre_llm_call
+        # Fired once per turn before the tool-calling loop.  Plugins can
+        # return a dict with a ``context`` key whose value is a string
+        # that will be appended to the ephemeral system prompt for every
+        # API call in this turn (not persisted to session DB or cache).
+        _plugin_turn_context = ""
+        try:
+            from hermes_cli.plugins import invoke_hook as _invoke_hook
+            _pre_results = _invoke_hook(
+                "pre_llm_call",
+                session_id=self.session_id,
+                user_message=original_user_message,
+                conversation_history=list(messages),
+                is_first_turn=(not bool(conversation_history)),
+                model=self.model,
+                platform=getattr(self, "platform", None) or "",
+            )
+            _ctx_parts = []
+            for r in _pre_results:
+                if isinstance(r, dict) and r.get("context"):
+                    _ctx_parts.append(str(r["context"]))
+                elif isinstance(r, str) and r.strip():
+                    _ctx_parts.append(r)
+            if _ctx_parts:
+                _plugin_turn_context = "\n\n".join(_ctx_parts)
+        except Exception as exc:
+            logger.warning("pre_llm_call hook failed: %s", exc)
+
        # Main conversation loop
        api_call_count = 0
        final_response = None
@@ -6064,6 +6344,9 @@ class AIAgent:
            effective_system = active_system_prompt or ""
            if self.ephemeral_system_prompt:
                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+            # Plugin context from pre_llm_call hooks — ephemeral, not cached.
+            if _plugin_turn_context:
+                effective_system = (effective_system + "\n\n" + _plugin_turn_context).strip()
            if effective_system:
                api_messages = [{"role": "system", "content": effective_system}] + api_messages

@@ -6584,6 +6867,24 @@ class AIAgent:
                    if self.thinking_callback:
                        self.thinking_callback("")

+                    # -----------------------------------------------------------
+                    # Surrogate character recovery.  UnicodeEncodeError happens
+                    # when the messages contain lone surrogates (U+D800..U+DFFF)
+                    # that are invalid UTF-8.  Common source: clipboard paste
+                    # from Google Docs or similar rich-text editors.  We sanitize
+                    # the entire messages list in-place and retry once.
+                    # -----------------------------------------------------------
+                    if isinstance(api_error, UnicodeEncodeError) and not getattr(self, '_surrogate_sanitized', False):
+                        self._surrogate_sanitized = True
+                        if _sanitize_messages_surrogates(messages):
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Stripped invalid surrogate characters from messages. Retrying...",
+                                force=True,
+                            )
+                            continue
+                        # Surrogates weren't in messages — might be in system
+                        # prompt or prefill.  Fall through to normal error path.
+
                    status_code = getattr(api_error, "status_code", None)
                    if (
                        self.api_mode == "codex_responses"
@@ -6852,8 +7153,13 @@ class AIAgent:
                    # 529 (Anthropic overloaded) is also transient.
                    # Also catch local validation errors (ValueError, TypeError) — these
                    # are programming bugs, not transient failures.
+                    # Exclude UnicodeEncodeError — it's a ValueError subclass but is
+                    # handled separately by the surrogate sanitization path above.
                    _RETRYABLE_STATUS_CODES = {413, 429, 529}
-                    is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
+                    is_local_validation_error = (
+                        isinstance(api_error, (ValueError, TypeError))
+                        and not isinstance(api_error, UnicodeEncodeError)
+                    )
                    # Detect generic 400s from Anthropic OAuth (transient server-side failures).
                    # Real invalid_request_error responses include a descriptive message;
                    # transient ones contain only "Error" or are empty. (ref: issue #1608)
@@ -6923,6 +7229,36 @@ class AIAgent:
                        _final_summary = self._summarize_api_error(api_error)
                        self._vprint(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.", force=True)
                        self._vprint(f"{self.log_prefix}   💀 Final error: {_final_summary}", force=True)
+
+                        # Detect SSE stream-drop pattern (e.g. "Network
+                        # connection lost") and surface actionable guidance.
+                        # This typically happens when the model generates a
+                        # very large tool call (write_file with huge content)
+                        # and the proxy/CDN drops the stream mid-response.
+                        _is_stream_drop = (
+                            not getattr(api_error, "status_code", None)
+                            and any(p in error_msg for p in (
+                                "connection lost", "connection reset",
+                                "connection closed", "network connection",
+                                "network error", "terminated",
+                            ))
+                        )
+                        if _is_stream_drop:
+                            self._vprint(
+                                f"{self.log_prefix}   💡 The provider's stream "
+                                f"connection keeps dropping. This often happens "
+                                f"when the model tries to write a very large "
+                                f"file in a single tool call.",
+                                force=True,
+                            )
+                            self._vprint(
+                                f"{self.log_prefix}      Try asking the model "
+                                f"to use execute_code with Python's open() for "
+                                f"large files, or to write the file in smaller "
+                                f"sections.",
+                                force=True,
+                            )
+
                        logging.error(
                            "%sAPI call failed after %s retries. %s | provider=%s model=%s msgs=%s tokens=~%s",
                            self.log_prefix, max_retries, _final_summary,
@@ -6932,8 +7268,18 @@ class AIAgent:
                            api_kwargs, reason="max_retries_exhausted", error=api_error,
                        )
                        self._persist_session(messages, conversation_history)
+                        _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
+                        if _is_stream_drop:
+                            _final_response += (
+                                "\n\nThe provider's stream connection keeps "
+                                "dropping — this often happens when generating "
+                                "very large tool call responses (e.g. write_file "
+                                "with long content). Try asking me to use "
+                                "execute_code with Python's open() for large "
+                                "files, or to write in smaller sections."
+                            )
                        return {
-                            "final_response": f"API call failed after {max_retries} retries: {_final_summary}",
+                            "final_response": _final_response,
                            "messages": messages,
                            "api_calls": api_call_count,
                            "completed": False,
@@ -7311,7 +7657,6 @@ class AIAgent:
                        except Exception:
                            pass

-                    _msg_count_before_tools = len(messages)
                    self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)

                    # Signal that a paragraph break is needed before the next
@@ -7329,18 +7674,18 @@ class AIAgent:
                    if _tc_names == {"execute_code"}:
                        self.iteration_budget.refund()
                    
-                    # Estimate next prompt size using real token counts from the
-                    # last API response + rough estimate of newly appended tool
-                    # results.  This catches cases where tool results push the
-                    # context past the limit that last_prompt_tokens alone misses
-                    # (e.g. large file reads, web extractions).
+                    # Use real token counts from the API response to decide
+                    # compression.  prompt_tokens + completion_tokens is the
+                    # actual context size the provider reported plus the
+                    # assistant turn — a tight lower bound for the next prompt.
+                    # Tool results appended above aren't counted yet, but the
+                    # threshold (default 50%) leaves ample headroom; if tool
+                    # results push past it, the next API call will report the
+                    # real total and trigger compression then.
                    _compressor = self.context_compressor
-                    _new_tool_msgs = messages[_msg_count_before_tools:]
-                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
-                    _estimated_next_prompt = (
+                    _real_tokens = (
                        _compressor.last_prompt_tokens
                        + _compressor.last_completion_tokens
-                        + _new_chars // 3  # conservative: JSON-heavy tool results ≈ 3 chars/token
                    )

                    # ── Context pressure warnings (user-facing only) ──────────
@@ -7350,12 +7695,12 @@ class AIAgent:
                    # Does not inject into messages — just prints to CLI output
                    # and fires status_callback for gateway platforms.
                    if _compressor.threshold_tokens > 0:
-                        _compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
+                        _compaction_progress = _real_tokens / _compressor.threshold_tokens
                        if _compaction_progress >= 0.85 and not self._context_pressure_warned:
                            self._context_pressure_warned = True
                            self._emit_context_pressure(_compaction_progress, _compressor)

-                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
+                    if self.compression_enabled and _compressor.should_compress(_real_tokens):
                        messages, active_system_prompt = self._compress_context(
                            messages, system_message,
                            approx_tokens=self.context_compressor.last_prompt_tokens,
@@ -7602,6 +7947,25 @@ class AIAgent:
            self._honcho_sync(original_user_message, final_response)
            self._queue_honcho_prefetch(original_user_message)

+        # Plugin hook: post_llm_call
+        # Fired once per turn after the tool-calling loop completes.
+        # Plugins can use this to persist conversation data (e.g. sync
+        # to an external memory system).
+        if final_response and not interrupted:
+            try:
+                from hermes_cli.plugins import invoke_hook as _invoke_hook
+                _invoke_hook(
+                    "post_llm_call",
+                    session_id=self.session_id,
+                    user_message=original_user_message,
+                    assistant_response=final_response,
+                    conversation_history=list(messages),
+                    model=self.model,
+                    platform=getattr(self, "platform", None) or "",
+                )
+            except Exception as exc:
+                logger.warning("post_llm_call hook failed: %s", exc)
+
        # Extract reasoning from the last assistant message (if any)
        last_reasoning = None
        for msg in reversed(messages):
@@ -7667,6 +8031,22 @@ class AIAgent:
            except Exception:
                pass  # Background review is best-effort

+        # Plugin hook: on_session_end
+        # Fired at the very end of every run_conversation call.
+        # Plugins can use this for cleanup, flushing buffers, etc.
+        try:
+            from hermes_cli.plugins import invoke_hook as _invoke_hook
+            _invoke_hook(
+                "on_session_end",
+                session_id=self.session_id,
+                completed=completed,
+                interrupted=interrupted,
+                model=self.model,
+                platform=getattr(self, "platform", None) or "",
+            )
+        except Exception as exc:
+            logger.warning("on_session_end hook failed: %s", exc)
+
        return result

    def chat(self, message: str, stream_callback: Optional[callable] = None) -> str:
@@ -2,7 +2,7 @@
 # Kill all running Modal apps (sandboxes, deployments, etc.)
 #
 # Usage:
-#   bash scripts/kill_modal.sh          # Stop swe-rex (the sandbox app)
+#   bash scripts/kill_modal.sh          # Stop hermes-agent sandboxes
 #   bash scripts/kill_modal.sh --all    # Stop ALL Modal apps

 set -uo pipefail
@@ -17,10 +17,10 @@ if [[ "${1:-}" == "--all" ]]; then
        modal app stop "$app_id" 2>/dev/null || true
    done
 else
-    echo "Stopping swe-rex sandboxes..."
-    APPS=$(echo "$APP_LIST" | grep 'swe-rex' | grep -oE 'ap-[A-Za-z0-9]+' || true)
+    echo "Stopping hermes-agent sandboxes..."
+    APPS=$(echo "$APP_LIST" | grep 'hermes-agent' | grep -oE 'ap-[A-Za-z0-9]+' || true)
    if [[ -z "$APPS" ]]; then
-        echo "  No swe-rex apps found."
+        echo "  No hermes-agent apps found."
    else
        echo "$APPS" | while read app_id; do
            echo "  Stopping $app_id"
@@ -30,5 +30,5 @@ else
 fi

 echo ""
-echo "Current swe-rex status:"
-modal app list 2>/dev/null | grep -E 'State|swe-rex' || echo "  (none)"
+echo "Current hermes-agent status:"
+modal app list 2>/dev/null | grep -E 'State|hermes-agent' || echo "  (none)"
@@ -0,0 +1,180 @@
+---
+name: webhook-subscriptions
+description: Create and manage webhook subscriptions for event-driven agent activation. Use when the user wants external services to trigger agent runs automatically.
+version: 1.0.0
+metadata:
+  hermes:
+    tags: [webhook, events, automation, integrations]
+---
+
+# Webhook Subscriptions
+
+Create dynamic webhook subscriptions so external services (GitHub, GitLab, Stripe, CI/CD, IoT sensors, monitoring tools) can trigger Hermes agent runs by POSTing events to a URL.
+
+## Setup (Required First)
+
+The webhook platform must be enabled before subscriptions can be created. Check with:
+```bash
+hermes webhook list
+```
+
+If it says "Webhook platform is not enabled", set it up using one of the options below:
+
+### Option 1: Setup wizard
+```bash
+hermes gateway setup
+```
+Follow the prompts to enable webhooks, set the port, and set a global HMAC secret.
+
+### Option 2: Manual config
+Add to `~/.hermes/config.yaml`:
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      host: "0.0.0.0"
+      port: 8644
+      secret: "generate-a-strong-secret-here"
+```
+
+### Option 3: Environment variables
+Add to `~/.hermes/.env`:
+```bash
+WEBHOOK_ENABLED=true
+WEBHOOK_PORT=8644
+WEBHOOK_SECRET=generate-a-strong-secret-here
+```
+
+After configuration, start (or restart) the gateway:
+```bash
+hermes gateway run
+# Or if using systemd:
+systemctl --user restart hermes-gateway
+```
+
+Verify it's running:
+```bash
+curl http://localhost:8644/health
+```
+
+## Commands
+
+All management is via the `hermes webhook` CLI command:
+
+### Create a subscription
+```bash
+hermes webhook subscribe <name> \
+  --prompt "Prompt template with {payload.fields}" \
+  --events "event1,event2" \
+  --description "What this does" \
+  --skills "skill1,skill2" \
+  --deliver telegram \
+  --deliver-chat-id "12345" \
+  --secret "optional-custom-secret"
+```
+
+The command returns the webhook URL and the HMAC secret. The user then configures their service to POST to that URL and to sign requests with that secret.
+
+### List subscriptions
+```bash
+hermes webhook list
+```
+
+### Remove a subscription
+```bash
+hermes webhook remove <name>
+```
+
+### Test a subscription
+```bash
+hermes webhook test <name>
+hermes webhook test <name> --payload '{"key": "value"}'
+```
+
+## Prompt Templates
+
+Prompts support `{dot.notation}` for accessing nested payload fields:
+
+- `{issue.title}` — GitHub issue title
+- `{pull_request.user.login}` — PR author
+- `{data.object.amount}` — Stripe payment amount
+- `{sensor.temperature}` — IoT sensor reading
+
+If `--prompt` is omitted, the full JSON payload is included in the agent prompt instead.
+
+## Common Patterns
+
+### GitHub: new issues
+```bash
+hermes webhook subscribe github-issues \
+  --events "issues" \
+  --prompt "New GitHub issue #{issue.number}: {issue.title}\n\nAction: {action}\nAuthor: {issue.user.login}\nBody:\n{issue.body}\n\nPlease triage this issue." \
+  --deliver telegram \
+  --deliver-chat-id "-100123456789"
+```
+
+Then in GitHub repo Settings → Webhooks → Add webhook:
+- Payload URL: the returned webhook_url
+- Content type: application/json
+- Secret: the returned secret
+- Events: "Issues"
+
+### GitHub: PR reviews
+```bash
+hermes webhook subscribe github-prs \
+  --events "pull_request" \
+  --prompt "PR #{pull_request.number} {action}: {pull_request.title}\nBy: {pull_request.user.login}\nBranch: {pull_request.head.ref}\n\n{pull_request.body}" \
+  --skills "github-code-review" \
+  --deliver github_comment
+```
+
+### Stripe: payment events
+```bash
+hermes webhook subscribe stripe-payments \
+  --events "payment_intent.succeeded,payment_intent.payment_failed" \
+  --prompt "Payment {data.object.status}: {data.object.amount} cents from {data.object.receipt_email}" \
+  --deliver telegram \
+  --deliver-chat-id "-100123456789"
+```
+
+### CI/CD: build notifications
+```bash
+hermes webhook subscribe ci-builds \
+  --events "pipeline" \
+  --prompt "Build {object_attributes.status} on {project.name} branch {object_attributes.ref}\nCommit: {commit.message}" \
+  --deliver discord \
+  --deliver-chat-id "1234567890"
+```
+
+### Generic monitoring alert
+```bash
+hermes webhook subscribe alerts \
+  --prompt "Alert: {alert.name}\nSeverity: {alert.severity}\nMessage: {alert.message}\n\nPlease investigate and suggest remediation." \
+  --deliver origin
+```
+
+## Security
+
+- Each subscription gets an auto-generated HMAC-SHA256 secret (or provide your own with `--secret`)
+- The webhook adapter validates signatures on every incoming POST
+- Static routes from config.yaml cannot be overwritten by dynamic subscriptions
+- Subscriptions persist to `~/.hermes/webhook_subscriptions.json`
+
+## How It Works
+
+1. `hermes webhook subscribe` writes to `~/.hermes/webhook_subscriptions.json`
+2. The webhook adapter hot-reloads this file on each incoming request, re-reading it only when its modification time (mtime) has changed, so the overhead is negligible
+3. When a POST arrives matching a route, the adapter formats the prompt and triggers an agent run
+4. The agent's response is delivered to the configured target (Telegram, Discord, GitHub comment, etc.)
+
+## Troubleshooting
+
+If webhooks aren't working:
+
+1. **Is the gateway running?** Check with `systemctl --user status hermes-gateway` or `ps aux | grep gateway`
+2. **Is the webhook server listening?** `curl http://localhost:8644/health` should return `{"status": "ok"}`
+3. **Check gateway logs:** `grep webhook ~/.hermes/logs/gateway.log | tail -20`
+4. **Signature mismatch?** Verify the secret in your service matches the one from `hermes webhook list`. GitHub sends `X-Hub-Signature-256`, GitLab sends `X-Gitlab-Token`.
+5. **Firewall/NAT?** The webhook URL must be reachable from the service. For local development, use a tunnel (ngrok, cloudflared).
+6. **Wrong event type?** Check `--events` filter matches what the service sends. Use `hermes webhook test <name>` to verify the route works.
@@ -219,6 +219,9 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null; then
  echo "AUTH_METHOD=gh"
 elif [ -n "$GITHUB_TOKEN" ]; then
  echo "AUTH_METHOD=curl"
+elif [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then
+  export GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r')
+  echo "AUTH_METHOD=curl"
 elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then
  export GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
  echo "AUTH_METHOD=curl"
@@ -23,6 +23,11 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null 2>&1; then
    GH_USER=$(gh api user --jq '.login' 2>/dev/null)
 elif [ -n "$GITHUB_TOKEN" ]; then
    GH_AUTH_METHOD="curl"
+elif [ -f "$HOME/.hermes/.env" ] && grep -q "^GITHUB_TOKEN=" "$HOME/.hermes/.env" 2>/dev/null; then
+    GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" "$HOME/.hermes/.env" | head -1 | cut -d= -f2 | tr -d '\n\r')
+    if [ -n "$GITHUB_TOKEN" ]; then
+        GH_AUTH_METHOD="curl"
+    fi
 elif [ -f "$HOME/.git-credentials" ] && grep -q "github.com" "$HOME/.git-credentials" 2>/dev/null; then
    GITHUB_TOKEN=$(grep "github.com" "$HOME/.git-credentials" | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
    if [ -n "$GITHUB_TOKEN" ]; then
@@ -27,7 +27,11 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null; then
 else
  AUTH="git"
  if [ -z "$GITHUB_TOKEN" ]; then
-    GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then
+      GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r')
+    elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then
+      GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    fi
  fi
 fi

@@ -27,7 +27,11 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null; then
 else
  AUTH="git"
  if [ -z "$GITHUB_TOKEN" ]; then
-    GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then
+      GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r')
+    elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then
+      GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    fi
  fi
 fi

@@ -29,7 +29,11 @@ else
  AUTH="git"
  # Ensure we have a token for API calls
  if [ -z "$GITHUB_TOKEN" ]; then
-    GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then
+      GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r')
+    elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then
+      GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    fi
  fi
 fi
 echo "Using: $AUTH"
@@ -26,7 +26,11 @@ if command -v gh &>/dev/null && gh auth status &>/dev/null; then
 else
  AUTH="git"
  if [ -z "$GITHUB_TOKEN" ]; then
-    GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then
+      GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r')
+    elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then
+      GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
+    fi
  fi
 fi

@@ -18,6 +18,8 @@ from agent.prompt_builder import (
    build_context_files_prompt,
    CONTEXT_FILE_MAX_CHARS,
    DEFAULT_AGENT_IDENTITY,
+    TOOL_USE_ENFORCEMENT_GUIDANCE,
+    TOOL_USE_ENFORCEMENT_MODELS,
    MEMORY_GUIDANCE,
    SESSION_SEARCH_GUIDANCE,
    PLATFORM_HINTS,
@@ -926,3 +928,98 @@ class TestBuildSkillsSystemPromptConditional:
            available_toolsets=set(),
        )
        assert "nested-null" in result
+
+
+# =========================================================================
+# Tool-use enforcement guidance
+# =========================================================================
+
+
+class TestToolUseEnforcementGuidance:
+    def test_guidance_mentions_tool_calls(self):
+        assert "tool call" in TOOL_USE_ENFORCEMENT_GUIDANCE.lower()
+
+    def test_guidance_forbids_description_only(self):
+        assert "describe" in TOOL_USE_ENFORCEMENT_GUIDANCE.lower()
+        assert "promise" in TOOL_USE_ENFORCEMENT_GUIDANCE.lower()
+
+    def test_guidance_requires_action(self):
+        assert "MUST" in TOOL_USE_ENFORCEMENT_GUIDANCE
+
+    def test_enforcement_models_includes_gpt(self):
+        assert "gpt" in TOOL_USE_ENFORCEMENT_MODELS
+
+    def test_enforcement_models_includes_codex(self):
+        assert "codex" in TOOL_USE_ENFORCEMENT_MODELS
+
+    def test_enforcement_models_is_tuple(self):
+        assert isinstance(TOOL_USE_ENFORCEMENT_MODELS, tuple)
+
+
+# =========================================================================
+# Budget warning history stripping
+# =========================================================================
+
+
+class TestStripBudgetWarningsFromHistory:
+    def test_strips_json_budget_warning_key(self):
+        import json
+        from run_agent import _strip_budget_warnings_from_history
+
+        messages = [
+            {"role": "tool", "tool_call_id": "c1", "content": json.dumps({
+                "output": "hello",
+                "exit_code": 0,
+                "_budget_warning": "[BUDGET: Iteration 55/60. 5 iterations left. Start consolidating your work.]",
+            })},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        parsed = json.loads(messages[0]["content"])
+        assert "_budget_warning" not in parsed
+        assert parsed["output"] == "hello"
+        assert parsed["exit_code"] == 0
+
+    def test_strips_text_budget_warning(self):
+        from run_agent import _strip_budget_warnings_from_history
+
+        messages = [
+            {"role": "tool", "tool_call_id": "c1",
+             "content": "some result\n\n[BUDGET WARNING: Iteration 58/60. Only 2 iteration(s) left. Provide your final response NOW. No more tool calls unless absolutely critical.]"},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        assert messages[0]["content"] == "some result"
+
+    def test_leaves_non_tool_messages_unchanged(self):
+        from run_agent import _strip_budget_warnings_from_history
+
+        messages = [
+            {"role": "assistant", "content": "[BUDGET WARNING: Iteration 58/60. Only 2 iteration(s) left. Provide your final response NOW. No more tool calls unless absolutely critical.]"},
+            {"role": "user", "content": "hello"},
+        ]
+        original_contents = [m["content"] for m in messages]
+        _strip_budget_warnings_from_history(messages)
+        assert [m["content"] for m in messages] == original_contents
+
+    def test_handles_empty_and_missing_content(self):
+        from run_agent import _strip_budget_warnings_from_history
+
+        messages = [
+            {"role": "tool", "tool_call_id": "c1", "content": ""},
+            {"role": "tool", "tool_call_id": "c2"},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        assert messages[0]["content"] == ""
+
+    def test_strips_caution_variant(self):
+        import json
+        from run_agent import _strip_budget_warnings_from_history
+
+        messages = [
+            {"role": "tool", "tool_call_id": "c1", "content": json.dumps({
+                "output": "ok",
+                "_budget_warning": "[BUDGET: Iteration 42/60. 18 iterations left. Start consolidating your work.]",
+            })},
+        ]
+        _strip_budget_warnings_from_history(messages)
+        parsed = json.loads(messages[0]["content"])
+        assert "_budget_warning" not in parsed
@@ -28,6 +28,7 @@ from gateway.platforms.api_server import (
    _CORS_HEADERS,
    check_api_server_requirements,
    cors_middleware,
+    security_headers_middleware,
 )


@@ -214,9 +215,11 @@ def _make_adapter(api_key: str = "", cors_origins=None) -> APIServerAdapter:

 def _create_app(adapter: APIServerAdapter) -> web.Application:
    """Create the aiohttp app from the adapter (without starting the full server)."""
-    app = web.Application(middlewares=[cors_middleware])
+    mws = [mw for mw in (cors_middleware, security_headers_middleware) if mw is not None]
+    app = web.Application(middlewares=mws)
    app["api_server_adapter"] = adapter
    app.router.add_get("/health", adapter._handle_health)
+    app.router.add_get("/v1/health", adapter._handle_health)
    app.router.add_get("/v1/models", adapter._handle_models)
    app.router.add_post("/v1/chat/completions", adapter._handle_chat_completions)
    app.router.add_post("/v1/responses", adapter._handle_responses)
@@ -241,6 +244,16 @@ def auth_adapter():


 class TestHealthEndpoint:
+    @pytest.mark.asyncio
+    async def test_security_headers_present(self, adapter):
+        """Responses should include basic security headers."""
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.get("/health")
+            assert resp.status == 200
+            assert resp.headers.get("X-Content-Type-Options") == "nosniff"
+            assert resp.headers.get("Referrer-Policy") == "no-referrer"
+
    @pytest.mark.asyncio
    async def test_health_returns_ok(self, adapter):
        app = _create_app(adapter)
@@ -251,6 +264,17 @@ class TestHealthEndpoint:
            assert data["status"] == "ok"
            assert data["platform"] == "hermes-agent"

+    @pytest.mark.asyncio
+    async def test_v1_health_alias_returns_ok(self, adapter):
+        """GET /v1/health should return the same response as /health."""
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.get("/v1/health")
+            assert resp.status == 200
+            data = await resp.json()
+            assert data["status"] == "ok"
+            assert data["platform"] == "hermes-agent"
+

 # ---------------------------------------------------------------------------
 # /v1/models endpoint
@@ -1300,6 +1324,31 @@ class TestCORS:
            assert "POST" in resp.headers.get("Access-Control-Allow-Methods", "")
            assert "DELETE" in resp.headers.get("Access-Control-Allow-Methods", "")

+    @pytest.mark.asyncio
+    async def test_cors_allows_idempotency_key_header(self):
+        adapter = _make_adapter(cors_origins=["http://localhost:3000"])
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.options(
+                "/v1/chat/completions",
+                headers={
+                    "Origin": "http://localhost:3000",
+                    "Access-Control-Request-Method": "POST",
+                    "Access-Control-Request-Headers": "Idempotency-Key",
+                },
+            )
+            assert resp.status == 200
+            assert "Idempotency-Key" in resp.headers.get("Access-Control-Allow-Headers", "")
+
+    @pytest.mark.asyncio
+    async def test_cors_sets_vary_origin_header(self):
+        adapter = _make_adapter(cors_origins=["http://localhost:3000"])
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.get("/health", headers={"Origin": "http://localhost:3000"})
+            assert resp.status == 200
+            assert resp.headers.get("Vary") == "Origin"
+
    @pytest.mark.asyncio
    async def test_cors_options_preflight_allowed_for_configured_origin(self):
        """Configured origins can complete browser preflight."""
@@ -1319,6 +1368,21 @@ class TestCORS:
            assert "Authorization" in resp.headers.get("Access-Control-Allow-Headers", "")


+    @pytest.mark.asyncio
+    async def test_cors_preflight_sets_max_age(self):
+        adapter = _make_adapter(cors_origins=["http://localhost:3000"])
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.options(
+                "/v1/chat/completions",
+                headers={
+                    "Origin": "http://localhost:3000",
+                    "Access-Control-Request-Method": "POST",
+                    "Access-Control-Request-Headers": "Authorization, Content-Type",
+                },
+            )
+            assert resp.status == 200
+            assert resp.headers.get("Access-Control-Max-Age") == "600"
 # ---------------------------------------------------------------------------
 # Conversation parameter
 # ---------------------------------------------------------------------------
@@ -10,6 +10,7 @@ Covers:
 """

 import asyncio
+import os
 import sys
 from pathlib import Path
 from types import SimpleNamespace
@@ -227,7 +228,8 @@ def test_persist_dm_topic_thread_id_writes_config(tmp_path):

    adapter = _make_adapter()

-    with patch.object(Path, "home", return_value=tmp_path):
+    with patch.object(Path, "home", return_value=tmp_path), \
+         patch.dict(os.environ, {"HERMES_HOME": str(tmp_path / ".hermes")}):
        adapter._persist_dm_topic_thread_id(111, "General", 999)

    with open(config_file) as f:
@@ -366,7 +368,8 @@ def test_get_dm_topic_info_hot_reloads_from_config(tmp_path):
    with open(config_file, "w") as f:
        yaml.dump(config_data, f)

-    with patch.object(Path, "home", return_value=tmp_path):
+    with patch.object(Path, "home", return_value=tmp_path), \
+         patch.dict(os.environ, {"HERMES_HOME": str(tmp_path / ".hermes")}):
        result = adapter._get_dm_topic_info("111", "555")

    assert result is not None
@@ -1,4 +1,5 @@
 """Tests for Matrix platform adapter."""
+import asyncio
 import json
 import re
 import pytest
@@ -446,3 +447,199 @@ class TestMatrixRequirements:
        monkeypatch.delenv("MATRIX_HOMESERVER", raising=False)
        from gateway.platforms.matrix import check_matrix_requirements
        assert check_matrix_requirements() is False
+
+
+# ---------------------------------------------------------------------------
+# Access-token auth / E2EE bootstrap
+# ---------------------------------------------------------------------------
+
+class TestMatrixAccessTokenAuth:
+    @pytest.mark.asyncio
+    async def test_connect_fetches_device_id_from_whoami_for_access_token(self):
+        from gateway.platforms.matrix import MatrixAdapter
+
+        config = PlatformConfig(
+            enabled=True,
+            token="syt_test_access_token",
+            extra={
+                "homeserver": "https://matrix.example.org",
+                "user_id": "@bot:example.org",
+                "encryption": True,
+            },
+        )
+        adapter = MatrixAdapter(config)
+
+        class FakeWhoamiResponse:
+            def __init__(self, user_id, device_id):
+                self.user_id = user_id
+                self.device_id = device_id
+
+        class FakeSyncResponse:
+            def __init__(self):
+                self.rooms = MagicMock(join={})
+
+        fake_client = MagicMock()
+        fake_client.whoami = AsyncMock(return_value=FakeWhoamiResponse("@bot:example.org", "DEV123"))
+        fake_client.sync = AsyncMock(return_value=FakeSyncResponse())
+        fake_client.keys_upload = AsyncMock()
+        fake_client.keys_query = AsyncMock()
+        fake_client.keys_claim = AsyncMock()
+        fake_client.send_to_device_messages = AsyncMock(return_value=[])
+        fake_client.get_users_for_key_claiming = MagicMock(return_value={})
+        fake_client.close = AsyncMock()
+        fake_client.add_event_callback = MagicMock()
+        fake_client.rooms = {}
+        fake_client.account_data = {}
+        fake_client.olm = object()
+        fake_client.should_upload_keys = False
+        fake_client.should_query_keys = False
+        fake_client.should_claim_keys = False
+
+        def _restore_login(user_id, device_id, access_token):
+            fake_client.user_id = user_id
+            fake_client.device_id = device_id
+            fake_client.access_token = access_token
+            fake_client.olm = object()
+
+        fake_client.restore_login = MagicMock(side_effect=_restore_login)
+
+        fake_nio = MagicMock()
+        fake_nio.AsyncClient = MagicMock(return_value=fake_client)
+        fake_nio.WhoamiResponse = FakeWhoamiResponse
+        fake_nio.SyncResponse = FakeSyncResponse
+        fake_nio.LoginResponse = type("LoginResponse", (), {})
+        fake_nio.RoomMessageText = type("RoomMessageText", (), {})
+        fake_nio.RoomMessageImage = type("RoomMessageImage", (), {})
+        fake_nio.RoomMessageAudio = type("RoomMessageAudio", (), {})
+        fake_nio.RoomMessageVideo = type("RoomMessageVideo", (), {})
+        fake_nio.RoomMessageFile = type("RoomMessageFile", (), {})
+        fake_nio.InviteMemberEvent = type("InviteMemberEvent", (), {})
+        fake_nio.MegolmEvent = type("MegolmEvent", (), {})
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            with patch.object(adapter, "_refresh_dm_cache", AsyncMock()):
+                with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)):
+                    assert await adapter.connect() is True
+
+        fake_client.restore_login.assert_called_once_with(
+            "@bot:example.org", "DEV123", "syt_test_access_token"
+        )
+        assert fake_client.access_token == "syt_test_access_token"
+        assert fake_client.user_id == "@bot:example.org"
+        assert fake_client.device_id == "DEV123"
+        fake_client.whoami.assert_awaited_once()
+
+        await adapter.disconnect()
+
+
+class TestMatrixE2EEMaintenance:
+    @pytest.mark.asyncio
+    async def test_sync_loop_runs_e2ee_maintenance_requests(self):
+        adapter = _make_adapter()
+        adapter._encryption = True
+        adapter._closing = False
+
+        class FakeSyncError:
+            pass
+
+        async def _sync_once(timeout=30000):
+            adapter._closing = True
+            return MagicMock()
+
+        fake_client = MagicMock()
+        fake_client.sync = AsyncMock(side_effect=_sync_once)
+        fake_client.send_to_device_messages = AsyncMock(return_value=[])
+        fake_client.keys_upload = AsyncMock()
+        fake_client.keys_query = AsyncMock()
+        fake_client.get_users_for_key_claiming = MagicMock(
+            return_value={"@alice:example.org": ["DEVICE1"]}
+        )
+        fake_client.keys_claim = AsyncMock()
+        fake_client.olm = object()
+        fake_client.should_upload_keys = True
+        fake_client.should_query_keys = True
+        fake_client.should_claim_keys = True
+
+        adapter._client = fake_client
+
+        fake_nio = MagicMock()
+        fake_nio.SyncError = FakeSyncError
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            await adapter._sync_loop()
+
+        fake_client.sync.assert_awaited_once_with(timeout=30000)
+        fake_client.send_to_device_messages.assert_awaited_once()
+        fake_client.keys_upload.assert_awaited_once()
+        fake_client.keys_query.assert_awaited_once()
+        fake_client.keys_claim.assert_awaited_once_with(
+            {"@alice:example.org": ["DEVICE1"]}
+        )
+
+
+class TestMatrixEncryptedSendFallback:
+    @pytest.mark.asyncio
+    async def test_send_retries_with_ignored_unverified_devices(self):
+        adapter = _make_adapter()
+        adapter._encryption = True
+
+        class FakeRoomSendResponse:
+            def __init__(self, event_id):
+                self.event_id = event_id
+
+        class FakeOlmUnverifiedDeviceError(Exception):
+            pass
+
+        fake_client = MagicMock()
+        fake_client.room_send = AsyncMock(side_effect=[
+            FakeOlmUnverifiedDeviceError("unverified"),
+            FakeRoomSendResponse("$event123"),
+        ])
+        adapter._client = fake_client
+        adapter._run_e2ee_maintenance = AsyncMock()
+
+        fake_nio = MagicMock()
+        fake_nio.RoomSendResponse = FakeRoomSendResponse
+        fake_nio.OlmUnverifiedDeviceError = FakeOlmUnverifiedDeviceError
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await adapter.send("!room:example.org", "hello")
+
+        assert result.success is True
+        assert result.message_id == "$event123"
+        adapter._run_e2ee_maintenance.assert_awaited_once()
+        assert fake_client.room_send.await_count == 2
+        first_call = fake_client.room_send.await_args_list[0]
+        second_call = fake_client.room_send.await_args_list[1]
+        assert first_call.kwargs.get("ignore_unverified_devices") is False
+        assert second_call.kwargs.get("ignore_unverified_devices") is True
+
+    @pytest.mark.asyncio
+    async def test_send_retries_after_timeout_in_encrypted_room(self):
+        adapter = _make_adapter()
+        adapter._encryption = True
+
+        class FakeRoomSendResponse:
+            def __init__(self, event_id):
+                self.event_id = event_id
+
+        fake_client = MagicMock()
+        fake_client.room_send = AsyncMock(side_effect=[
+            asyncio.TimeoutError(),
+            FakeRoomSendResponse("$event456"),
+        ])
+        adapter._client = fake_client
+        adapter._run_e2ee_maintenance = AsyncMock()
+
+        fake_nio = MagicMock()
+        fake_nio.RoomSendResponse = FakeRoomSendResponse
+
+        with patch.dict("sys.modules", {"nio": fake_nio}):
+            result = await adapter.send("!room:example.org", "hello")
+
+        assert result.success is True
+        assert result.message_id == "$event456"
+        adapter._run_e2ee_maintenance.assert_awaited_once()
+        assert fake_client.room_send.await_count == 2
+        second_call = fake_client.room_send.await_args_list[1]
+        assert second_call.kwargs.get("ignore_unverified_devices") is True
@@ -171,6 +171,170 @@ class TestCacheImageFromUrl:
        mock_sleep.assert_not_called()


+# ---------------------------------------------------------------------------
+# cache_audio_from_url (base.py)
+# ---------------------------------------------------------------------------
+
+class TestCacheAudioFromUrl:
+    """Tests for gateway.platforms.base.cache_audio_from_url.
+
+    ``httpx.AsyncClient`` and ``asyncio.sleep`` are always replaced with
+    mocks, so no test performs a real request or a real back-off delay.
+    """
+
+    _URL = "http://example.com/voice.ogg"
+
+    @staticmethod
+    def _ok_response(content=b"audio data"):
+        """Return a mock response carrying *content* with a mocked raise_for_status."""
+        response = MagicMock()
+        response.content = content
+        response.raise_for_status = MagicMock()
+        return response
+
+    @staticmethod
+    def _client(**get_kwargs):
+        """Return a mock client usable as an async context manager.
+
+        *get_kwargs* (``return_value=`` or ``side_effect=``) configure the
+        ``AsyncMock`` installed as ``client.get``.
+        """
+        client = AsyncMock()
+        client.get = AsyncMock(**get_kwargs)
+        client.__aenter__ = AsyncMock(return_value=client)
+        client.__aexit__ = AsyncMock(return_value=False)
+        return client
+
+    def _fetch(self, client, sleep=None, **kwargs):
+        """Run cache_audio_from_url to completion against *client*.
+
+        *sleep* replaces ``asyncio.sleep`` (a fresh ``AsyncMock`` when
+        omitted); *kwargs* are forwarded to ``cache_audio_from_url`` (for
+        example ``retries=2``).  Exceptions raised by the call propagate.
+        """
+        sleep = AsyncMock() if sleep is None else sleep
+
+        async def run():
+            with patch("httpx.AsyncClient", return_value=client), \
+                 patch("asyncio.sleep", sleep):
+                from gateway.platforms.base import cache_audio_from_url
+                return await cache_audio_from_url(self._URL, ext=".ogg", **kwargs)
+
+        return asyncio.run(run())
+
+    def test_success_on_first_attempt(self, tmp_path, monkeypatch):
+        """A clean 200 response caches the audio and returns a path."""
+        monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
+        client = self._client(return_value=self._ok_response(b"\x00\x01 fake audio"))
+
+        path = self._fetch(client)
+
+        assert path.endswith(".ogg")
+        client.get.assert_called_once()
+
+    def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
+        """A timeout on the first attempt is retried; second attempt succeeds."""
+        monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
+        client = self._client(side_effect=[_make_timeout_error(), self._ok_response()])
+        mock_sleep = AsyncMock()
+
+        path = self._fetch(client, mock_sleep, retries=2)
+
+        assert path.endswith(".ogg")
+        assert client.get.call_count == 2
+        mock_sleep.assert_called_once()
+
+    def test_retries_on_429_then_succeeds(self, tmp_path, monkeypatch):
+        """A 429 response on the first attempt is retried; second attempt succeeds."""
+        monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
+        client = self._client(
+            side_effect=[_make_http_status_error(429), self._ok_response()]
+        )
+
+        path = self._fetch(client, retries=2)
+
+        assert path.endswith(".ogg")
+        assert client.get.call_count == 2
+
+    def test_retries_on_500_then_succeeds(self, tmp_path, monkeypatch):
+        """A 500 response on the first attempt is retried; second attempt succeeds."""
+        monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
+        client = self._client(
+            side_effect=[_make_http_status_error(500), self._ok_response()]
+        )
+
+        path = self._fetch(client, retries=2)
+
+        assert path.endswith(".ogg")
+        assert client.get.call_count == 2
+
+    def test_raises_after_max_retries_exhausted(self, tmp_path, monkeypatch):
+        """Timeout on every attempt raises after all retries are consumed."""
+        monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
+        # A single exception instance as side_effect is raised on every call.
+        client = self._client(side_effect=_make_timeout_error())
+
+        with pytest.raises(httpx.TimeoutException):
+            self._fetch(client, retries=2)
+
+        # 3 total calls: initial + 2 retries
+        assert client.get.call_count == 3
+
+    def test_non_retryable_4xx_raises_immediately(self, tmp_path, monkeypatch):
+        """A 404 (non-retryable) is raised immediately without any retry."""
+        monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
+        mock_sleep = AsyncMock()
+        client = self._client(side_effect=_make_http_status_error(404))
+
+        with pytest.raises(httpx.HTTPStatusError):
+            self._fetch(client, mock_sleep, retries=2)
+
+        # Only 1 attempt, no sleep
+        assert client.get.call_count == 1
+        mock_sleep.assert_not_called()
+
+
 # ---------------------------------------------------------------------------
 # Slack mock setup (mirrors existing test_slack.py approach)
 # ---------------------------------------------------------------------------
@@ -62,6 +62,18 @@ class TestMessageEventGetCommand:
        event = MessageEvent(text="/")
        assert event.get_command() == ""

+    def test_command_with_at_botname(self):
+        """``/new@TigerNanoBot`` must yield the bare command ``new``."""
+        command = MessageEvent(text="/new@TigerNanoBot").get_command()
+        assert command == "new"
+
+    def test_command_with_at_botname_and_args(self):
+        """The ``@BotName`` suffix is dropped even when arguments follow the command."""
+        # The input carries trailing arguments so the case named by this test
+        # is actually exercised (the bare "/compress@TigerNanoBot" form is
+        # already covered by test_command_with_at_botname).
+        event = MessageEvent(text="/compress@TigerNanoBot keep recent context")
+        assert event.get_command() == "compress"
+
+    def test_command_mixed_case_with_at_botname(self):
+        """``/RESET@TigerNanoBot`` must yield the lower-cased command ``reset``."""
+        command = MessageEvent(text="/RESET@TigerNanoBot").get_command()
+        assert command == "reset"
+

 class TestMessageEventGetCommandArgs:
    def test_command_with_args(self):
@@ -344,6 +344,7 @@ class TestRuntimeDisconnectQueuing:
    async def test_retryable_runtime_error_queued_for_reconnect(self):
        """Retryable runtime errors should add the platform to _failed_platforms."""
        runner = _make_runner()
+        runner.stop = AsyncMock()

        adapter = StubAdapter(succeed=True)
        adapter._set_fatal_error("network_error", "DNS failure", retryable=True)
@@ -371,8 +372,12 @@ class TestRuntimeDisconnectQueuing:
        assert Platform.TELEGRAM not in runner._failed_platforms

    @pytest.mark.asyncio
-    async def test_retryable_error_prevents_shutdown_when_queued(self):
-        """Gateway should not shut down if failed platforms are queued for reconnection."""
+    async def test_retryable_error_exits_for_service_restart_when_all_down(self):
+        """Gateway should exit with failure when all platforms fail with retryable errors.
+
+        This lets systemd Restart=on-failure restart the process, which is more
+        reliable than in-process background reconnection after exhausted retries.
+        """
        runner = _make_runner()
        runner.stop = AsyncMock()

@@ -382,7 +387,28 @@ class TestRuntimeDisconnectQueuing:

        await runner._handle_adapter_fatal_error(adapter)

-        # stop() should NOT have been called since we have platforms queued
+        # stop() SHOULD be called — gateway exits for systemd restart
+        runner.stop.assert_called_once()
+        assert runner._exit_with_failure is True
+        assert Platform.TELEGRAM in runner._failed_platforms
+
+    @pytest.mark.asyncio
+    async def test_retryable_error_no_exit_when_other_adapters_still_connected(self):
+        """Gateway should NOT exit if some adapters are still connected."""
+        runner = _make_runner()
+        # Replace stop() with a mock so a shutdown attempt is recorded, not executed.
+        runner.stop = AsyncMock()
+
+        failing_adapter = StubAdapter(succeed=True)
+        failing_adapter._set_fatal_error("network_error", "DNS failure", retryable=True)
+        runner.adapters[Platform.TELEGRAM] = failing_adapter
+
+        # Another adapter is still connected
+        healthy_adapter = StubAdapter(succeed=True)
+        runner.adapters[Platform.DISCORD] = healthy_adapter
+
+        await runner._handle_adapter_fatal_error(failing_adapter)
+
+        # stop() should NOT have been called — Discord is still up
        runner.stop.assert_not_called()
        assert Platform.TELEGRAM in runner._failed_platforms

@@ -14,8 +14,8 @@ from gateway.session import SessionSource


 class ProgressCaptureAdapter(BasePlatformAdapter):
-    def __init__(self):
-        super().__init__(PlatformConfig(enabled=True, token="fake-token"), Platform.TELEGRAM)
+    def __init__(self, platform=Platform.TELEGRAM):
+        super().__init__(PlatformConfig(enabled=True, token="***"), platform)
        self.sent = []
        self.edits = []
        self.typing = []
@@ -76,7 +76,7 @@ def _make_runner(adapter):
    GatewayRunner = gateway_run.GatewayRunner

    runner = object.__new__(GatewayRunner)
-    runner.adapters = {Platform.TELEGRAM: adapter}
+    runner.adapters = {adapter.platform: adapter}
    runner._voice_mode = {}
    runner._prefill_messages = []
    runner._ephemeral_system_prompt = ""
@@ -133,3 +133,87 @@ async def test_run_agent_progress_stays_in_originating_topic(monkeypatch, tmp_pa
    ]
    assert adapter.edits
    assert all(call["metadata"] == {"thread_id": "17585"} for call in adapter.typing)
+
+
+@pytest.mark.asyncio
+async def test_run_agent_progress_does_not_use_event_message_id_for_telegram_dm(monkeypatch, tmp_path):
+    """Telegram DM progress must not reuse event message id as thread metadata."""
+    # NOTE(review): "all" presumably enables progress output for every tool call — confirm.
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
+
+    # Register a stub ``dotenv`` module in sys.modules with a no-op load_dotenv.
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    # Register a stub ``run_agent`` module whose AIAgent is the FakeAgent test double.
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = FakeAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    adapter = ProgressCaptureAdapter(platform=Platform.TELEGRAM)
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+
+    # A DM source with no thread of its own.
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="12345",
+        chat_type="dm",
+        thread_id=None,
+    )
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-2",
+        session_key="agent:main:telegram:dm:12345",
+        event_message_id="777",
+    )
+
+    assert result["final_response"] == "done"
+    assert adapter.sent
+    # Neither the first sent message nor any typing call may carry metadata,
+    # i.e. event_message_id "777" was not turned into a thread id.
+    assert adapter.sent[0]["metadata"] is None
+    assert all(call["metadata"] is None for call in adapter.typing)
+
+
+@pytest.mark.asyncio
+async def test_run_agent_progress_uses_event_message_id_for_slack_dm(monkeypatch, tmp_path):
+    """Slack DM progress should keep event ts fallback threading."""
+    # NOTE(review): "all" presumably enables progress output for every tool call — confirm.
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
+
+    # Register a stub ``dotenv`` module in sys.modules with a no-op load_dotenv.
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
+
+    # Register a stub ``run_agent`` module whose AIAgent is the FakeAgent test double.
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = FakeAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    adapter = ProgressCaptureAdapter(platform=Platform.SLACK)
+    runner = _make_runner(adapter)
+    gateway_run = importlib.import_module("gateway.run")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+
+    # A DM source with no thread of its own.
+    source = SessionSource(
+        platform=Platform.SLACK,
+        chat_id="D123",
+        chat_type="dm",
+        thread_id=None,
+    )
+
+    result = await runner._run_agent(
+        message="hello",
+        context_prompt="",
+        history=[],
+        source=source,
+        session_id="sess-3",
+        session_key="agent:main:slack:dm:D123",
+        event_message_id="1234567890.000001",
+    )
+
+    assert result["final_response"] == "done"
+    assert adapter.sent
+    # With no thread_id on the source, the event message id is expected to be
+    # used as the thread id for both sent messages and typing calls.
+    assert adapter.sent[0]["metadata"] == {"thread_id": "1234567890.000001"}
+    assert all(call["metadata"] == {"thread_id": "1234567890.000001"} for call in adapter.typing)
@@ -89,7 +89,8 @@ async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatc

    await runner._handle_adapter_fatal_error(adapter)

-    # Should NOT shut down — platform is queued for reconnection
-    runner.stop.assert_not_awaited()
+    # Should shut down with failure — systemd Restart=on-failure will restart
+    runner.stop.assert_awaited_once()
+    assert runner._exit_with_failure is True
    assert Platform.WHATSAPP in runner._failed_platforms
    assert runner._failed_platforms[Platform.WHATSAPP]["attempts"] == 0
@@ -304,8 +304,12 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
    class FakeCompressAgent:
+        """Agent stub: stores model/session_id and returns a fixed compressed history."""
+
        def __init__(self, **kwargs):
            self.model = kwargs.get("model")
+            self.session_id = kwargs.get("session_id", "fake-session")
+            # NOTE(review): presumably read or assigned by the gateway code under test — confirm.
+            self._print_fn = None

        def _compress_context(self, messages, *_args, **_kwargs):
+            # Simulate real _compress_context: create a new session_id
+            self.session_id = f"{self.session_id}_compressed"
            return ([{"role": "assistant", "content": "compressed"}], None)

    fake_run_agent = types.ModuleType("run_agent")
@@ -315,6 +315,24 @@ class TestFallbackTransportInit:
        transport = tnet.TelegramFallbackTransport(["149.154.167.220", "not-an-ip"])
        assert transport._fallback_ips == ["149.154.167.220"]

+    def test_uses_proxy_env_for_primary_and_fallback_transports(self, monkeypatch):
+        """With only HTTPS_PROXY set, both constructed transports receive it as ``proxy``."""
+        proxy_env_names = (
+            "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
+            "https_proxy", "http_proxy", "all_proxy",
+        )
+        captured = []
+
+        def recording_factory(**kwargs):
+            # Record a snapshot of the keyword arguments of every construction.
+            captured.append(dict(kwargs))
+            return FakeTransport([], {})
+
+        # Start from a clean environment so only the variable set below applies.
+        for name in proxy_env_names:
+            monkeypatch.delenv(name, raising=False)
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
+        monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", recording_factory)
+
+        transport = tnet.TelegramFallbackTransport(["149.154.167.220"])
+
+        assert transport._fallback_ips == ["149.154.167.220"]
+        assert len(captured) == 2
+        assert all(call["proxy"] == "http://proxy.example:8080" for call in captured)
+

 class TestFallbackTransportClose:
    @pytest.mark.asyncio
@@ -0,0 +1,87 @@
+"""Tests for webhook adapter dynamic route loading."""
+
+import json
+import os
+import pytest
+from pathlib import Path
+
+from gateway.config import PlatformConfig
+from gateway.platforms.webhook import WebhookAdapter, _DYNAMIC_ROUTES_FILENAME
+
+
+def _make_adapter(routes=None, extra=None):
+    """Build a WebhookAdapter from an enabled PlatformConfig for tests.
+
+    Args:
+        routes: Optional mapping stored under ``extra["routes"]`` when truthy.
+        extra: Optional base ``extra`` mapping; it is copied, never mutated.
+
+    Returns:
+        A ``WebhookAdapter`` whose config ``extra`` always has a ``"secret"``
+        key (``"test-global-secret"`` unless *extra* already supplies one).
+    """
+    # Copy first: the keys added below must not leak into the caller's dict.
+    _extra = dict(extra) if extra else {}
+    if routes:
+        _extra["routes"] = routes
+    _extra.setdefault("secret", "test-global-secret")
+    config = PlatformConfig(enabled=True, extra=_extra)
+    return WebhookAdapter(config)
+
+
+@pytest.fixture(autouse=True)
+def _isolate(tmp_path, monkeypatch):
+    """Set HERMES_HOME to the per-test temporary directory for every test."""
+    hermes_home = str(tmp_path)
+    monkeypatch.setenv("HERMES_HOME", hermes_home)
+
+
+class TestDynamicRouteLoading:
+    """Tests for ``WebhookAdapter._reload_dynamic_routes``.
+
+    The tests write the dynamic-routes file into ``tmp_path``, which the
+    module's autouse fixture exports as ``HERMES_HOME``.
+    """
+
+    def test_no_dynamic_file(self):
+        """Without a dynamic file, static routes remain and no dynamic ones exist."""
+        adapter = _make_adapter(routes={"static": {"secret": "s"}})
+        adapter._reload_dynamic_routes()
+        assert "static" in adapter._routes
+        assert len(adapter._dynamic_routes) == 0
+
+    def test_loads_dynamic_routes(self, tmp_path):
+        """Routes from the dynamic file appear next to the static ones."""
+        subs = {"my-hook": {"secret": "dynamic-secret", "prompt": "test", "events": []}}
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(json.dumps(subs))
+
+        adapter = _make_adapter(routes={"static": {"secret": "s"}})
+        adapter._reload_dynamic_routes()
+        assert "my-hook" in adapter._routes
+        assert "static" in adapter._routes
+
+    def test_static_takes_precedence(self, tmp_path):
+        """On a name clash the statically configured route wins."""
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(
+            json.dumps({"conflict": {"secret": "dynamic", "prompt": "dyn"}})
+        )
+        adapter = _make_adapter(routes={"conflict": {"secret": "static", "prompt": "stat"}})
+        adapter._reload_dynamic_routes()
+        assert adapter._routes["conflict"]["secret"] == "static"
+
+    def test_mtime_gated(self, tmp_path):
+        """The dynamic file is re-read only after its mtime changes."""
+        path = tmp_path / _DYNAMIC_ROUTES_FILENAME
+        path.write_text(json.dumps({"v1": {"secret": "s"}}))
+
+        adapter = _make_adapter()
+        adapter._reload_dynamic_routes()
+        assert "v1" in adapter._dynamic_routes
+
+        # Same mtime — no reload
+        adapter._dynamic_routes["injected"] = True
+        adapter._reload_dynamic_routes()
+        assert "injected" in adapter._dynamic_routes
+
+        # New write — reloads.  The mtime is pushed forward explicitly rather
+        # than sleeping: on filesystems with coarse timestamp resolution a
+        # short sleep can leave the mtime unchanged and make the test flaky.
+        previous_mtime = path.stat().st_mtime
+        path.write_text(json.dumps({"v2": {"secret": "s"}}))
+        os.utime(path, (previous_mtime + 10, previous_mtime + 10))
+        adapter._reload_dynamic_routes()
+        assert "v2" in adapter._dynamic_routes
+        assert "v1" not in adapter._dynamic_routes
+
+    def test_file_removal_clears(self, tmp_path):
+        """Deleting the dynamic file empties the dynamic routes on the next reload."""
+        path = tmp_path / _DYNAMIC_ROUTES_FILENAME
+        path.write_text(json.dumps({"temp": {"secret": "s"}}))
+        adapter = _make_adapter()
+        adapter._reload_dynamic_routes()
+        assert "temp" in adapter._dynamic_routes
+
+        path.unlink()
+        adapter._reload_dynamic_routes()
+        assert len(adapter._dynamic_routes) == 0
+
+    def test_corrupted_file(self, tmp_path):
+        """A non-JSON dynamic file is ignored; static routes stay available."""
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text("not json")
+        adapter = _make_adapter(routes={"static": {"secret": "s"}})
+        adapter._reload_dynamic_routes()
+        assert "static" in adapter._routes
+        assert len(adapter._dynamic_routes) == 0
@@ -105,3 +105,24 @@ class TestCmdUpdateBranchFallback:
        commands = [" ".join(str(a) for a in c.args[0]) for c in mock_run.call_args_list]
        pull_cmds = [c for c in commands if "pull" in c]
        assert len(pull_cmds) == 0
+
+    def test_update_non_interactive_skips_migration_prompt(self, mock_args, capsys):
+        """When stdin/stdout aren't TTYs, config migration prompt is skipped."""
+        # Patches: subprocess.run and input() are mocked, one env var is
+        # reported missing, the config version check returns (1, 2), and the
+        # ``sys`` object seen by hermes_cli.main is replaced with a mock.
+        with patch("shutil.which", return_value=None), patch(
+            "subprocess.run"
+        ) as mock_run, patch("builtins.input") as mock_input, patch(
+            "hermes_cli.config.get_missing_env_vars", return_value=["MISSING_KEY"]
+        ), patch("hermes_cli.config.get_missing_config_fields", return_value=[]), patch(
+            "hermes_cli.config.check_config_version", return_value=(1, 2)
+        ), patch("hermes_cli.main.sys") as mock_sys:
+            # Both standard streams report that they are not terminals.
+            mock_sys.stdin.isatty.return_value = False
+            mock_sys.stdout.isatty.return_value = False
+            mock_run.side_effect = _make_run_side_effect(
+                branch="main", verify_ok=True, commit_count="1"
+            )
+
+            cmd_update(mock_args)
+
+            # No prompt may be issued, and the skip notice must be printed.
+            mock_input.assert_not_called()
+            captured = capsys.readouterr()
+            assert "Non-interactive session" in captured.out
@@ -1,6 +1,7 @@
 """Tests for gateway service management helpers."""

 import os
+from pathlib import Path
 from types import SimpleNamespace

 import hermes_cli.gateway as gateway_cli
@@ -152,12 +153,13 @@ class TestLaunchdServiceRecovery:
    def test_launchd_start_reloads_unloaded_job_and_retries(self, tmp_path, monkeypatch):
        plist_path = tmp_path / "ai.hermes.gateway.plist"
        plist_path.write_text(gateway_cli.generate_launchd_plist(), encoding="utf-8")
+        label = gateway_cli.get_launchd_label()

        calls = []

        def fake_run(cmd, check=False, **kwargs):
            calls.append(cmd)
-            if cmd == ["launchctl", "start", "ai.hermes.gateway"] and calls.count(cmd) == 1:
+            if cmd == ["launchctl", "start", label] and calls.count(cmd) == 1:
                raise gateway_cli.subprocess.CalledProcessError(3, cmd, stderr="Could not find service")
            return SimpleNamespace(returncode=0, stdout="", stderr="")

@@ -167,9 +169,9 @@ class TestLaunchdServiceRecovery:
        gateway_cli.launchd_start()

        assert calls == [
-            ["launchctl", "start", "ai.hermes.gateway"],
+            ["launchctl", "start", label],
            ["launchctl", "load", str(plist_path)],
-            ["launchctl", "start", "ai.hermes.gateway"],
+            ["launchctl", "start", label],
        ]

    def test_launchd_status_reports_local_stale_plist_when_unloaded(self, tmp_path, monkeypatch, capsys):
@@ -354,6 +356,20 @@ class TestGeneratedUnitUsesDetectedVenv:
        assert "/venv/" not in unit or "/.venv/" in unit


+class TestGeneratedUnitIncludesLocalBin:
+    """Generated systemd units must mention ``.local/bin`` so uvx/pipx tools are discoverable."""
+
+    def test_user_unit_includes_local_bin_in_path(self):
+        """The user unit contains the current user's ``~/.local/bin`` path."""
+        expected_dir = f"{Path.home()}/.local/bin"
+        user_unit = gateway_cli.generate_systemd_unit(system=False)
+        assert expected_dir in user_unit
+
+    def test_system_unit_includes_local_bin_in_path(self):
+        """The system unit contains a ``/.local/bin`` entry."""
+        # System unit uses the resolved home dir from _system_service_identity,
+        # so only the home-independent suffix is checked.
+        system_unit = gateway_cli.generate_systemd_unit(system=True)
+        assert "/.local/bin" in system_unit
+
+
 class TestEnsureUserSystemdEnv:
    """Tests for _ensure_user_systemd_env() D-Bus session bus auto-detection."""

@@ -1,10 +1,13 @@
 """
-Tests for skip_confirm behavior in /skills install and /skills uninstall.
+Tests for skip_confirm and invalidate_cache behavior in /skills install
+and /skills uninstall slash commands.

-Verifies that --yes / -y bypasses the interactive confirmation prompt
-that hangs inside prompt_toolkit's TUI.
+Slash commands always skip confirmation (input() hangs in TUI).
+Cache invalidation is deferred by default; --now opts into immediate
+invalidation (at the cost of breaking prompt cache mid-session).

 Based on PR #1595 by 333Alden333 (salvaged).
+Updated for PR #3586 (cache-aware install/uninstall).
 """

 from unittest.mock import patch, MagicMock
@@ -32,23 +35,43 @@ class TestHandleSkillsSlashInstallFlags:
            _, kwargs = mock_install.call_args
            assert kwargs.get("skip_confirm") is True

-    def test_force_flag_sets_force_not_skip(self):
+    def test_force_flag_sets_force(self):
        from hermes_cli.skills_hub import handle_skills_slash
        with patch("hermes_cli.skills_hub.do_install") as mock_install:
            handle_skills_slash("/skills install test/skill --force")
            mock_install.assert_called_once()
            _, kwargs = mock_install.call_args
            assert kwargs.get("force") is True
-            assert kwargs.get("skip_confirm") is False
+            # Slash commands always skip confirmation (input() hangs in TUI)
+            assert kwargs.get("skip_confirm") is True

-    def test_no_flags(self):
+    def test_no_flags_still_skips_confirm(self):
+        """Slash commands always skip confirmation — input() hangs in TUI."""
        from hermes_cli.skills_hub import handle_skills_slash
        with patch("hermes_cli.skills_hub.do_install") as mock_install:
            handle_skills_slash("/skills install test/skill")
            mock_install.assert_called_once()
            _, kwargs = mock_install.call_args
            assert kwargs.get("force") is False
-            assert kwargs.get("skip_confirm") is False
+            assert kwargs.get("skip_confirm") is True
+
+    def test_default_defers_cache_invalidation(self):
+        """A plain ``/skills install`` passes ``invalidate_cache=False`` to do_install."""
+        from hermes_cli.skills_hub import handle_skills_slash
+
+        with patch("hermes_cli.skills_hub.do_install") as install_spy:
+            handle_skills_slash("/skills install test/skill")
+
+        # The mock keeps its call record after the patch has been undone.
+        install_spy.assert_called_once()
+        passed = install_spy.call_args.kwargs
+        assert passed.get("invalidate_cache") is False
+
+    def test_now_flag_invalidates_cache(self):
+        """``/skills install ... --now`` passes ``invalidate_cache=True`` to do_install."""
+        from hermes_cli.skills_hub import handle_skills_slash
+
+        with patch("hermes_cli.skills_hub.do_install") as install_spy:
+            handle_skills_slash("/skills install test/skill --now")
+
+        # The mock keeps its call record after the patch has been undone.
+        install_spy.assert_called_once()
+        passed = install_spy.call_args.kwargs
+        assert passed.get("invalidate_cache") is True


 class TestHandleSkillsSlashUninstallFlags:
@@ -70,13 +93,32 @@ class TestHandleSkillsSlashUninstallFlags:
            _, kwargs = mock_uninstall.call_args
            assert kwargs.get("skip_confirm") is True

-    def test_no_flags(self):
+    def test_no_flags_still_skips_confirm(self):
+        """Slash commands always skip confirmation — input() hangs in TUI."""
        from hermes_cli.skills_hub import handle_skills_slash
        with patch("hermes_cli.skills_hub.do_uninstall") as mock_uninstall:
            handle_skills_slash("/skills uninstall test-skill")
            mock_uninstall.assert_called_once()
            _, kwargs = mock_uninstall.call_args
-            assert kwargs.get("skip_confirm", False) is False
+            assert kwargs.get("skip_confirm") is True
+
+    def test_default_defers_cache_invalidation(self):
+        """A plain ``/skills uninstall`` passes ``invalidate_cache=False`` to do_uninstall."""
+        from hermes_cli.skills_hub import handle_skills_slash
+
+        with patch("hermes_cli.skills_hub.do_uninstall") as uninstall_spy:
+            handle_skills_slash("/skills uninstall test-skill")
+
+        # The mock keeps its call record after the patch has been undone.
+        uninstall_spy.assert_called_once()
+        passed = uninstall_spy.call_args.kwargs
+        assert passed.get("invalidate_cache") is False
+
+    def test_now_flag_invalidates_cache(self):
+        """``/skills uninstall ... --now`` passes ``invalidate_cache=True`` to do_uninstall."""
+        from hermes_cli.skills_hub import handle_skills_slash
+
+        with patch("hermes_cli.skills_hub.do_uninstall") as uninstall_spy:
+            handle_skills_slash("/skills uninstall test-skill --now")
+
+        # The mock keeps its call record after the patch has been undone.
+        uninstall_spy.assert_called_once()
+        passed = uninstall_spy.call_args.kwargs
+        assert passed.get("invalidate_cache") is True


 class TestDoInstallSkipConfirm:
@@ -237,3 +237,53 @@ def test_save_platform_tools_still_preserves_mcp_with_platform_default_present()

    # Deselected configurable toolset removed
    assert "terminal" not in saved
+
+
+# ── Platform / toolset consistency ────────────────────────────────────────────
+
+
+class TestPlatformToolsetConsistency:
+    """Cross-checks between tools_config.PLATFORMS, toolsets.TOOLSETS and skills_config.PLATFORMS."""
+
+    def test_all_platforms_have_toolset_definitions(self):
+        """The ``default_toolset`` of every platform is a key of TOOLSETS."""
+        from hermes_cli.tools_config import PLATFORMS
+        from toolsets import TOOLSETS
+
+        for platform in PLATFORMS:
+            ts_name = PLATFORMS[platform]["default_toolset"]
+            assert ts_name in TOOLSETS, (
+                f"Platform {platform!r} references toolset {ts_name!r} "
+                f"which is not defined in toolsets.py"
+            )
+
+    def test_gateway_toolset_includes_all_messaging_platforms(self):
+        """Every messaging platform's toolset is listed in hermes-gateway's includes."""
+        from hermes_cli.tools_config import PLATFORMS
+        from toolsets import TOOLSETS
+
+        # Exclude non-messaging platforms from the check
+        non_messaging = {"cli", "api_server"}
+        gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"])
+        messaging = (name for name in PLATFORMS if name not in non_messaging)
+        for platform in messaging:
+            ts_name = PLATFORMS[platform]["default_toolset"]
+            assert ts_name in gateway_includes, (
+                f"Platform {platform!r} toolset {ts_name!r} missing from "
+                f"hermes-gateway includes"
+            )
+
+    def test_skills_config_covers_tools_config_platforms(self):
+        """Every tools_config platform except ``api_server`` also exists in skills_config."""
+        from hermes_cli.tools_config import PLATFORMS as TOOLS_PLATFORMS
+        from hermes_cli.skills_config import PLATFORMS as SKILLS_PLATFORMS
+
+        non_messaging = {"api_server"}
+        checked = (name for name in TOOLS_PLATFORMS if name not in non_messaging)
+        for platform in checked:
+            assert platform in SKILLS_PLATFORMS, (
+                f"Platform {platform!r} in tools_config but missing from "
+                f"skills_config PLATFORMS"
+            )
@@ -267,7 +267,8 @@ def test_restore_stashed_changes_user_declines_reset(monkeypatch, tmp_path, caps


 def test_restore_stashed_changes_auto_resets_non_interactive(monkeypatch, tmp_path, capsys):
-    """Non-interactive mode auto-resets without prompting."""
+    """Non-interactive mode auto-resets without prompting and returns False
+    instead of sys.exit(1) so the update can continue (gateway /update path)."""
    calls = []

    def fake_run(cmd, **kwargs):
@@ -282,9 +283,9 @@ def test_restore_stashed_changes_auto_resets_non_interactive(monkeypatch, tmp_pa

    monkeypatch.setattr(hermes_main.subprocess, "run", fake_run)

-    with pytest.raises(SystemExit, match="1"):
-        hermes_main._restore_stashed_changes(["git"], tmp_path, "abc123", prompt_user=False)
+    result = hermes_main._restore_stashed_changes(["git"], tmp_path, "abc123", prompt_user=False)

+    assert result is False
    out = capsys.readouterr().out
    assert "Working tree reset to clean state" in out
    reset_calls = [c for c, _ in calls if c[1:3] == ["reset", "--hard"]]
@@ -384,3 +385,236 @@ def test_cmd_update_succeeds_with_extras(monkeypatch, tmp_path):
    install_cmds = [c for c in recorded if "pip" in c and "install" in c]
    assert len(install_cmds) == 1
    assert ".[all]" in install_cmds[0]
+
+
+# ---------------------------------------------------------------------------
+# ff-only fallback to reset --hard on diverged history
+# ---------------------------------------------------------------------------
+
+def _make_update_side_effect(
+    current_branch="main",
+    commit_count="3",
+    ff_only_fails=False,
+    reset_fails=False,
+    fetch_fails=False,
+    fetch_stderr="",
+):
+    """Build a subprocess.run side_effect for cmd_update tests."""
+    recorded = []
+
+    def side_effect(cmd, **kwargs):
+        recorded.append(cmd)
+        joined = " ".join(str(c) for c in cmd)
+        if "fetch" in joined and "origin" in joined:
+            if fetch_fails:
+                return SimpleNamespace(stdout="", stderr=fetch_stderr, returncode=128)
+            return SimpleNamespace(stdout="", stderr="", returncode=0)
+        if "rev-parse" in joined and "--abbrev-ref" in joined:
+            return SimpleNamespace(stdout=f"{current_branch}\n", stderr="", returncode=0)
+        if "checkout" in joined and "main" in joined:
+            return SimpleNamespace(stdout="", stderr="", returncode=0)
+        if "rev-list" in joined:
+            return SimpleNamespace(stdout=f"{commit_count}\n", stderr="", returncode=0)
+        if "--ff-only" in joined:
+            if ff_only_fails:
+                return SimpleNamespace(
+                    stdout="",
+                    stderr="fatal: Not possible to fast-forward, aborting.\n",
+                    returncode=128,
+                )
+            return SimpleNamespace(stdout="Updating abc..def\n", stderr="", returncode=0)
+        if "reset" in joined and "--hard" in joined:
+            if reset_fails:
+                return SimpleNamespace(stdout="", stderr="error: unable to write\n", returncode=1)
+            return SimpleNamespace(stdout="HEAD is now at abc123\n", stderr="", returncode=0)
+        return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+    return side_effect, recorded
+
+
+def test_cmd_update_falls_back_to_reset_when_ff_only_fails(monkeypatch, tmp_path, capsys):
+    """When --ff-only fails (diverged history), update resets to origin/{branch}."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
+
+    side_effect, recorded = _make_update_side_effect(ff_only_fails=True)
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    hermes_main.cmd_update(SimpleNamespace())
+
+    reset_calls = [c for c in recorded if "reset" in c and "--hard" in c]
+    assert len(reset_calls) == 1
+    assert reset_calls[0] == ["git", "reset", "--hard", "origin/main"]
+
+    out = capsys.readouterr().out
+    assert "Fast-forward not possible" in out
+
+
+def test_cmd_update_no_reset_when_ff_only_succeeds(monkeypatch, tmp_path):
+    """When --ff-only succeeds, no reset is attempted."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
+
+    side_effect, recorded = _make_update_side_effect()
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    hermes_main.cmd_update(SimpleNamespace())
+
+    reset_calls = [c for c in recorded if "reset" in c and "--hard" in c]
+    assert len(reset_calls) == 0
+
+
+# ---------------------------------------------------------------------------
+# Non-main branch → auto-checkout main
+# ---------------------------------------------------------------------------
+
+def test_cmd_update_switches_to_main_from_feature_branch(monkeypatch, tmp_path, capsys):
+    """When on a feature branch, update checks out main before pulling."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
+
+    side_effect, recorded = _make_update_side_effect(current_branch="fix/something")
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    hermes_main.cmd_update(SimpleNamespace())
+
+    checkout_calls = [c for c in recorded if "checkout" in c and "main" in c]
+    assert len(checkout_calls) == 1
+
+    out = capsys.readouterr().out
+    assert "fix/something" in out
+    assert "switching to main" in out
+
+
+def test_cmd_update_switches_to_main_from_detached_head(monkeypatch, tmp_path, capsys):
+    """When in detached HEAD state, update checks out main before pulling."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
+
+    side_effect, recorded = _make_update_side_effect(current_branch="HEAD")
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    hermes_main.cmd_update(SimpleNamespace())
+
+    checkout_calls = [c for c in recorded if "checkout" in c and "main" in c]
+    assert len(checkout_calls) == 1
+
+    out = capsys.readouterr().out
+    assert "detached HEAD" in out
+
+
+def test_cmd_update_restores_stash_and_branch_when_already_up_to_date(monkeypatch, tmp_path, capsys):
+    """When on a feature branch with no updates, stash is restored and branch switched back."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
+
+    # Enable stash so it returns a ref
+    monkeypatch.setattr(
+        hermes_main, "_stash_local_changes_if_needed",
+        lambda *a, **kw: "abc123deadbeef",
+    )
+    restore_calls = []
+    monkeypatch.setattr(
+        hermes_main, "_restore_stashed_changes",
+        lambda *a, **kw: restore_calls.append(1) or True,
+    )
+
+    side_effect, recorded = _make_update_side_effect(
+        current_branch="fix/something", commit_count="0",
+    )
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    hermes_main.cmd_update(SimpleNamespace())
+
+    # Stash should have been restored
+    assert len(restore_calls) == 1
+
+    # Should have checked out back to the original branch
+    checkout_back = [c for c in recorded if "checkout" in c and "fix/something" in c]
+    assert len(checkout_back) == 1
+
+    out = capsys.readouterr().out
+    assert "Already up to date" in out
+
+
+def test_cmd_update_no_checkout_when_already_on_main(monkeypatch, tmp_path):
+    """When already on main, no checkout is needed."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
+
+    side_effect, recorded = _make_update_side_effect()
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    hermes_main.cmd_update(SimpleNamespace())
+
+    checkout_calls = [c for c in recorded if "checkout" in c]
+    assert len(checkout_calls) == 0
+
+
+# ---------------------------------------------------------------------------
+# Fetch failure — friendly error messages
+# ---------------------------------------------------------------------------
+
+def test_cmd_update_network_error_shows_friendly_message(monkeypatch, tmp_path, capsys):
+    """Network failures during fetch show a user-friendly message."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+
+    side_effect, _ = _make_update_side_effect(
+        fetch_fails=True,
+        fetch_stderr="fatal: unable to access 'https://...': Could not resolve host: github.com",
+    )
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    with pytest.raises(SystemExit, match="1"):
+        hermes_main.cmd_update(SimpleNamespace())
+
+    out = capsys.readouterr().out
+    assert "Network error" in out
+
+
+def test_cmd_update_auth_error_shows_friendly_message(monkeypatch, tmp_path, capsys):
+    """Auth failures during fetch show a user-friendly message."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+
+    side_effect, _ = _make_update_side_effect(
+        fetch_fails=True,
+        fetch_stderr="fatal: Authentication failed for 'https://...'",
+    )
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    with pytest.raises(SystemExit, match="1"):
+        hermes_main.cmd_update(SimpleNamespace())
+
+    out = capsys.readouterr().out
+    assert "Authentication failed" in out
+
+
+# ---------------------------------------------------------------------------
+# reset --hard failure — don't attempt stash restore
+# ---------------------------------------------------------------------------
+
+def test_cmd_update_skips_stash_restore_when_reset_fails(monkeypatch, tmp_path, capsys):
+    """When reset --hard fails, stash restore is skipped with a helpful message."""
+    _setup_update_mocks(monkeypatch, tmp_path)
+    # Re-enable stash so it actually returns a ref
+    monkeypatch.setattr(
+        hermes_main, "_stash_local_changes_if_needed",
+        lambda *a, **kw: "abc123deadbeef",
+    )
+    restore_calls = []
+    monkeypatch.setattr(
+        hermes_main, "_restore_stashed_changes",
+        lambda *a, **kw: restore_calls.append(1) or True,
+    )
+
+    side_effect, _ = _make_update_side_effect(ff_only_fails=True, reset_fails=True)
+    monkeypatch.setattr(hermes_main.subprocess, "run", side_effect)
+
+    with pytest.raises(SystemExit, match="1"):
+        hermes_main.cmd_update(SimpleNamespace())
+
+    # Stash restore should NOT have been called
+    assert len(restore_calls) == 0
+
+    out = capsys.readouterr().out
+    assert "preserved in stash" in out
@@ -101,6 +101,69 @@ class TestLaunchdPlistReplace:
        assert replace_idx == run_idx + 1


+class TestLaunchdPlistPath:
+    def test_plist_contains_environment_variables(self):
+        plist = gateway_cli.generate_launchd_plist()
+        assert "<key>EnvironmentVariables</key>" in plist
+        assert "<key>PATH</key>" in plist
+        assert "<key>VIRTUAL_ENV</key>" in plist
+        assert "<key>HERMES_HOME</key>" in plist
+
+    def test_plist_path_includes_venv_bin(self):
+        plist = gateway_cli.generate_launchd_plist()
+        detected = gateway_cli._detect_venv_dir()
+        venv_bin = str(detected / "bin") if detected else str(gateway_cli.PROJECT_ROOT / "venv" / "bin")
+        assert venv_bin in plist
+
+    def test_plist_path_starts_with_venv_bin(self):
+        plist = gateway_cli.generate_launchd_plist()
+        lines = plist.splitlines()
+        for i, line in enumerate(lines):
+            if "<key>PATH</key>" in line.strip():
+                path_value = lines[i + 1].strip()
+                path_value = path_value.replace("<string>", "").replace("</string>", "")
+                detected = gateway_cli._detect_venv_dir()
+                venv_bin = str(detected / "bin") if detected else str(gateway_cli.PROJECT_ROOT / "venv" / "bin")
+                assert path_value.startswith(venv_bin + ":")
+                break
+        else:
+            raise AssertionError("PATH key not found in plist")
+
+    def test_plist_path_includes_node_modules_bin(self):
+        plist = gateway_cli.generate_launchd_plist()
+        node_bin = str(gateway_cli.PROJECT_ROOT / "node_modules" / ".bin")
+        lines = plist.splitlines()
+        for i, line in enumerate(lines):
+            if "<key>PATH</key>" in line.strip():
+                path_value = lines[i + 1].strip()
+                path_value = path_value.replace("<string>", "").replace("</string>", "")
+                assert node_bin in path_value.split(":")
+                break
+        else:
+            raise AssertionError("PATH key not found in plist")
+
+    def test_plist_path_includes_current_env_path(self, monkeypatch):
+        monkeypatch.setenv("PATH", "/custom/bin:/usr/bin:/bin")
+        plist = gateway_cli.generate_launchd_plist()
+        assert "/custom/bin" in plist
+
+    def test_plist_path_deduplicates_venv_bin_when_already_in_path(self, monkeypatch):
+        detected = gateway_cli._detect_venv_dir()
+        venv_bin = str(detected / "bin") if detected else str(gateway_cli.PROJECT_ROOT / "venv" / "bin")
+        monkeypatch.setenv("PATH", f"{venv_bin}:/usr/bin:/bin")
+        plist = gateway_cli.generate_launchd_plist()
+        lines = plist.splitlines()
+        for i, line in enumerate(lines):
+            if "<key>PATH</key>" in line.strip():
+                path_value = lines[i + 1].strip()
+                path_value = path_value.replace("<string>", "").replace("</string>", "")
+                parts = path_value.split(":")
+                assert parts.count(venv_bin) == 1
+                break
+        else:
+            raise AssertionError("PATH key not found in plist")
+
+
 # ---------------------------------------------------------------------------
 # cmd_update — macOS launchd detection
 # ---------------------------------------------------------------------------
@@ -177,6 +240,33 @@ class TestLaunchdPlistRefresh:
        assert any("unload" in s for s in cmd_strs)
        assert any("start" in s for s in cmd_strs)

+    def test_launchd_start_recreates_missing_plist_and_loads_service(self, tmp_path, monkeypatch):
+        """launchd_start self-heals when the plist file is missing entirely."""
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        assert not plist_path.exists()
+
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        calls = []
+        def fake_run(cmd, check=False, **kwargs):
+            calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        gateway_cli.launchd_start()
+
+        # Should have created the plist
+        assert plist_path.exists()
+        assert "--replace" in plist_path.read_text()
+
+        cmd_strs = [" ".join(c) for c in calls]
+        # Should load the new plist, then start
+        assert any("load" in s for s in cmd_strs)
+        assert any("start" in s for s in cmd_strs)
+        # Should NOT call unload (nothing to unload)
+        assert not any("unload" in s for s in cmd_strs)
+

 class TestCmdUpdateLaunchdRestart:
    """cmd_update correctly detects and handles launchd on macOS."""
@@ -0,0 +1,189 @@
+"""Tests for hermes_cli/webhook.py — webhook subscription CLI."""
+
+import json
+import os
+import pytest
+from argparse import Namespace
+from pathlib import Path
+
+from hermes_cli.webhook import (
+    webhook_command,
+    _load_subscriptions,
+    _save_subscriptions,
+    _subscriptions_path,
+    _is_webhook_enabled,
+)
+
+
+@pytest.fixture(autouse=True)
+def _isolate(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    # Default: webhooks enabled (most tests need this)
+    monkeypatch.setattr(
+        "hermes_cli.webhook._is_webhook_enabled", lambda: True
+    )
+
+
+def _make_args(**kwargs):
+    defaults = {
+        "webhook_action": None,
+        "name": "",
+        "prompt": "",
+        "events": "",
+        "description": "",
+        "skills": "",
+        "deliver": "log",
+        "deliver_chat_id": "",
+        "secret": "",
+        "payload": "",
+    }
+    defaults.update(kwargs)
+    return Namespace(**defaults)
+
+
+class TestSubscribe:
+    def test_basic_create(self, capsys):
+        webhook_command(_make_args(webhook_action="subscribe", name="test-hook"))
+        out = capsys.readouterr().out
+        assert "Created" in out
+        assert "/webhooks/test-hook" in out
+        subs = _load_subscriptions()
+        assert "test-hook" in subs
+
+    def test_with_options(self, capsys):
+        webhook_command(_make_args(
+            webhook_action="subscribe",
+            name="gh-issues",
+            events="issues,pull_request",
+            prompt="Issue: {issue.title}",
+            deliver="telegram",
+            deliver_chat_id="12345",
+            description="Watch GitHub",
+        ))
+        subs = _load_subscriptions()
+        route = subs["gh-issues"]
+        assert route["events"] == ["issues", "pull_request"]
+        assert route["prompt"] == "Issue: {issue.title}"
+        assert route["deliver"] == "telegram"
+        assert route["deliver_extra"] == {"chat_id": "12345"}
+
+    def test_custom_secret(self):
+        webhook_command(_make_args(
+            webhook_action="subscribe", name="s", secret="my-secret"
+        ))
+        assert _load_subscriptions()["s"]["secret"] == "my-secret"
+
+    def test_auto_secret(self):
+        webhook_command(_make_args(webhook_action="subscribe", name="s"))
+        secret = _load_subscriptions()["s"]["secret"]
+        assert len(secret) > 20
+
+    def test_update(self, capsys):
+        webhook_command(_make_args(webhook_action="subscribe", name="x", prompt="v1"))
+        webhook_command(_make_args(webhook_action="subscribe", name="x", prompt="v2"))
+        out = capsys.readouterr().out
+        assert "Updated" in out
+        assert _load_subscriptions()["x"]["prompt"] == "v2"
+
+    def test_invalid_name(self, capsys):
+        webhook_command(_make_args(webhook_action="subscribe", name="bad name!"))
+        out = capsys.readouterr().out
+        assert "Error" in out or "Invalid" in out
+        assert _load_subscriptions() == {}
+
+
+class TestList:
+    def test_empty(self, capsys):
+        webhook_command(_make_args(webhook_action="list"))
+        out = capsys.readouterr().out
+        assert "No dynamic" in out
+
+    def test_with_entries(self, capsys):
+        webhook_command(_make_args(webhook_action="subscribe", name="a"))
+        webhook_command(_make_args(webhook_action="subscribe", name="b"))
+        capsys.readouterr()  # clear
+        webhook_command(_make_args(webhook_action="list"))
+        out = capsys.readouterr().out
+        assert "2 webhook" in out
+        assert "a" in out
+        assert "b" in out
+
+
+class TestRemove:
+    def test_remove_existing(self, capsys):
+        webhook_command(_make_args(webhook_action="subscribe", name="temp"))
+        webhook_command(_make_args(webhook_action="remove", name="temp"))
+        out = capsys.readouterr().out
+        assert "Removed" in out
+        assert _load_subscriptions() == {}
+
+    def test_remove_nonexistent(self, capsys):
+        webhook_command(_make_args(webhook_action="remove", name="nope"))
+        out = capsys.readouterr().out
+        assert "No subscription" in out
+
+    def test_selective_remove(self):
+        webhook_command(_make_args(webhook_action="subscribe", name="keep"))
+        webhook_command(_make_args(webhook_action="subscribe", name="drop"))
+        webhook_command(_make_args(webhook_action="remove", name="drop"))
+        subs = _load_subscriptions()
+        assert "keep" in subs
+        assert "drop" not in subs
+
+
+class TestPersistence:
+    def test_file_written(self):
+        webhook_command(_make_args(webhook_action="subscribe", name="persist"))
+        path = _subscriptions_path()
+        assert path.exists()
+        data = json.loads(path.read_text())
+        assert "persist" in data
+
+    def test_corrupted_file(self):
+        path = _subscriptions_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text("broken{{{")
+        assert _load_subscriptions() == {}
+
+
+class TestWebhookEnabledGate:
+    def test_blocks_when_disabled(self, capsys, monkeypatch):
+        monkeypatch.setattr("hermes_cli.webhook._is_webhook_enabled", lambda: False)
+        webhook_command(_make_args(webhook_action="subscribe", name="blocked"))
+        out = capsys.readouterr().out
+        assert "not enabled" in out.lower()
+        assert "hermes gateway setup" in out
+        assert _load_subscriptions() == {}
+
+    def test_blocks_list_when_disabled(self, capsys, monkeypatch):
+        monkeypatch.setattr("hermes_cli.webhook._is_webhook_enabled", lambda: False)
+        webhook_command(_make_args(webhook_action="list"))
+        out = capsys.readouterr().out
+        assert "not enabled" in out.lower()
+
+    def test_allows_when_enabled(self, capsys):
+        # _is_webhook_enabled already patched to True by autouse fixture
+        webhook_command(_make_args(webhook_action="subscribe", name="allowed"))
+        out = capsys.readouterr().out
+        assert "Created" in out
+        assert "allowed" in _load_subscriptions()
+
+    def test_real_check_disabled(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.webhook._get_webhook_config",
+            lambda: {},
+        )
+        monkeypatch.setattr(
+            "hermes_cli.webhook._is_webhook_enabled",
+            lambda: bool({}.get("enabled")),
+        )
+        import hermes_cli.webhook as wh_mod
+        assert wh_mod._is_webhook_enabled() is False
+
+    def test_real_check_enabled(self, monkeypatch):
+        monkeypatch.setattr(
+            "hermes_cli.webhook._is_webhook_enabled",
+            lambda: True,
+        )
+        import hermes_cli.webhook as wh_mod
+        assert wh_mod._is_webhook_enabled() is True
@@ -69,10 +69,12 @@ class TestFormatContextPressure:
        assert isinstance(result, str)

    def test_over_100_percent_capped(self):
-        """Progress > 1.0 should not break the bar."""
+        """Progress > 1.0 should cap both bar and percentage text at 100%."""
        line = format_context_pressure(1.05, 100_000, 0.50)
        assert "▰" in line
        assert line.count("▰") == 20
+        assert "100%" in line
+        assert "105%" not in line


 class TestFormatContextPressureGateway:
@@ -100,6 +102,13 @@ class TestFormatContextPressureGateway:
        msg = format_context_pressure_gateway(0.80, 0.50)
        assert "▰" in msg

+    def test_over_100_percent_capped(self):
+        """Progress > 1.0 should cap percentage text at 100%."""
+        msg = format_context_pressure_gateway(1.09, 0.50)
+        assert "100% to compaction" in msg
+        assert "109%" not in msg
+        assert msg.count("▰") == 20
+

 # ---------------------------------------------------------------------------
 # AIAgent context pressure flag tests
@@ -0,0 +1,154 @@
+"""Tests for percentage clamping at 100% across display paths.
+
+PR #3480 capped context pressure percentage at 100% in agent/display.py
+but missed the same unclamped pattern in 4 other files. When token counts
+overshoot the context length (possible during streaming or before
+compression fires), users see >100% in /stats, gateway status, and
+memory tool output.
+"""
+
+import pytest
+
+
+class TestContextCompressorUsagePercent:
+    """agent/context_compressor.py — get_status() usage_percent"""
+
+    def test_usage_percent_capped_at_100(self):
+        """Tokens exceeding context_length should still show max 100%."""
+        from agent.context_compressor import ContextCompressor
+
+        comp = ContextCompressor.__new__(ContextCompressor)
+        comp.last_prompt_tokens = 210_000  # exceeds context_length
+        comp.context_length = 200_000
+        comp.threshold_tokens = 160_000
+        comp.compression_count = 0
+
+        status = comp.get_status()
+        assert status["usage_percent"] <= 100
+
+    def test_usage_percent_normal(self):
+        """Normal usage should show correct percentage."""
+        from agent.context_compressor import ContextCompressor
+
+        comp = ContextCompressor.__new__(ContextCompressor)
+        comp.last_prompt_tokens = 100_000
+        comp.context_length = 200_000
+        comp.threshold_tokens = 160_000
+        comp.compression_count = 0
+
+        status = comp.get_status()
+        assert status["usage_percent"] == 50.0
+
+    def test_usage_percent_zero_context_length(self):
+        """Zero context_length should return 0, not crash."""
+        from agent.context_compressor import ContextCompressor
+
+        comp = ContextCompressor.__new__(ContextCompressor)
+        comp.last_prompt_tokens = 1000
+        comp.context_length = 0
+        comp.threshold_tokens = 0
+        comp.compression_count = 0
+
+        status = comp.get_status()
+        assert status["usage_percent"] == 0
+
+
+class TestMemoryToolPercentClamp:
+    """tools/memory_tool.py — _success_response and _render_block pct"""
+
+    def test_over_limit_clamped_at_100(self):
+        """Percentage should be capped at 100 even if current > limit."""
+        # Simulate the calculation directly
+        current = 5500
+        limit = 5000
+        pct = min(100, int((current / limit) * 100)) if limit > 0 else 0
+        assert pct == 100
+
+    def test_normal_percentage(self):
+        current = 2500
+        limit = 5000
+        pct = min(100, int((current / limit) * 100)) if limit > 0 else 0
+        assert pct == 50
+
+    def test_zero_limit_returns_zero(self):
+        current = 100
+        limit = 0
+        pct = min(100, int((current / limit) * 100)) if limit > 0 else 0
+        assert pct == 0
+
+
+class TestCLIStatsPercentClamp:
+    """cli.py — /stats command percentage"""
+
+    def test_over_context_clamped_at_100(self):
+        """Tokens exceeding context_length should show max 100%."""
+        last_prompt = 210_000
+        ctx_len = 200_000
+        pct = min(100, (last_prompt / ctx_len * 100)) if ctx_len else 0
+        assert pct == 100
+
+    def test_normal_context(self):
+        last_prompt = 100_000
+        ctx_len = 200_000
+        pct = min(100, (last_prompt / ctx_len * 100)) if ctx_len else 0
+        assert pct == 50.0
+
+    def test_zero_context_length(self):
+        last_prompt = 1000
+        ctx_len = 0
+        pct = min(100, (last_prompt / ctx_len * 100)) if ctx_len else 0
+        assert pct == 0
+
+
+class TestGatewayStatsPercentClamp:
+    """gateway/run.py — _format_usage_stats percentage"""
+
+    def test_over_context_clamped_at_100(self):
+        last_prompt_tokens = 210_000
+        context_length = 200_000
+        pct = min(100, last_prompt_tokens / context_length * 100) if context_length else 0
+        assert pct == 100
+
+    def test_normal_context(self):
+        last_prompt_tokens = 150_000
+        context_length = 200_000
+        pct = min(100, last_prompt_tokens / context_length * 100) if context_length else 0
+        assert pct == 75.0
+
+
+class TestSourceLinesAreClamped:
+    """Verify the actual source files have min(100, ...) applied."""
+
+    @staticmethod
+    def _read_file(rel_path: str) -> str:
+        import os
+        base = os.path.dirname(os.path.dirname(__file__))
+        with open(os.path.join(base, rel_path)) as f:
+            return f.read()
+
+    def test_context_compressor_clamped(self):
+        src = self._read_file("agent/context_compressor.py")
+        assert "min(100," in src, (
+            "context_compressor.py usage_percent is not clamped with min(100, ...)"
+        )
+
+    def test_gateway_run_clamped(self):
+        src = self._read_file("gateway/run.py")
+        # Check that the stats handler has min(100, ...)
+        assert "min(100, ctx.last_prompt_tokens" in src, (
+            "gateway/run.py stats pct is not clamped with min(100, ...)"
+        )
+
+    def test_cli_clamped(self):
+        src = self._read_file("cli.py")
+        assert "min(100, (last_prompt" in src, (
+            "cli.py /stats pct is not clamped with min(100, ...)"
+        )
+
+    def test_memory_tool_clamped(self):
+        src = self._read_file("tools/memory_tool.py")
+        # Both _success_response and _render_block should have min(100, ...)
+        count = src.count("min(100, int((current / limit)")
+        assert count >= 2, (
+            f"memory_tool.py has only {count} clamped pct lines, expected >= 2"
+        )
@@ -226,6 +226,42 @@ class TestPluginHooks:
        # Should not raise despite 1/0
        mgr.invoke_hook("post_tool_call", tool_name="x", args={}, result="r", task_id="")

+    def test_hook_return_values_collected(self, tmp_path, monkeypatch):
+        """invoke_hook() collects non-None return values from callbacks."""
+        plugins_dir = tmp_path / "hermes_test" / "plugins"
+        _make_plugin_dir(
+            plugins_dir, "ctx_plugin",
+            register_body=(
+                'ctx.register_hook("pre_llm_call", '
+                'lambda **kw: {"context": "memory from plugin"})'
+            ),
+        )
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
+
+        mgr = PluginManager()
+        mgr.discover_and_load()
+
+        results = mgr.invoke_hook("pre_llm_call", session_id="s1", user_message="hi",
+                                  conversation_history=[], is_first_turn=True, model="test")
+        assert len(results) == 1
+        assert results[0] == {"context": "memory from plugin"}
+
+    def test_hook_none_returns_excluded(self, tmp_path, monkeypatch):
+        """invoke_hook() excludes None returns from the result list."""
+        plugins_dir = tmp_path / "hermes_test" / "plugins"
+        _make_plugin_dir(
+            plugins_dir, "none_hook",
+            register_body='ctx.register_hook("post_llm_call", lambda **kw: None)',
+        )
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
+
+        mgr = PluginManager()
+        mgr.discover_and_load()
+
+        results = mgr.invoke_hook("post_llm_call", session_id="s1",
+                                  user_message="hi", assistant_response="bye", model="test")
+        assert results == []
+
    def test_invalid_hook_name_warns(self, tmp_path, monkeypatch, caplog):
        """Registering an unknown hook name logs a warning."""
        plugins_dir = tmp_path / "hermes_test" / "plugins"
@@ -617,6 +617,132 @@ class TestBuildSystemPrompt:
        assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"}


+class TestToolUseEnforcementConfig:
+    """Tests for the agent.tool_use_enforcement config option."""
+
+    def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"):
+        """Create an agent with tools and a specific enforcement config."""
+        with (
+            patch(
+                "run_agent.get_tool_definitions",
+                return_value=_make_tool_defs("terminal", "web_search"),
+            ),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("run_agent.OpenAI"),
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"agent": {"tool_use_enforcement": tool_use_enforcement}},
+            ),
+        ):
+            a = AIAgent(
+                model=model,
+                api_key="test-key-1234567890",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+            a.client = MagicMock()
+            return a
+
+    def test_auto_injects_for_gpt(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_auto_injects_for_codex(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/codex-mini", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_auto_skips_for_claude(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_true_forces_for_all_models(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True)
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_string_true_forces_for_all_models(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="true")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_always_forces_for_all_models(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="always")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_false_disables_for_gpt(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement=False)
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_string_false_disables(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="off")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_custom_list_matches(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(
+            model="deepseek/deepseek-r1",
+            tool_use_enforcement=["deepseek", "gemini"],
+        )
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_custom_list_no_match(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(
+            model="anthropic/claude-sonnet-4",
+            tool_use_enforcement=["deepseek", "gemini"],
+        )
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+    def test_custom_list_case_insensitive(self):
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(
+            model="openai/GPT-4.1",
+            tool_use_enforcement=["GPT", "Codex"],
+        )
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_no_tools_never_injects(self):
+        """Even with enforcement=true, no injection when agent has no tools."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        with (
+            patch("run_agent.get_tool_definitions", return_value=[]),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("run_agent.OpenAI"),
+            patch(
+                "hermes_cli.config.load_config",
+                return_value={"agent": {"tool_use_enforcement": True}},
+            ),
+        ):
+            a = AIAgent(
+                api_key="test-key-1234567890",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+                enabled_toolsets=[],
+            )
+            a.client = MagicMock()
+            prompt = a._build_system_prompt()
+            assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
+
+
 class TestInvalidateSystemPrompt:
    def test_clears_cache(self, agent):
        agent._cached_system_prompt = "cached value"
@@ -2667,6 +2793,50 @@ class TestStreamingApiCall:
        assert tc[0].function.name == "search"
        assert tc[1].function.name == "read"

+    def test_ollama_reused_index_separate_tool_calls(self, agent):
+        """Ollama sends every tool call at index 0 with different ids.
+
+        Without the fix, names and arguments get concatenated into one slot.
+        """
+        chunks = [
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]),
+            # Second tool call at the SAME index 0, but different id
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]),
+            _make_chunk(finish_reason="tool_calls"),
+        ]
+        agent.client.chat.completions.create.return_value = iter(chunks)
+
+        resp = agent._interruptible_streaming_api_call({"messages": []})
+
+        tc = resp.choices[0].message.tool_calls
+        assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
+        assert tc[0].function.name == "search"
+        assert tc[0].function.arguments == '{"q":"hello"}'
+        assert tc[0].id == "call_a"
+        assert tc[1].function.name == "read_file"
+        assert tc[1].function.arguments == '{"path":"x.py"}'
+        assert tc[1].id == "call_b"
+
+    def test_ollama_reused_index_streamed_args(self, agent):
+        """Ollama with streamed arguments across multiple chunks at same index."""
+        chunks = [
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":')]),
+            _make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"hello"}')]),
+            # New tool call, same index 0
+            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]),
+            _make_chunk(finish_reason="tool_calls"),
+        ]
+        agent.client.chat.completions.create.return_value = iter(chunks)
+
+        resp = agent._interruptible_streaming_api_call({"messages": []})
+
+        tc = resp.choices[0].message.tool_calls
+        assert len(tc) == 2
+        assert tc[0].function.name == "search"
+        assert tc[0].function.arguments == '{"q":"hello"}'
+        assert tc[1].function.name == "read"
+        assert tc[1].function.arguments == '{}'
+
    def test_content_and_tool_calls_together(self, agent):
        chunks = [
            _make_chunk(content="I'll search"),
@@ -493,22 +493,22 @@ def test_minimax_default_url_uses_anthropic_messages(monkeypatch):
    assert resolved["base_url"] == "https://api.minimax.io/anthropic"


-def test_minimax_stale_v1_url_auto_corrected(monkeypatch):
-    """MiniMax with stale /v1 base URL should be auto-corrected to /anthropic."""
+def test_minimax_v1_url_uses_chat_completions(monkeypatch):
+    """MiniMax with /v1 base URL should use chat_completions (user override for regions where /anthropic 404s)."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
    monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
-    monkeypatch.setenv("MINIMAX_BASE_URL", "https://api.minimax.io/v1")
+    monkeypatch.setenv("MINIMAX_BASE_URL", "https://api.minimax.chat/v1")

    resolved = rp.resolve_runtime_provider(requested="minimax")

    assert resolved["provider"] == "minimax"
-    assert resolved["api_mode"] == "anthropic_messages"
-    assert resolved["base_url"] == "https://api.minimax.io/anthropic"
+    assert resolved["api_mode"] == "chat_completions"
+    assert resolved["base_url"] == "https://api.minimax.chat/v1"


-def test_minimax_cn_stale_v1_url_auto_corrected(monkeypatch):
-    """MiniMax-CN with stale /v1 base URL should be auto-corrected to /anthropic."""
+def test_minimax_cn_v1_url_uses_chat_completions(monkeypatch):
+    """MiniMax-CN with /v1 base URL should use chat_completions (user override)."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax-cn")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
    monkeypatch.setenv("MINIMAX_CN_API_KEY", "test-minimax-cn-key")
@@ -517,8 +517,8 @@ def test_minimax_cn_stale_v1_url_auto_corrected(monkeypatch):
    resolved = rp.resolve_runtime_provider(requested="minimax-cn")

    assert resolved["provider"] == "minimax-cn"
-    assert resolved["api_mode"] == "anthropic_messages"
-    assert resolved["base_url"] == "https://api.minimaxi.com/anthropic"
+    assert resolved["api_mode"] == "chat_completions"
+    assert resolved["base_url"] == "https://api.minimaxi.com/v1"


 def test_minimax_explicit_api_mode_respected(monkeypatch):
@@ -534,8 +534,8 @@ def test_minimax_explicit_api_mode_respected(monkeypatch):
    assert resolved["api_mode"] == "chat_completions"


-def test_alibaba_default_anthropic_endpoint_uses_anthropic_messages(monkeypatch):
-    """Alibaba with default /apps/anthropic URL should use anthropic_messages mode."""
+def test_alibaba_default_coding_intl_endpoint_uses_chat_completions(monkeypatch):
+    """Alibaba default coding-intl /v1 URL should use chat_completions mode."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "alibaba")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
    monkeypatch.setenv("DASHSCOPE_API_KEY", "test-dashscope-key")
@@ -544,22 +544,22 @@ def test_alibaba_default_anthropic_endpoint_uses_anthropic_messages(monkeypatch)
    resolved = rp.resolve_runtime_provider(requested="alibaba")

    assert resolved["provider"] == "alibaba"
-    assert resolved["api_mode"] == "anthropic_messages"
-    assert resolved["base_url"] == "https://dashscope-intl.aliyuncs.com/apps/anthropic"
+    assert resolved["api_mode"] == "chat_completions"
+    assert resolved["base_url"] == "https://coding-intl.dashscope.aliyuncs.com/v1"


-def test_alibaba_openai_compatible_v1_endpoint_stays_chat_completions(monkeypatch):
-    """Alibaba with /v1 coding endpoint should use chat_completions mode."""
+def test_alibaba_anthropic_endpoint_override_uses_anthropic_messages(monkeypatch):
+    """Alibaba with /apps/anthropic URL override should auto-detect anthropic_messages mode."""
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "alibaba")
    monkeypatch.setattr(rp, "_get_model_config", lambda: {})
    monkeypatch.setenv("DASHSCOPE_API_KEY", "test-dashscope-key")
-    monkeypatch.setenv("DASHSCOPE_BASE_URL", "https://coding-intl.dashscope.aliyuncs.com/v1")
+    monkeypatch.setenv("DASHSCOPE_BASE_URL", "https://coding-intl.dashscope.aliyuncs.com/apps/anthropic")

    resolved = rp.resolve_runtime_provider(requested="alibaba")

    assert resolved["provider"] == "alibaba"
-    assert resolved["api_mode"] == "chat_completions"
-    assert resolved["base_url"] == "https://coding-intl.dashscope.aliyuncs.com/v1"
+    assert resolved["api_mode"] == "anthropic_messages"
+    assert resolved["base_url"] == "https://coding-intl.dashscope.aliyuncs.com/apps/anthropic"


 def test_named_custom_provider_anthropic_api_mode(monkeypatch):
@@ -362,9 +362,11 @@ class TestStreamingCallbacks:

        # Text before tool call IS fired (we don't know yet it will have tools)
        assert "thinking..." in deltas
-        # Text after tool call is NOT fired
-        assert " more text" not in deltas
-        # But content is still accumulated in the response
+        # Text after tool call IS still routed to stream_delta_callback so that
+        # reasoning tag extraction can fire (PR #3566).  Display-level suppression
+        # of non-reasoning text happens in the CLI's _stream_delta, not here.
+        assert " more text" in deltas
+        # Content is still accumulated in the response
        assert response.choices[0].message.content == "thinking... more text"


@@ -532,6 +534,121 @@ class TestStreamingFallback:
        mock_non_stream.assert_called_once()
        assert mock_close.call_count >= 1

+    @patch("run_agent.AIAgent._interruptible_api_call")
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_sse_connection_lost_retried_as_transient(self, mock_close, mock_create, mock_non_stream):
+        """SSE 'Network connection lost' (APIError w/ no status_code) retries like httpx errors.
+
+        OpenRouter sends {"error":{"message":"Network connection lost."}} as an SSE
+        event when the upstream stream drops.  The OpenAI SDK raises APIError from
+        this.  It should be retried at the streaming level, same as httpx connection
+        errors, before falling back to non-streaming.
+        """
+        from run_agent import AIAgent
+        import httpx
+
+        # Create an APIError that mimics what the OpenAI SDK raises from SSE error events.
+        # Key: no status_code attribute (unlike APIStatusError which has one).
+        from openai import APIError as OAIAPIError
+        sse_error = OAIAPIError(
+            message="Network connection lost.",
+            request=httpx.Request("POST", "https://openrouter.ai/api/v1/chat/completions"),
+            body={"message": "Network connection lost."},
+        )
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = sse_error
+        mock_create.return_value = mock_client
+
+        fallback_response = SimpleNamespace(
+            id="fallback",
+            model="test",
+            choices=[SimpleNamespace(
+                index=0,
+                message=SimpleNamespace(
+                    role="assistant",
+                    content="fallback after SSE retries",
+                    tool_calls=None,
+                    reasoning_content=None,
+                ),
+                finish_reason="stop",
+            )],
+            usage=None,
+        )
+        mock_non_stream.return_value = fallback_response
+
+        agent = AIAgent(
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        response = agent._interruptible_streaming_api_call({})
+
+        assert response.choices[0].message.content == "fallback after SSE retries"
+        # Streaming should be attempted 3 times (default HERMES_STREAM_RETRIES=2 → 3 attempts)
+        # before falling back to non-streaming
+        assert mock_client.chat.completions.create.call_count == 3
+        mock_non_stream.assert_called_once()
+        # Connection cleanup should happen for each failed retry
+        assert mock_close.call_count >= 2
+
+    @patch("run_agent.AIAgent._interruptible_api_call")
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_sse_non_connection_error_falls_back_immediately(self, mock_close, mock_create, mock_non_stream):
+        """SSE errors that aren't connection-related still fall back immediately (no stream retry)."""
+        from run_agent import AIAgent
+        import httpx
+
+        from openai import APIError as OAIAPIError
+        sse_error = OAIAPIError(
+            message="Invalid model configuration.",
+            request=httpx.Request("POST", "https://openrouter.ai/api/v1/chat/completions"),
+            body={"message": "Invalid model configuration."},
+        )
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = sse_error
+        mock_create.return_value = mock_client
+
+        fallback_response = SimpleNamespace(
+            id="fallback",
+            model="test",
+            choices=[SimpleNamespace(
+                index=0,
+                message=SimpleNamespace(
+                    role="assistant",
+                    content="fallback no retry",
+                    tool_calls=None,
+                    reasoning_content=None,
+                ),
+                finish_reason="stop",
+            )],
+            usage=None,
+        )
+        mock_non_stream.return_value = fallback_response
+
+        agent = AIAgent(
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        response = agent._interruptible_streaming_api_call({})
+
+        assert response.choices[0].message.content == "fallback no retry"
+        # Should NOT retry — goes straight to non-streaming fallback
+        assert mock_client.chat.completions.create.call_count == 1
+        mock_non_stream.assert_called_once()
+

 # ── Test: Reasoning Streaming ────────────────────────────────────────────

@@ -0,0 +1,154 @@
+"""Tests for surrogate character sanitization in user input.
+
+Surrogates (U+D800..U+DFFF) are invalid in UTF-8 and crash json.dumps()
+inside the OpenAI SDK. They can appear via clipboard paste from rich-text
+editors like Google Docs.
+"""
+import json
+import pytest
+from unittest.mock import MagicMock, patch
+
+from run_agent import (
+    _sanitize_surrogates,
+    _sanitize_messages_surrogates,
+    _SURROGATE_RE,
+)
+
+
+class TestSanitizeSurrogates:
+    """Test the _sanitize_surrogates() helper."""
+
+    def test_normal_text_unchanged(self):
+        text = "Hello, this is normal text with unicode: café ñ 日本語 🎉"
+        assert _sanitize_surrogates(text) == text
+
+    def test_empty_string(self):
+        assert _sanitize_surrogates("") == ""
+
+    def test_single_surrogate_replaced(self):
+        result = _sanitize_surrogates("Hello \udce2 world")
+        assert result == "Hello \ufffd world"
+
+    def test_multiple_surrogates_replaced(self):
+        result = _sanitize_surrogates("a\ud800b\udc00c\udfff")
+        assert result == "a\ufffdb\ufffdc\ufffd"
+
+    def test_all_surrogate_range(self):
+        """Verify the regex catches the full surrogate range."""
+        for cp in [0xD800, 0xD900, 0xDA00, 0xDB00, 0xDC00, 0xDD00, 0xDE00, 0xDF00, 0xDFFF]:
+            text = f"test{chr(cp)}end"
+            result = _sanitize_surrogates(text)
+            assert '\ufffd' in result, f"Surrogate U+{cp:04X} not caught"
+
+    def test_result_is_json_serializable(self):
+        """Sanitized text must survive json.dumps + utf-8 encoding."""
+        dirty = "data \udce2\udcb0 from clipboard"
+        clean = _sanitize_surrogates(dirty)
+        serialized = json.dumps({"content": clean}, ensure_ascii=False)
+        # Must not raise UnicodeEncodeError
+        serialized.encode("utf-8")
+
+    def test_original_surrogates_fail_encoding(self):
+        """Confirm the original bug: surrogates crash utf-8 encoding."""
+        dirty = "data \udce2 from clipboard"
+        serialized = json.dumps({"content": dirty}, ensure_ascii=False)
+        with pytest.raises(UnicodeEncodeError):
+            serialized.encode("utf-8")
+
+
+class TestSanitizeMessagesSurrogates:
+    """Test the _sanitize_messages_surrogates() helper for message lists."""
+
+    def test_clean_messages_returns_false(self):
+        msgs = [
+            {"role": "user", "content": "all clean"},
+            {"role": "assistant", "content": "me too"},
+        ]
+        assert _sanitize_messages_surrogates(msgs) is False
+
+    def test_dirty_string_content_sanitized(self):
+        msgs = [
+            {"role": "user", "content": "text with \udce2 surrogate"},
+        ]
+        assert _sanitize_messages_surrogates(msgs) is True
+        assert "\ufffd" in msgs[0]["content"]
+        assert "\udce2" not in msgs[0]["content"]
+
+    def test_dirty_multimodal_content_sanitized(self):
+        msgs = [
+            {"role": "user", "content": [
+                {"type": "text", "text": "multimodal \udce2 content"},
+                {"type": "image_url", "image_url": {"url": "http://example.com"}},
+            ]},
+        ]
+        assert _sanitize_messages_surrogates(msgs) is True
+        assert "\ufffd" in msgs[0]["content"][0]["text"]
+        assert "\udce2" not in msgs[0]["content"][0]["text"]
+
+    def test_mixed_clean_and_dirty(self):
+        msgs = [
+            {"role": "user", "content": "clean text"},
+            {"role": "user", "content": "dirty \udce2 text"},
+            {"role": "assistant", "content": "clean response"},
+        ]
+        assert _sanitize_messages_surrogates(msgs) is True
+        assert msgs[0]["content"] == "clean text"
+        assert "\ufffd" in msgs[1]["content"]
+        assert msgs[2]["content"] == "clean response"
+
+    def test_non_dict_items_skipped(self):
+        msgs = ["not a dict", {"role": "user", "content": "ok"}]
+        assert _sanitize_messages_surrogates(msgs) is False
+
+    def test_tool_messages_sanitized(self):
+        """Tool results could also contain surrogates from file reads etc."""
+        msgs = [
+            {"role": "tool", "content": "result with \udce2 data", "tool_call_id": "x"},
+        ]
+        assert _sanitize_messages_surrogates(msgs) is True
+        assert "\ufffd" in msgs[0]["content"]
+
+
+class TestRunConversationSurrogateSanitization:
+    """Integration: verify run_conversation sanitizes user_message."""
+
+    @patch("run_agent.AIAgent._build_system_prompt")
+    @patch("run_agent.AIAgent._interruptible_streaming_api_call")
+    @patch("run_agent.AIAgent._interruptible_api_call")
+    def test_user_message_surrogates_sanitized(self, mock_api, mock_stream, mock_sys):
+        """Surrogates in user_message are replaced with U+FFFD in the stored history."""
+        from run_agent import AIAgent
+
+        mock_sys.return_value = "system prompt"
+
+        # Mock streaming to return a simple response
+        mock_choice = MagicMock()
+        mock_choice.message.content = "response"
+        mock_choice.message.tool_calls = None
+        mock_choice.message.refusal = None
+        mock_choice.finish_reason = "stop"
+        mock_choice.message.reasoning_content = None
+
+        mock_response = MagicMock()
+        mock_response.choices = [mock_choice]
+        mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=5, total_tokens=15)
+        mock_response.model = "test-model"
+        mock_response.id = "test-id"
+
+        mock_stream.return_value = mock_response
+        mock_api.return_value = mock_response
+
+        agent = AIAgent(model="test/model", quiet_mode=True, skip_memory=True, skip_context_files=True)
+        agent.client = MagicMock()
+
+        # Pass a message with surrogates
+        result = agent.run_conversation(
+            user_message="test \udce2 message",
+            conversation_history=[],
+        )
+
+        # A stored user message must exist, otherwise the content checks would pass vacuously
+        user_msgs = [m for m in result.get("messages", []) if m.get("role") == "user"]
+        assert user_msgs, "No user message found in stored history"
+        assert all("\udce2" not in m["content"] for m in user_msgs), "Surrogate leaked into stored message"
+        assert all("\ufffd" in m["content"] for m in user_msgs), "Replacement char not in stored message"
@@ -512,6 +512,30 @@ class TestGatewayProtection:
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is False

+    def test_pkill_hermes_detected(self):
+        """pkill targeting hermes/gateway processes must be caught."""
+        cmd = 'pkill -f "cli.py --gateway"'
+        dangerous, key, desc = detect_dangerous_command(cmd)
+        assert dangerous is True
+        assert "self-termination" in desc
+
+    def test_killall_hermes_detected(self):
+        cmd = "killall hermes"
+        dangerous, key, desc = detect_dangerous_command(cmd)
+        assert dangerous is True
+        assert "self-termination" in desc
+
+    def test_pkill_gateway_detected(self):
+        cmd = "pkill -f gateway"
+        dangerous, key, desc = detect_dangerous_command(cmd)
+        assert dangerous is True
+
+    def test_pkill_unrelated_not_flagged(self):
+        """pkill targeting unrelated processes should not be flagged."""
+        cmd = "pkill -f nginx"
+        dangerous, key, desc = detect_dangerous_command(cmd)
+        assert dangerous is False
+

 class TestNormalizationBypass:
    """Obfuscation techniques must not bypass dangerous command detection."""
@@ -0,0 +1,109 @@
+"""Tests for None guard on browser_tool LLM response content.
+
+browser_tool.py had two call sites that accessed response.choices[0].message.content
+without checking for None — _extract_relevant_content (line 996) and
+browser_vision (line 1626). When reasoning-only models (DeepSeek-R1, QwQ)
+return content=None, these produced null snapshots or null analysis.
+
+These tests verify both sites are guarded.
+"""
+
+import types
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ── helpers ────────────────────────────────────────────────────────────────
+
+def _make_response(content):
+    """Build a minimal OpenAI-compatible ChatCompletion response stub."""
+    message = types.SimpleNamespace(content=content)
+    choice = types.SimpleNamespace(message=message)
+    return types.SimpleNamespace(choices=[choice])
+
+
+# ── _extract_relevant_content (line 996) ──────────────────────────────────
+
+class TestExtractRelevantContentNoneGuard:
+    """tools/browser_tool.py — _extract_relevant_content()"""
+
+    def test_none_content_falls_back_to_truncated(self):
+        """When LLM returns None content, should fall back to truncated snapshot."""
+        with patch("tools.browser_tool.call_llm", return_value=_make_response(None)), \
+             patch("tools.browser_tool._get_extraction_model", return_value="test-model"):
+            from tools.browser_tool import _extract_relevant_content
+            result = _extract_relevant_content("This is a long snapshot text", "find the button")
+
+        assert result is not None
+        assert isinstance(result, str)
+        assert len(result) > 0
+
+    def test_normal_content_returned(self):
+        """Normal string content should pass through."""
+        with patch("tools.browser_tool.call_llm", return_value=_make_response("Extracted content here")), \
+             patch("tools.browser_tool._get_extraction_model", return_value="test-model"):
+            from tools.browser_tool import _extract_relevant_content
+            result = _extract_relevant_content("snapshot text", "task")
+
+        assert result == "Extracted content here"
+
+    def test_empty_string_content_falls_back(self):
+        """Empty string content should also fall back to truncated."""
+        with patch("tools.browser_tool.call_llm", return_value=_make_response("   ")), \
+             patch("tools.browser_tool._get_extraction_model", return_value="test-model"):
+            from tools.browser_tool import _extract_relevant_content
+            result = _extract_relevant_content("This is a long snapshot text", "task")
+
+        assert result is not None
+        assert len(result) > 0
+
+
+# ── browser_vision (line 1626) ────────────────────────────────────────────
+
+class TestBrowserVisionNoneGuard:
+    """tools/browser_tool.py — browser_vision() analysis extraction"""
+
+    def test_none_content_produces_fallback_message(self):
+        """When LLM returns None content, analysis should have a fallback message."""
+        response = _make_response(None)
+        analysis = (response.choices[0].message.content or "").strip()
+        fallback = analysis or "Vision analysis returned no content."
+
+        assert fallback == "Vision analysis returned no content."
+
+    def test_normal_content_passes_through(self):
+        """Normal analysis content should pass through unchanged."""
+        response = _make_response("  The page shows a login form.  ")
+        analysis = (response.choices[0].message.content or "").strip()
+        fallback = analysis or "Vision analysis returned no content."
+
+        assert fallback == "The page shows a login form."
+
+
+# ── source line verification ──────────────────────────────────────────────
+
+class TestBrowserSourceLinesAreGuarded:
+    """Verify the actual source file has the fix applied."""
+
+    @staticmethod
+    def _read_file() -> str:
+        """Return the UTF-8 text of tools/browser_tool.py, three directory levels above this file."""
+        from pathlib import Path
+        root = Path(__file__).resolve().parents[2]
+        return (root / "tools" / "browser_tool.py").read_text(encoding="utf-8")
+
+    def test_extract_relevant_content_guarded(self):
+        src = self._read_file()
+        # The old unguarded pattern should NOT exist
+        assert "return response.choices[0].message.content\n" not in src, (
+            "browser_tool.py _extract_relevant_content still has unguarded "
+            ".content return — apply None guard"
+        )
+
+    def test_browser_vision_guarded(self):
+        src = self._read_file()
+        assert "analysis = response.choices[0].message.content\n" not in src, (
+            "browser_tool.py browser_vision still has unguarded "
+            ".content assignment — apply None guard"
+        )
@@ -63,6 +63,18 @@ class TestLocalOneShotRegression:
        assert r["output"].strip() == ""
        env.cleanup()

+    def test_oneshot_heredoc_does_not_leak_fence_wrapper(self):
+        """Heredoc closing line must not be merged with the fence wrapper tail."""
+        env = LocalEnvironment(persistent=False)
+        cmd = "cat <<'H_EOF'\nheredoc body line\nH_EOF"
+        r = env.execute(cmd)
+        env.cleanup()
+        assert r["returncode"] == 0
+        assert "heredoc body line" in r["output"]
+        assert "__hermes_rc" not in r["output"]
+        assert "printf '" not in r["output"]
+        assert "exit $" not in r["output"]
+

 class TestLocalPersistent:
    @pytest.fixture
@@ -4,10 +4,9 @@ Covers the bugs discovered while setting up TBLite evaluation:
 1. Tool resolution — terminal + file tools load correctly
 2. CWD fix — host paths get replaced with /root for container backends
 3. ephemeral_disk version check
-4. Tilde ~ replaced with /root for container backends
-5. ensurepip fix in Modal image builder
-6. install_pipx stays True for swerex-remote
-7. /home/ added to host prefix check
+4. ensurepip fix in Modal image builder
+5. No swe-rex dependency — uses native Modal SDK
+6. /home/ added to host prefix check
 """

 import os
@@ -251,7 +250,7 @@ class TestModalEnvironmentDefaults:


 # =========================================================================
-# Test 7: ensurepip fix in patches.py
+# Test 7: ensurepip fix in ModalEnvironment
 # =========================================================================

 class TestEnsurepipFix:
@@ -275,17 +274,24 @@ class TestEnsurepipFix:
            "to fix pip before Modal's bootstrap"
        )

-    def test_modal_environment_uses_install_pipx(self):
-        """ModalEnvironment should pass install_pipx to ModalDeployment."""
+    def test_modal_environment_uses_native_sdk(self):
+        """ModalEnvironment should use Modal SDK directly, not swe-rex."""
        try:
            from tools.environments.modal import ModalEnvironment
        except ImportError:
            pytest.skip("tools.environments.modal not importable")

        import inspect
-        source = inspect.getsource(ModalEnvironment.__init__)
-        assert "install_pipx" in source, (
-            "ModalEnvironment should pass install_pipx to ModalDeployment"
+        source = inspect.getsource(ModalEnvironment)
+        assert "swerex" not in source.lower(), (
+            "ModalEnvironment should not depend on swe-rex; "
+            "use Modal SDK directly via Sandbox.create() + exec()"
+        )
+        assert "Sandbox.create.aio" in source, (
+            "ModalEnvironment should use async Modal Sandbox.create.aio()"
+        )
+        assert "exec.aio" in source, (
+            "ModalEnvironment should use Sandbox.exec.aio() for command execution"
        )


@@ -63,6 +63,35 @@ class TestSkillViewRegistersPassthrough:
        assert result["success"] is True
        assert is_env_passthrough("TENOR_API_KEY")

+    def test_remote_backend_persisted_env_vars_registered(self, tmp_path, monkeypatch):
+        """Remote-backed skills still register locally available env vars."""
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        _create_skill(
+            tmp_path,
+            "test-skill",
+            frontmatter_extra=(
+                "required_environment_variables:\n"
+                "  - name: TENOR_API_KEY\n"
+                "    prompt: Enter your Tenor API key\n"
+            ),
+        )
+        monkeypatch.setattr("tools.skills_tool.SKILLS_DIR", tmp_path)
+
+        from hermes_cli.config import save_env_value
+
+        save_env_value("TENOR_API_KEY", "persisted-value-123")
+        monkeypatch.delenv("TENOR_API_KEY", raising=False)
+
+        with patch("tools.skills_tool._secret_capture_callback", None):
+            from tools.skills_tool import skill_view
+
+            result = json.loads(skill_view(name="test-skill"))
+
+        assert result["success"] is True
+        assert result["setup_needed"] is False
+        assert result["missing_required_environment_variables"] == []
+        assert is_env_passthrough("TENOR_API_KEY")
+
    def test_missing_env_vars_not_registered(self, tmp_path, monkeypatch):
        """When a skill declares required_environment_variables but the var is NOT set,
        it should NOT be registered in the passthrough."""
@@ -813,6 +813,29 @@ class TestSkillViewPrerequisites:
        assert result["setup_needed"] is False
        assert result["missing_required_environment_variables"] == []

+    def test_remote_backend_treats_persisted_env_as_available(
+        self, tmp_path, monkeypatch
+    ):
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_skill(
+                tmp_path,
+                "remote-ready",
+                frontmatter_extra="prerequisites:\n  env_vars: [PERSISTED_REMOTE_KEY]\n",
+            )
+            from hermes_cli.config import save_env_value
+
+            save_env_value("PERSISTED_REMOTE_KEY", "persisted-value")
+            monkeypatch.delenv("PERSISTED_REMOTE_KEY", raising=False)
+            raw = skill_view("remote-ready")
+
+        result = json.loads(raw)
+        assert result["success"] is True
+        assert result["setup_needed"] is False
+        assert result["missing_required_environment_variables"] == []
+        assert result["readiness_status"] == "available"
+
    def test_no_setup_metadata_when_no_required_envs(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(tmp_path, "plain-skill")
@@ -878,17 +901,11 @@ class TestSkillViewPrerequisites:
        assert result["setup_needed"] is True

    @pytest.mark.parametrize(
-        "backend,expected_note",
-        [
-            ("ssh", "remote environment"),
-            ("daytona", "remote environment"),
-            ("docker", "docker-backed skills"),
-            ("singularity", "singularity-backed skills"),
-            ("modal", "modal-backed skills"),
-        ],
+        "backend",
+        ["ssh", "daytona", "docker", "singularity", "modal"],
    )
-    def test_remote_backend_keeps_setup_needed_after_local_secret_capture(
-        self, tmp_path, monkeypatch, backend, expected_note
+    def test_remote_backend_becomes_available_after_local_secret_capture(
+        self, tmp_path, monkeypatch, backend
    ):
        monkeypatch.setenv("TERMINAL_ENV", backend)
        monkeypatch.delenv("TENOR_API_KEY", raising=False)
@@ -926,10 +943,10 @@ class TestSkillViewPrerequisites:
        result = json.loads(raw)
        assert result["success"] is True
        assert len(calls) == 1
-        assert result["setup_needed"] is True
-        assert result["readiness_status"] == "setup_needed"
-        assert result["missing_required_environment_variables"] == ["TENOR_API_KEY"]
-        assert expected_note in result["setup_note"].lower()
+        assert result["setup_needed"] is False
+        assert result["readiness_status"] == "available"
+        assert result["missing_required_environment_variables"] == []
+        assert "setup_note" not in result

    def test_skill_view_surfaces_skill_read_errors(self, tmp_path, monkeypatch):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
@@ -63,7 +63,7 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch,
    monkeypatch.setenv("TERMINAL_ENV", "modal")
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("USERPROFILE", str(tmp_path))
-    # Pretend swerex is installed
+    # Pretend modal is installed
    monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object())

    with caplog.at_level(logging.ERROR):
@@ -53,6 +53,8 @@ DANGEROUS_PATTERNS = [
    # Gateway protection: never start gateway outside systemd management
    (r'gateway\s+run\b.*(&\s*$|&\s*;|\bdisown\b|\bsetsid\b)', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
    (r'\bnohup\b.*gateway\s+run\b', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
+    # Self-termination protection: prevent agent from killing its own process
+    (r'\b(pkill|killall)\b.*\b(hermes|gateway|cli\.py)\b', "kill hermes/gateway process (self-termination)"),
 ]


@@ -993,7 +993,7 @@ def _extract_relevant_content(
        if model:
            call_kwargs["model"] = model
        response = call_llm(**call_kwargs)
-        return response.choices[0].message.content
+        return (response.choices[0].message.content or "").strip() or _truncate_snapshot(snapshot_text)
    except Exception:
        return _truncate_snapshot(snapshot_text)

@@ -1523,8 +1523,8 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
    effective_task_id = task_id or "default"
    
    # Save screenshot to persistent location so it can be shared with users
-    hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
-    screenshots_dir = hermes_home / "browser_screenshots"
+    from hermes_constants import get_hermes_dir
+    screenshots_dir = get_hermes_dir("cache/screenshots", "browser_screenshots")
    screenshot_path = screenshots_dir / f"browser_screenshot_{uuid_mod.uuid4().hex}.png"
    
    try:
@@ -1623,10 +1623,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
            call_kwargs["model"] = vision_model
        response = call_llm(**call_kwargs)
        
-        analysis = response.choices[0].message.content
+        analysis = (response.choices[0].message.content or "").strip()
        response_data = {
            "success": True,
-            "analysis": analysis,
+            "analysis": analysis or "Vision analysis returned no content.",
            "screenshot_path": str(screenshot_path),
        }
        # Include annotation data if annotated screenshot was taken
@@ -391,12 +391,17 @@ class LocalEnvironment(PersistentShellMixin, BaseEnvironment):
            effective_stdin = stdin_data

        user_shell = _find_bash()
+        # Newline-separated wrapper (not `cmd; __hermes_rc=...` on one line).
+        # A trailing `; __hermes_rc` glued to `<<EOF` / a closing `EOF` line breaks
+        # heredoc parsing: the delimiter must be alone on its line, otherwise the
+        # rest of this script becomes heredoc body and leaks into stdout (e.g. gh
+        # issue/PR flows that use here-documents for bodies).
        fenced_cmd = (
-            f"printf '{_OUTPUT_FENCE}';"
-            f" {exec_command};"
-            f" __hermes_rc=$?;"
-            f" printf '{_OUTPUT_FENCE}';"
-            f" exit $__hermes_rc"
+            f"printf '{_OUTPUT_FENCE}'\n"
+            f"{exec_command}\n"
+            f"__hermes_rc=$?\n"
+            f"printf '{_OUTPUT_FENCE}'\n"
+            f"exit $__hermes_rc\n"
        )
        run_env = _make_run_env(self.env)

@@ -1,13 +1,20 @@
-"""Modal cloud execution environment using SWE-ReX directly.
+"""Modal cloud execution environment using the Modal SDK directly.

-Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
-is snapshotted on cleanup and restored on next creation, so installed packages,
-project files, and config changes survive across sessions.
+Replaces the previous swe-rex ModalDeployment wrapper with native Modal
+Sandbox.create() + Sandbox.exec() calls.  This eliminates the need for
+swe-rex's HTTP runtime server and unencrypted tunnel, fixing:
+  - AsyncUsageWarning from synchronous App.lookup in async context
+  - DeprecationError from unencrypted_ports / .url on unencrypted tunnels
+
+Supports persistent filesystem snapshots: when enabled, the sandbox's
+filesystem is snapshotted on cleanup and restored on next creation, so
+installed packages, project files, and config changes survive across sessions.
 """

 import asyncio
 import json
 import logging
+import shlex
 import threading
 import uuid
 from pathlib import Path
@@ -39,7 +46,7 @@ def _save_snapshots(data: Dict[str, str]) -> None:


 class _AsyncWorker:
-    """Background thread with its own event loop for async-safe swe-rex calls.
+    """Background thread with its own event loop for async-safe Modal calls.

    Allows sync code to submit async coroutines and block for results,
    even when called from inside another running event loop (e.g. Atropos).
@@ -75,9 +82,10 @@ class _AsyncWorker:


 class ModalEnvironment(BaseEnvironment):
-    """Modal cloud execution via SWE-ReX.
+    """Modal cloud execution via native Modal SDK.

-    Uses swe-rex's ModalDeployment directly for sandbox management.
+    Uses Modal's Sandbox.create() for container lifecycle and Sandbox.exec()
+    for command execution — no intermediate HTTP server or tunnel required.
    Adds sudo -S support, configurable resources (CPU, memory, disk),
    and optional filesystem persistence via Modal's snapshot API.
    """
@@ -96,7 +104,8 @@ class ModalEnvironment(BaseEnvironment):
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._base_image = image
-        self._deployment = None
+        self._sandbox = None
+        self._app = None
        self._worker = _AsyncWorker()

        sandbox_kwargs = dict(modal_sandbox_kwargs or {})
@@ -128,25 +137,27 @@ class ModalEnvironment(BaseEnvironment):
                ],
            )

-        # Start the async worker thread and create the deployment on it
+        # Start the async worker thread and create sandbox on it
        # so all gRPC channels are bound to the worker's event loop.
        self._worker.start()

-        from swerex.deployment.modal import ModalDeployment
-
-        async def _create_and_start():
-            deployment = ModalDeployment(
-                image=effective_image,
-                startup_timeout=180.0,
-                runtime_timeout=3600.0,
-                deployment_timeout=3600.0,
-                install_pipx=True,
-                modal_sandbox_kwargs=sandbox_kwargs,
+        async def _create_sandbox():
+            app = await _modal.App.lookup.aio(
+                "hermes-agent", create_if_missing=True
            )
-            await deployment.start()
-            return deployment
+            sandbox = await _modal.Sandbox.create.aio(
+                "sleep", "infinity",
+                image=effective_image,
+                app=app,
+                timeout=int(sandbox_kwargs.pop("timeout", 3600)),
+                **sandbox_kwargs,
+            )
+            return app, sandbox

-        self._deployment = self._worker.run_coroutine(_create_and_start())
+        self._app, self._sandbox = self._worker.run_coroutine(
+            _create_sandbox(), timeout=300
+        )
+        logger.info("Modal: sandbox created (task=%s)", self._task_id)

    def execute(self, command: str, cwd: str = "", *,
                timeout: int | None = None,
@@ -159,42 +170,47 @@ class ModalEnvironment(BaseEnvironment):

        exec_command, sudo_stdin = self._prepare_command(command)

-        # Modal sandboxes execute commands via the Modal SDK and cannot pipe
-        # subprocess stdin directly the way a local Popen can.  When a sudo
-        # password is present, use a shell-level pipe from printf so that the
-        # password feeds sudo -S without appearing as an echo argument embedded
-        # in the shell string.
+        # Modal sandboxes execute commands via exec() and cannot pipe
+        # subprocess stdin directly.  When a sudo password is present,
+        # use a shell-level pipe from printf.
        if sudo_stdin is not None:
-            import shlex
            exec_command = (
                f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
            )

-        from swerex.runtime.abstract import Command as RexCommand
-
        effective_cwd = cwd or self.cwd
        effective_timeout = timeout or self.timeout

+        # Prefix the command with cd; stderr is appended to stdout after exec
+        full_command = f"cd {shlex.quote(effective_cwd)} && {exec_command}"
+
        # Run in a background thread so we can poll for interrupts
        result_holder = {"value": None, "error": None}

        def _run():
            try:
                async def _do_execute():
-                    return await self._deployment.runtime.execute(
-                        RexCommand(
-                            command=exec_command,
-                            shell=True,
-                            check=False,
-                            cwd=effective_cwd,
-                            timeout=effective_timeout,
-                            merge_output_streams=True,
-                        )
+                    process = await self._sandbox.exec.aio(
+                        "bash", "-c", full_command,
+                        timeout=effective_timeout,
                    )
-                output = self._worker.run_coroutine(_do_execute())
+                    # Read stdout and stderr as separate streams; they are
+                    # concatenated below (not interleaved by the shell)
+                    stdout = await process.stdout.read.aio()
+                    stderr = await process.stderr.read.aio()
+                    exit_code = await process.wait.aio()
+                    # Merge stdout + stderr (stderr after stdout)
+                    output = stdout
+                    if stderr:
+                        output = f"{stdout}\n{stderr}" if stdout else stderr
+                    return output, exit_code
+
+                output, exit_code = self._worker.run_coroutine(
+                    _do_execute(), timeout=effective_timeout + 30
+                )
                result_holder["value"] = {
-                    "output": output.stdout,
-                    "returncode": output.exit_code,
+                    "output": output,
+                    "returncode": exit_code,
                }
            except Exception as e:
                result_holder["error"] = e
@@ -206,7 +222,7 @@ class ModalEnvironment(BaseEnvironment):
            if is_interrupted():
                try:
                    self._worker.run_coroutine(
-                        asyncio.wait_for(self._deployment.stop(), timeout=10),
+                        self._sandbox.terminate.aio(),
                        timeout=15,
                    )
                except Exception:
@@ -222,38 +238,37 @@ class ModalEnvironment(BaseEnvironment):

    def cleanup(self):
        """Snapshot the filesystem (if persistent) then stop the sandbox."""
-        if self._deployment is None:
+        if self._sandbox is None:
            return

        if self._persistent:
            try:
-                sandbox = getattr(self._deployment, '_sandbox', None)
-                if sandbox:
-                    async def _snapshot():
-                        img = await sandbox.snapshot_filesystem.aio()
-                        return img.object_id
+                async def _snapshot():
+                    img = await self._sandbox.snapshot_filesystem.aio()
+                    return img.object_id

-                    try:
-                        snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
-                    except Exception:
-                        snapshot_id = None
+                try:
+                    snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
+                except Exception:
+                    snapshot_id = None

-                    if snapshot_id:
-                        snapshots = _load_snapshots()
-                        snapshots[self._task_id] = snapshot_id
-                        _save_snapshots(snapshots)
-                        logger.info("Modal: saved filesystem snapshot %s for task %s",
-                                    snapshot_id[:20], self._task_id)
+                if snapshot_id:
+                    snapshots = _load_snapshots()
+                    snapshots[self._task_id] = snapshot_id
+                    _save_snapshots(snapshots)
+                    logger.info("Modal: saved filesystem snapshot %s for task %s",
+                                snapshot_id[:20], self._task_id)
            except Exception as e:
                logger.warning("Modal: filesystem snapshot failed: %s", e)

        try:
            self._worker.run_coroutine(
-                asyncio.wait_for(self._deployment.stop(), timeout=10),
+                self._sandbox.terminate.aio(),
                timeout=15,
            )
        except Exception:
            pass
        finally:
            self._worker.stop()
-            self._deployment = None
+            self._sandbox = None
+            self._app = None
@@ -171,8 +171,9 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
        # Security: block direct reads of internal Hermes cache/index files
        # to prevent prompt injection via catalog or hub metadata files.
        import pathlib as _pathlib
+        from hermes_constants import get_hermes_home as _get_hh
        _resolved = _pathlib.Path(path).expanduser().resolve()
-        _hermes_home = _pathlib.Path("~/.hermes").expanduser().resolve()
+        _hermes_home = _get_hh().resolve()
        _blocked_dirs = [
            _hermes_home / "skills" / ".hub" / "index-cache",
            _hermes_home / "skills" / ".hub",
@@ -98,6 +98,13 @@ try:
        _MCP_HTTP_AVAILABLE = True
    except ImportError:
        _MCP_HTTP_AVAILABLE = False
+    # Prefer the non-deprecated API (mcp >= 1.24.0); fall back to the
+    # deprecated wrapper for older SDK versions.
+    try:
+        from mcp.client.streamable_http import streamable_http_client
+        _MCP_NEW_HTTP = True
+    except ImportError:
+        _MCP_NEW_HTTP = False
    # Sampling types -- separated so older SDK versions don't break MCP support
    try:
        from mcp.types import (
@@ -762,21 +769,50 @@ class MCPServerTask:
                logger.warning("MCP OAuth setup failed for '%s': %s", self.name, exc)

        sampling_kwargs = self._sampling.session_kwargs() if self._sampling else {}
-        _http_kwargs: dict = {
-            "headers": headers,
-            "timeout": float(connect_timeout),
-        }
-        if _oauth_auth is not None:
-            _http_kwargs["auth"] = _oauth_auth
-        async with streamablehttp_client(url, **_http_kwargs) as (
-            read_stream, write_stream, _get_session_id,
-        ):
-            async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
-                await session.initialize()
-                self.session = session
-                await self._discover_tools()
-                self._ready.set()
-                await self._shutdown_event.wait()
+
+        if _MCP_NEW_HTTP:
+            # New API (mcp >= 1.24.0): build an explicit httpx.AsyncClient
+            # matching the SDK's own create_mcp_http_client defaults.
+            import httpx
+
+            client_kwargs: dict = {
+                "follow_redirects": True,
+                "timeout": httpx.Timeout(float(connect_timeout), read=300.0),
+            }
+            if headers:
+                client_kwargs["headers"] = headers
+            if _oauth_auth is not None:
+                client_kwargs["auth"] = _oauth_auth
+
+            # Caller owns the client lifecycle — the SDK skips cleanup when
+            # http_client is provided, so we wrap in async-with.
+            async with httpx.AsyncClient(**client_kwargs) as http_client:
+                async with streamable_http_client(url, http_client=http_client) as (
+                    read_stream, write_stream, _get_session_id,
+                ):
+                    async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
+                        await session.initialize()
+                        self.session = session
+                        await self._discover_tools()
+                        self._ready.set()
+                        await self._shutdown_event.wait()
+        else:
+            # Deprecated API (mcp < 1.24.0): manages httpx client internally.
+            _http_kwargs: dict = {
+                "headers": headers,
+                "timeout": float(connect_timeout),
+            }
+            if _oauth_auth is not None:
+                _http_kwargs["auth"] = _oauth_auth
+            async with streamablehttp_client(url, **_http_kwargs) as (
+                read_stream, write_stream, _get_session_id,
+            ):
+                async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
+                    await session.initialize()
+                    self.session = session
+                    await self._discover_tools()
+                    self._ready.set()
+                    await self._shutdown_event.wait()

    async def _discover_tools(self):
        """Discover tools from the connected session."""
@@ -339,7 +339,7 @@ class MemoryStore:
        entries = self._entries_for(target)
        current = self._char_count(target)
        limit = self._char_limit(target)
-        pct = int((current / limit) * 100) if limit > 0 else 0
+        pct = min(100, int((current / limit) * 100)) if limit > 0 else 0

        resp = {
            "success": True,
@@ -360,7 +360,7 @@ class MemoryStore:
        limit = self._char_limit(target)
        content = ENTRY_DELIMITER.join(entries)
        current = len(content)
-        pct = int((current / limit) * 100) if limit > 0 else 0
+        pct = min(100, int((current / limit) * 100)) if limit > 0 else 0

        if target == "user":
            header = f"USER PROFILE (who the user is) [{pct}% — {current:,}/{limit:,} chars]"
@@ -392,23 +392,30 @@ def session_search(
            }, ensure_ascii=False)

        summaries = []
-        for (session_id, match_info, _, _), result in zip(tasks, results):
+        for (session_id, match_info, conversation_text, _), result in zip(tasks, results):
            if isinstance(result, Exception):
                logging.warning(
                    "Failed to summarize session %s: %s",
-                    session_id,
-                    result,
-                    exc_info=True,
+                    session_id, result, exc_info=True,
                )
-                continue
+                result = None
+
+            entry = {
+                "session_id": session_id,
+                "when": _format_timestamp(match_info.get("session_started")),
+                "source": match_info.get("source", "unknown"),
+                "model": match_info.get("model"),
+            }
+
            if result:
-                summaries.append({
-                    "session_id": session_id,
-                    "when": _format_timestamp(match_info.get("session_started")),
-                    "source": match_info.get("source", "unknown"),
-                    "model": match_info.get("model"),
-                    "summary": result,
-                })
+                entry["summary"] = result
+            else:
+                # Fallback: raw preview so matched sessions aren't silently
+                # dropped when the summarizer is unavailable (fixes #3409).
+                preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available."
+                entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}"
+
+            summaries.append(entry)

        return json.dumps({
            "success": True,
@@ -251,6 +251,7 @@ class GitHubSource(SkillSource):
        {"repo": "openai/skills", "path": "skills/"},
        {"repo": "anthropics/skills", "path": "skills/"},
        {"repo": "VoltAgent/awesome-agent-skills", "path": "skills/"},
+        {"repo": "garrytan/gstack", "path": ""},
    ]

    def __init__(self, auth: GitHubAuth, extra_taps: Optional[List[Dict]] = None):
@@ -395,7 +396,8 @@ class GitHubSource(SkillSource):
            if dir_name.startswith(".") or dir_name.startswith("_"):
                continue

-            skill_identifier = f"{repo}/{path.rstrip('/')}/{dir_name}"
+            prefix = path.rstrip("/")
+            skill_identifier = f"{repo}/{prefix}/{dir_name}" if prefix else f"{repo}/{dir_name}"
            meta = self.inspect(skill_identifier)
            if meta:
                skills.append(meta)
--- a/Show More
+++ b/Show More