fix(dashboard): include cache tokens in totals, track real API call count

The analytics dashboard had three accuracy issues:

1. TOTAL TOKENS excluded cache_read and cache_write tokens — only counted the non-cached input portion. With 90%+ cache hit rates typical in Hermes, this dramatically undercounted actual token usage (e.g. showing 9.1M when the real total was 169M+).
2. The 'API Calls' card displayed session count (COUNT(*) from sessions table), not actual LLM API requests. A single session makes 10-90 API calls through the tool loop, so this was ~30x lower than reality.
3. cache_write_tokens was stored in the DB but never exposed through the analytics API endpoint or frontend.

Changes:

- Add api_call_count column to sessions table (schema v7 migration)
- Persist api_call_count=1 per LLM API call in run_agent.py
- Analytics SQL queries now include cache_write_tokens and api_call_count in daily, by_model, and totals aggregations
- Frontend TOTAL TOKENS card now shows input + cache_read + cache_write + output (the full prompt total + output)
- API CALLS card now uses real api_call_count from DB
- New Cache Hit Rate card shows cache efficiency percentage
- Bar chart, tooltips, daily table, model table all use prompt totals (input + cache_read + cache_write) instead of just input
- Labels changed from 'Input' to 'Prompt' to reflect the full prompt total
- TypeScript interfaces and i18n strings updated (en + zh)
fix(cli): restore messaging toolset for gateway platforms
2026-04-15 12:31:05 +05:30 · 2026-04-14 23:13:35 -07:00 · 2026-04-14 23:13:11 -07:00 · 2026-04-14 23:13:02 -07:00 · 2026-04-14 22:38:17 -07:00 · 2026-04-14 22:37:45 -07:00
91 changed files with 6940 additions and 1085 deletions
@@ -145,6 +145,10 @@
 # Only override here if you need to force a backend without touching config.yaml:
 # TERMINAL_ENV=local

+# Override the container runtime binary (e.g. to use Podman instead of Docker).
+# Useful on systems where Docker's storage driver is broken or unavailable.
+# HERMES_DOCKER_BINARY=/usr/local/bin/podman
+
 # Container images (for singularity/docker/modal backends)
 # TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
 # TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
@@ -9,11 +9,14 @@ on:
      - '**/*.py'
      - '.github/workflows/contributor-check.yml'

+permissions:
+  contents: read
+
 jobs:
  check-attribution:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          fetch-depth: 0  # Full history needed for git log

@@ -28,20 +28,20 @@ jobs:
      name: github-pages
      url: ${{ steps.deploy.outputs.page_url }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml httpx
+        run: pip install pyyaml==6.0.2 httpx==0.28.1

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
@@ -73,10 +73,10 @@ jobs:
          echo "hermes-agent.nousresearch.com" > _site/CNAME

      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
+        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa  # v3
        with:
          path: _site

      - name: Deploy to GitHub Pages
        id: deploy
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e  # v4
@@ -23,21 +23,21 @@ jobs:
    timeout-minutes: 60
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          submodules: recursive

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130  # v3

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3

      # Build amd64 only so we can `load` the image for smoke testing.
      # `load: true` cannot export a multi-arch manifest to the local daemon.
      # The multi-arch build follows on push to main / release.
      - name: Build image (amd64, smoke test)
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6
        with:
          context: .
          file: Dockerfile
@@ -56,14 +56,14 @@ jobs:

      - name: Log in to Docker Hub
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9  # v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Push multi-arch image (main branch)
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6
        with:
          context: .
          file: Dockerfile
@@ -75,7 +75,7 @@ jobs:

      - name: Push multi-arch image (release)
        if: github.event_name == 'release'
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8  # v6
        with:
          context: .
          file: Dockerfile
@@ -7,13 +7,16 @@ on:
      - '.github/workflows/docs-site-checks.yml'
  workflow_dispatch:

+permissions:
+  contents: read
+
 jobs:
  docs-site-checks:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: 20
          cache: npm
@@ -23,7 +26,7 @@ jobs:
        run: npm ci
        working-directory: website

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

@@ -14,6 +14,9 @@ on:
      - 'run_agent.py'
      - 'acp_adapter/**'

+permissions:
+  contents: read
+
 concurrency:
  group: nix-${{ github.ref }}
  cancel-in-progress: true
@@ -26,7 +29,7 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
      - name: Check flake
@@ -20,14 +20,14 @@ jobs:
    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

      - name: Install dependencies
-        run: pip install httpx pyyaml
+        run: pip install httpx==0.28.1 pyyaml==6.0.2

      - name: Build skills index
        env:
@@ -35,7 +35,7 @@ jobs:
        run: python scripts/build_skills_index.py

      - name: Upload index artifact
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
        with:
          name: skills-index
          path: website/static/api/skills-index.json
@@ -53,25 +53,25 @@ jobs:
    # Only deploy on schedule or manual trigger (not on every push to the script)
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
          name: skills-index
          path: website/static/api/

-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: website/package-lock.json

-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
        with:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml
+        run: pip install pyyaml==6.0.2

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py
@@ -92,10 +92,10 @@ jobs:
          echo "hermes-agent.nousresearch.com" > _site/CNAME

      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
+        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa  # v3
        with:
          path: _site

      - name: Deploy to GitHub Pages
        id: deploy
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e  # v4
@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
        with:
          fetch-depth: 0

@@ -149,6 +149,62 @@ jobs:
          "
          fi

+          # --- CI/CD workflow files modified ---
+          WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
+          if [ -n "$WORKFLOW_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: CI/CD workflow files modified
+          Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
+
+          **Files:**
+          \`\`\`
+          ${WORKFLOW_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- Dockerfile / container build files modified ---
+          DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
+          if [ -n "$DOCKER_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: Container build files modified
+          Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
+
+          **Files:**
+          \`\`\`
+          ${DOCKER_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- Dependency manifest files modified ---
+          DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
+          if [ -n "$DEP_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: Dependency manifest files modified
+          Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
+
+          **Files:**
+          \`\`\`
+          ${DEP_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
+          # Look only at added lines, drop lines that are entirely a YAML comment,
+          # then match the action reference itself (`uses: owner/repo@vN`).
+          # Trailing comments are deliberately NOT used as a filter: SHA-pinned
+          # lines such as `uses: a/b@<sha>  # v4` never contain `@v<digit>`, while
+          # filtering on `#` would let `uses: a/b@v4  # anything` slip through.
+          ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep -vE '^[0-9]+:\+[[:space:]]*#' | grep -E 'uses:[[:space:]]*[^[:space:]#]+@v[0-9]' | head -10 || true)
+          if [ -n "$ACTIONS_UNPIN" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: GitHub Actions with mutable version tags
+          Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
+
+          **Matches:**
+          \`\`\`
+          ${ACTIONS_UNPIN}
+          \`\`\`
+          "
+          fi
+
          # --- Output results ---
          if [ -n "$FINDINGS" ]; then
            echo "found=true" >> "$GITHUB_OUTPUT"
@@ -6,6 +6,9 @@ on:
  pull_request:
    branches: [main]

+permissions:
+  contents: read
+
 # Cancel in-progress runs for the same PR/branch
 concurrency:
  group: tests-${{ github.ref }}
@@ -17,13 +20,13 @@ jobs:
    timeout-minutes: 10
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

      - name: Install system dependencies
        run: sudo apt-get update && sudo apt-get install -y ripgrep

      - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11
        run: uv python install 3.11
@@ -49,10 +52,10 @@ jobs:
    timeout-minutes: 10
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

      - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5

      - name: Set up Python 3.11
        run: uv python install 3.11
@@ -13,7 +13,7 @@ source venv/bin/activate  # ALWAYS activate before running Python
 ```
 hermes-agent/
 ├── run_agent.py          # AIAgent class — core conversation loop
-├── model_tools.py        # Tool orchestration, _discover_tools(), handle_function_call()
+├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
 ├── cli.py                # HermesCLI class — interactive CLI orchestrator
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
@@ -181,7 +181,7 @@ if canonical == "mycommand":

 ## Adding New Tools

-Requires changes in **3 files**:
+Requires changes in **2 files**:

 **1. Create `tools/your_tool.py`:**
 ```python
@@ -204,9 +204,9 @@ registry.register(
 )
 ```

-**2. Add import** in `model_tools.py` `_discover_tools()` list.
+**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.

-**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
+Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.

 The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.

@@ -112,6 +112,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
 # "exotic provider" branch checks this before falling back to the main model.
 _PROVIDER_VISION_MODELS: Dict[str, str] = {
    "xiaomi": "mimo-v2-omni",
+    "zai": "glm-5v-turbo",
 }

 # OpenRouter app attribution headers
@@ -17,7 +17,10 @@ Improvements over v2:
  - Richer tool call/result detail in summarizer input
 """

+import hashlib
+import json
 import logging
+import re
 import time
 from typing import Any, Dict, List, Optional

@@ -57,6 +60,128 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
+    """Create an informative 1-line summary of a tool call + result.
+
+    Used during the pre-compression pruning pass to replace large tool
+    outputs with a short but useful description of what the tool did,
+    rather than a generic placeholder that carries zero information.
+
+    Tool arguments are model-generated, so this function tolerates odd
+    input instead of raising: arguments that are not valid JSON, that
+    decode to a non-object (``null``, a list, a bare string), or whose
+    fields are ``null`` / non-string all fall back to defaults.
+
+    Args:
+        tool_name: Name of the tool that was called.
+        tool_args: JSON-encoded arguments of the call (may be empty).
+        tool_content: Raw tool result text being summarized.
+
+    Returns:
+        A single-line summary, for example::
+
+            [terminal] ran `npm test` -> exit 0, 47 lines output
+            [read_file] read config.py from line 1 (1,200 chars)
+            [search_files] content search for 'compress' in agent/ -> 12 matches
+    """
+    try:
+        args = json.loads(tool_args) if tool_args else {}
+    except (json.JSONDecodeError, TypeError):
+        args = {}
+    # json.loads also returns lists / strings / numbers / None for valid
+    # JSON; every branch below needs a mapping, so treat those as "no args".
+    if not isinstance(args, dict):
+        args = {}
+
+    def _text(key: str, default: str = "") -> str:
+        """Return ``args[key]`` as a string; *default* when missing or null."""
+        value = args.get(key)
+        if value is None:
+            return default
+        return value if isinstance(value, str) else str(value)
+
+    content = tool_content or ""
+    content_len = len(content)
+    line_count = content.count("\n") + 1 if content.strip() else 0
+
+    if tool_name == "terminal":
+        cmd = _text("command")
+        if len(cmd) > 80:
+            cmd = cmd[:77] + "..."
+        # The exit code is scraped from the JSON-ish result text.
+        exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
+        exit_code = exit_match.group(1) if exit_match else "?"
+        return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
+
+    if tool_name == "read_file":
+        path = _text("path", "?")
+        offset = args.get("offset", 1)
+        return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
+
+    if tool_name == "write_file":
+        path = _text("path", "?")
+        body = args.get("content")
+        # Only count lines when the written content is a non-empty string.
+        written_lines = body.count("\n") + 1 if isinstance(body, str) and body else "?"
+        return f"[write_file] wrote to {path} ({written_lines} lines)"
+
+    if tool_name == "search_files":
+        pattern = _text("pattern", "?")
+        path = _text("path", ".")
+        target = _text("target", "content")
+        match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
+        count = match_count.group(1) if match_count else "?"
+        return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
+
+    if tool_name == "patch":
+        path = _text("path", "?")
+        mode = _text("mode", "replace")
+        return f"[patch] {mode} in {path} ({content_len:,} chars result)"
+
+    if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
+                     "browser_type", "browser_scroll", "browser_vision"):
+        url = _text("url")
+        ref = _text("ref")
+        detail = f" {url}" if url else (f" ref={ref}" if ref else "")
+        return f"[{tool_name}]{detail} ({content_len:,} chars)"
+
+    if tool_name == "web_search":
+        query = _text("query", "?")
+        return f"[web_search] query='{query}' ({content_len:,} chars result)"
+
+    if tool_name == "web_extract":
+        urls = args.get("urls", [])
+        if isinstance(urls, list) and urls:
+            # str() guards the concatenation below against non-string entries.
+            url_desc = str(urls[0])
+            if len(urls) > 1:
+                url_desc += f" (+{len(urls) - 1} more)"
+        else:
+            url_desc = "?"
+        return f"[web_extract] {url_desc} ({content_len:,} chars)"
+
+    if tool_name == "delegate_task":
+        goal = _text("goal")
+        if len(goal) > 60:
+            goal = goal[:57] + "..."
+        return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
+
+    if tool_name == "execute_code":
+        code = _text("code")
+        code_preview = code[:60].replace("\n", " ")
+        if len(code) > 60:
+            code_preview += "..."
+        return f"[execute_code] `{code_preview}` ({line_count} lines output)"
+
+    if tool_name in ("skill_view", "skills_list", "skill_manage"):
+        name = _text("name", "?")
+        return f"[{tool_name}] name={name} ({content_len:,} chars)"
+
+    if tool_name == "vision_analyze":
+        question = _text("question")[:50]
+        return f"[vision_analyze] '{question}' ({content_len:,} chars)"
+
+    if tool_name == "memory":
+        action = _text("action", "?")
+        target = _text("target", "?")
+        return f"[memory] {action} on {target}"
+
+    if tool_name == "todo":
+        return "[todo] updated task list"
+
+    if tool_name == "clarify":
+        return "[clarify] asked user a question"
+
+    if tool_name == "text_to_speech":
+        return f"[text_to_speech] generated audio ({content_len:,} chars)"
+
+    if tool_name == "cronjob":
+        action = _text("action", "?")
+        return f"[cronjob] {action}"
+
+    if tool_name == "process":
+        action = _text("action", "?")
+        sid = _text("session_id", "?")
+        return f"[process] {action} session={sid}"
+
+    # Generic fallback: show at most the first two arguments, each clipped.
+    first_arg = ""
+    for k, v in list(args.items())[:2]:
+        sv = str(v)[:40]
+        first_arg += f" {k}={sv}"
+    return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
+
+
 class ContextCompressor(ContextEngine):
    """Default context engine — compresses conversation context via lossy summarization.

@@ -78,6 +203,8 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
+        self._last_compression_savings_pct = 100.0
+        self._ineffective_compression_count = 0

    def update_model(
        self,
@@ -167,6 +294,9 @@ class ContextCompressor(ContextEngine):

        # Stores the previous compaction summary for iterative updates
        self._previous_summary: Optional[str] = None
+        # Anti-thrashing: track whether last compression was effective
+        self._last_compression_savings_pct: float = 100.0
+        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0

    def update_from_response(self, usage: Dict[str, Any]):
@@ -175,9 +305,26 @@ class ContextCompressor(ContextEngine):
        self.last_completion_tokens = usage.get("completion_tokens", 0)

    def should_compress(self, prompt_tokens: int = None) -> bool:
-        """Check if context exceeds the compression threshold."""
+        """Check if context exceeds the compression threshold.
+
+        Includes anti-thrashing protection: if the last two compressions
+        each saved less than 10%, skip compression to avoid infinite loops
+        where each pass removes only 1-2 messages.
+        """
        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
-        return tokens >= self.threshold_tokens
+        if tokens < self.threshold_tokens:
+            return False
+        # Anti-thrashing: back off if recent compressions were ineffective
+        if self._ineffective_compression_count >= 2:
+            if not self.quiet_mode:
+                logger.warning(
+                    "Compression skipped — last %d compressions saved <10%% each. "
+                    "Consider /new to start a fresh session, or /compress <topic> "
+                    "for focused compression.",
+                    self._ineffective_compression_count,
+                )
+            return False
+        return True

    # ------------------------------------------------------------------
    # Tool output pruning (cheap pre-pass, no LLM call)
@@ -187,7 +334,16 @@ class ContextCompressor(ContextEngine):
        self, messages: List[Dict[str, Any]], protect_tail_count: int,
        protect_tail_tokens: int | None = None,
    ) -> tuple[List[Dict[str, Any]], int]:
-        """Replace old tool result contents with a short placeholder.
+        """Replace old tool result contents with informative 1-line summaries.
+
+        Instead of a generic placeholder, generates a summary like::
+
+            [terminal] ran `npm test` -> exit 0, 47 lines output
+            [read_file] read config.py from line 1 (3,400 chars)
+
+        Also deduplicates identical tool results (e.g. reading the same file
+        5x keeps only the newest full copy) and truncates large tool_call
+        arguments in assistant messages outside the protected tail.

        Walks backward from the end, protecting the most recent messages that
        fall within ``protect_tail_tokens`` (when provided) OR the last
@@ -203,6 +359,22 @@ class ContextCompressor(ContextEngine):
        result = [m.copy() for m in messages]
        pruned = 0

+        # Build index: tool_call_id -> (tool_name, arguments_json)
+        call_id_to_tool: Dict[str, tuple] = {}
+        for msg in result:
+            if msg.get("role") == "assistant":
+                for tc in msg.get("tool_calls") or []:
+                    if isinstance(tc, dict):
+                        cid = tc.get("id", "")
+                        fn = tc.get("function", {})
+                        call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
+                    else:
+                        cid = getattr(tc, "id", "") or ""
+                        fn = getattr(tc, "function", None)
+                        name = getattr(fn, "name", "unknown") if fn else "unknown"
+                        args_str = getattr(fn, "arguments", "") if fn else ""
+                        call_id_to_tool[cid] = (name, args_str)
+
        # Determine the prune boundary
        if protect_tail_tokens is not None and protect_tail_tokens > 0:
            # Token-budget approach: walk backward accumulating tokens
@@ -211,7 +383,8 @@ class ContextCompressor(ContextEngine):
            min_protect = min(protect_tail_count, len(result) - 1)
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
-                content_len = len(msg.get("content") or "")
+                raw_content = msg.get("content") or ""
+                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -226,18 +399,69 @@ class ContextCompressor(ContextEngine):
        else:
            prune_boundary = len(result) - protect_tail_count

+        # Pass 1: Deduplicate identical tool results.
+        # When the same file is read multiple times, keep only the most recent
+        # full copy and replace older duplicates with a back-reference.
+        content_hashes: dict = {}  # hash -> (index, tool_call_id)
+        for i in range(len(result) - 1, -1, -1):
+            msg = result[i]
+            if msg.get("role") != "tool":
+                continue
+            content = msg.get("content") or ""
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
+            if len(content) < 200:
+                continue
+            h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
+            if h in content_hashes:
+                # This is an older duplicate — replace with back-reference
+                result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
+                pruned += 1
+            else:
+                content_hashes[h] = (i, msg.get("tool_call_id", "?"))
+
+        # Pass 2: Replace old tool results with informative summaries
        for i in range(prune_boundary):
            msg = result[i]
            if msg.get("role") != "tool":
                continue
            content = msg.get("content", "")
+            # Skip multimodal content (list of content blocks)
+            if isinstance(content, list):
+                continue
            if not content or content == _PRUNED_TOOL_PLACEHOLDER:
                continue
+            # Skip already-deduplicated or previously-summarized results
+            if content.startswith("[Duplicate tool output"):
+                continue
            # Only prune if the content is substantial (>200 chars)
            if len(content) > 200:
-                result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
+                call_id = msg.get("tool_call_id", "")
+                tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
+                summary = _summarize_tool_result(tool_name, tool_args, content)
+                result[i] = {**msg, "content": summary}
                pruned += 1

+        # Pass 3: Truncate large tool_call arguments in assistant messages
+        # outside the protected tail. write_file with 50KB content, for
+        # example, survives pruning entirely without this.
+        for i in range(prune_boundary):
+            msg = result[i]
+            if msg.get("role") != "assistant" or not msg.get("tool_calls"):
+                continue
+            new_tcs = []
+            modified = False
+            for tc in msg["tool_calls"]:
+                if isinstance(tc, dict):
+                    args = tc.get("function", {}).get("arguments", "")
+                    if len(args) > 500:
+                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
+                        modified = True
+                new_tcs.append(tc)
+            if modified:
+                result[i] = {**msg, "tool_calls": new_tcs}
+
        return result, pruned

    # ------------------------------------------------------------------
@@ -357,29 +581,37 @@ class ContextCompressor(ContextEngine):
        )

        # Shared structured template (used by both paths).
-        # Key changes vs v1:
-        #   - "Pending User Asks" section (from Claude Code) explicitly tracks
-        #     unanswered questions so the model knows what's resolved vs open
-        #   - "Remaining Work" replaces "Next Steps" to avoid reading as active
-        #     instructions
-        #   - "Resolved Questions" makes it clear which questions were already
-        #     answered (prevents model from re-answering them)
        _template_sections = f"""## Goal
 [What the user is trying to accomplish]

 ## Constraints & Preferences
 [User preferences, coding style, constraints, important decisions]

-## Progress
-### Done
-[Completed work — include specific file paths, commands run, results obtained]
-### In Progress
-[Work currently underway]
-### Blocked
-[Any blockers or issues encountered]
+## Completed Actions
+[Numbered list of concrete actions taken — include tool used, target, and outcome.
+Format each as: N. ACTION target — outcome [tool: name]
+Example:
+1. READ config.py:45 — found `==` should be `!=` [tool: read_file]
+2. PATCH config.py:45 — changed `==` to `!=` [tool: patch]
+3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
+Be specific with file paths, commands, line numbers, and results.]
+
+## Active State
+[Current working state — include:
+- Working directory and branch (if applicable)
+- Modified/created files with brief note on each
+- Test status (X/Y passing)
+- Any running processes or servers
+- Environment details that matter]
+
+## In Progress
+[Work currently underway — what was being done when compaction fired]
+
+## Blocked
+[Any blockers, errors, or issues not yet resolved. Include exact error messages.]

 ## Key Decisions
-[Important technical decisions and why they were made]
+[Important technical decisions and WHY they were made]

 ## Resolved Questions
 [Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
@@ -396,10 +628,7 @@ class ContextCompressor(ContextEngine):
 ## Critical Context
 [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]

-## Tools & Patterns
-[Which tools were used, how they were used effectively, and any tool-specific discoveries]
-
-Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
+Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

 Write only the summary body. Do not include any preamble or prefix."""

@@ -415,7 +644,7 @@ PREVIOUS SUMMARY:
 NEW TURNS TO INCORPORATE:
 {content_to_summarize}

-Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
+Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete.

 {_template_sections}"""
        else:
@@ -450,7 +679,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "api_mode": self.api_mode,
                },
                "messages": [{"role": "user", "content": prompt}],
-                "max_tokens": summary_budget * 2,
+                "max_tokens": int(summary_budget * 1.3),
                # timeout resolved from auxiliary.compression.timeout config by call_llm
            }
            if self.summary_model:
@@ -464,8 +693,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
+            self._summary_model_fallen_back = False
            return self._with_summary_prefix(summary)
        except RuntimeError:
+            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
@@ -473,12 +704,42 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                            _SUMMARY_FAILURE_COOLDOWN_SECONDS)
            return None
        except Exception as e:
-            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            # If the summary model is different from the main model and the
+            # error looks permanent (model not found, 503, 404), fall back to
+            # using the main model instead of entering cooldown that leaves
+            # context growing unbounded.  (#8620 sub-issue 4)
+            _status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
+            _err_str = str(e).lower()
+            _is_model_not_found = (
+                _status in (404, 503)
+                or "model_not_found" in _err_str
+                or "does not exist" in _err_str
+                or "no available channel" in _err_str
+            )
+            if (
+                _is_model_not_found
+                and self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' not available (%s). "
+                    "Falling back to main model '%s' for compression.",
+                    self.summary_model, e, self.model,
+                )
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0  # no cooldown
+                return self._generate_summary(messages, summary_budget)  # retry immediately
+
+            # Transient errors (timeout, rate limit, network) — shorter cooldown
+            _transient_cooldown = 60
+            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
                e,
-                _SUMMARY_FAILURE_COOLDOWN_SECONDS,
+                _transient_cooldown,
            )
            return None

@@ -744,11 +1005,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        compressed = []
        for i in range(compress_start):
            msg = messages[i].copy()
-            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
-                msg["content"] = (
-                    (msg.get("content") or "")
-                    + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                )
+            if i == 0 and msg.get("role") == "system":
+                existing = msg.get("content") or ""
+                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
+                if _compression_note not in existing:
+                    msg["content"] = existing + "\n\n" + _compression_note
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -806,14 +1067,24 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        compressed = self._sanitize_tool_pairs(compressed)

+        new_estimate = estimate_messages_tokens_rough(compressed)
+        saved_estimate = display_tokens - new_estimate
+
+        # Anti-thrashing: track compression effectiveness
+        savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
+        self._last_compression_savings_pct = savings_pct
+        if savings_pct < 10:
+            self._ineffective_compression_count += 1
+        else:
+            self._ineffective_compression_count = 0
+
        if not self.quiet_mode:
-            new_estimate = estimate_messages_tokens_rough(compressed)
-            saved_estimate = display_tokens - new_estimate
            logger.info(
-                "Compressed: %d -> %d messages (~%d tokens saved)",
+                "Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
                n_messages,
                len(compressed),
                saved_estimate,
+                savings_pct,
            )
            logger.info("Compression #%d complete", self.compression_count)

@@ -36,6 +36,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
+    "xai", "x-ai", "x.ai", "grok",
    "qwen-portal",
 })

@@ -989,6 +989,7 @@ def _prune_orphaned_branches(repo_root: str) -> None:
 _ACCENT_ANSI_DEFAULT = "\033[1;38;2;255;215;0m"  # True-color #FFD700 bold — fallback
 _BOLD = "\033[1m"
 _RST = "\033[0m"
+_STREAM_PAD = "    "  # 4-space indent for streamed response text (matches Panel padding)


 def _hex_to_ansi(hex_color: str, *, bold: bool = False) -> str:
@@ -1712,9 +1713,9 @@ class HermesCLI:
        # Parse and validate toolsets
        self.enabled_toolsets = toolsets
        if toolsets and "all" not in toolsets and "*" not in toolsets:
-            # Validate each toolset — MCP server names are added by
-            # _get_platform_tools() but aren't registered in TOOLSETS yet
-            # (that happens later in _sync_mcp_toolsets), so exclude them.
+            # Validate each toolset — MCP server names are resolved via
+            # live registry aliases (registered during discover_mcp_tools),
+            # but discovery hasn't run yet at this point, so exclude them.
            mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys())
            invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names]
            if invalid:
@@ -2580,7 +2581,7 @@ class HermesCLI:
        _tc = getattr(self, "_stream_text_ansi", "")
        while "\n" in self._stream_buf:
            line, self._stream_buf = self._stream_buf.split("\n", 1)
-            _cprint(f"{_tc}{line}{_RST}" if _tc else line)
+            _cprint(f"{_STREAM_PAD}{_tc}{line}{_RST}" if _tc else f"{_STREAM_PAD}{line}")

    def _flush_stream(self) -> None:
        """Emit any remaining partial line from the stream buffer and close the box."""
@@ -2597,7 +2598,7 @@ class HermesCLI:

        if self._stream_buf:
            _tc = getattr(self, "_stream_text_ansi", "")
-            _cprint(f"{_tc}{self._stream_buf}{_RST}" if _tc else self._stream_buf)
+            _cprint(f"{_STREAM_PAD}{_tc}{self._stream_buf}{_RST}" if _tc else f"{_STREAM_PAD}{self._stream_buf}")
            self._stream_buf = ""

        # Close the response box
@@ -5761,7 +5762,7 @@ class HermesCLI:
                        border_style=_resp_color,
                        style=_resp_text,
                        box=rich_box.HORIZONTALS,
-                        padding=(1, 2),
+                        padding=(1, 4),
                    ))
                else:
                    _cprint("  (No response generated)")
@@ -5885,7 +5886,7 @@ class HermesCLI:
                        title_align="left",
                        border_style=_resp_color,
                        box=rich_box.HORIZONTALS,
-                        padding=(1, 2),
+                        padding=(1, 4),
                    ))
                else:
                    _cprint("  💬 /btw: (no response)")
@@ -7648,7 +7649,7 @@ class HermesCLI:
                        label = " ⚕ Hermes "
                        fill = w - 2 - len(label)
                        _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
-                    _cprint(sentence.rstrip())
+                    _cprint(f"{_STREAM_PAD}{sentence.rstrip()}")

                tts_thread = threading.Thread(
                    target=stream_tts_to_speaker,
@@ -7879,7 +7880,7 @@ class HermesCLI:
                        border_style=_resp_color,
                        style=_resp_text,
                        box=rich_box.HORIZONTALS,
-                        padding=(1, 2),
+                        padding=(1, 4),
                    ))


@@ -8631,6 +8632,24 @@ class HermesCLI:
            self._should_exit = True
            event.app.exit()

+        _modal_prompt_active = Condition(
+            lambda: bool(self._secret_state or self._sudo_state)
+        )
+
+        @kb.add('escape', filter=_modal_prompt_active, eager=True)
+        def handle_escape_modal(event):
+            """ESC dismisses whichever modal prompt (secret or sudo) is active."""
+            if self._secret_state:
+                self._cancel_secret_capture()
+                event.app.current_buffer.reset()
+            elif self._sudo_state:
+                # Empty response unblocks the sudo waiter; presumably read as "skip".
+                self._sudo_state["response_queue"].put("")
+                self._sudo_state = None
+            else:
+                return
+            event.app.invalidate()
+
        @kb.add('c-z')
        def handle_ctrl_z(event):
            """Handle Ctrl+Z - suspend process to background (Unix only)."""
@@ -8928,9 +8947,9 @@ class HermesCLI:
            if cli_ref._voice_processing:
                return "transcribing..."
            if cli_ref._sudo_state:
-                return "type password (hidden), Enter to skip"
+                return "type password (hidden), Enter to submit · ESC to skip"
            if cli_ref._secret_state:
-                return "type secret (hidden), Enter to skip"
+                return "type secret (hidden), Enter to submit · ESC to skip"
            if cli_ref._approval_state:
                return ""
            if cli_ref._clarify_freetext:
@@ -9173,7 +9192,7 @@ class HermesCLI:
            prompt = state.get("prompt") or f"Enter value for {state.get('var_name', 'secret')}"
            metadata = state.get("metadata") or {}
            help_text = metadata.get("help")
-            body = 'Enter secret below (hidden), or press Enter to skip'
+            body = 'Enter secret below (hidden), ESC or Ctrl+C to skip'
            content_lines = [prompt, body]
            if help_text:
                content_lines.insert(1, str(help_text))
@@ -1,13 +1,14 @@
 #!/bin/bash
-# Docker entrypoint: bootstrap config files into the mounted volume, then run hermes.
+# Docker/Podman entrypoint: bootstrap config files into the mounted volume, then run hermes.
 set -e

-HERMES_HOME="/opt/data"
+HERMES_HOME="${HERMES_HOME:-/opt/data}"
 INSTALL_DIR="/opt/hermes"

 # --- Privilege dropping via gosu ---
-# When started as root (the default), optionally remap the hermes user/group
-# to match host-side ownership, fix volume permissions, then re-exec as hermes.
+# When started as root (the default for Docker, or fakeroot in rootless Podman),
+# optionally remap the hermes user/group to match host-side ownership, fix volume
+# permissions, then re-exec as hermes.
 if [ "$(id -u)" = "0" ]; then
    if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
        echo "Changing hermes UID to $HERMES_UID"
@@ -16,13 +17,19 @@ if [ "$(id -u)" = "0" ]; then

    if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
        echo "Changing hermes GID to $HERMES_GID"
-        groupmod -g "$HERMES_GID" hermes
+        # -o allows non-unique GID (e.g. macOS GID 20 "staff" may already exist
+        # as "dialout" in the Debian-based container image)
+        groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
    fi

    actual_hermes_uid=$(id -u hermes)
    if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
        echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
-        chown -R hermes:hermes "$HERMES_HOME"
+        # In rootless Podman the container's "root" is mapped to an unprivileged
+        # host UID — chown will fail.  That's fine: the volume is already owned
+        # by the mapped user on the host side.
+        chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
+            echo "Warning: chown failed (rootless container?) — continuing anyway"
    fi

    echo "Dropping root privileges"
@@ -10,6 +10,7 @@ Exposes an HTTP server with endpoints:
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
 - GET  /health                     — health check
+- GET  /health/detailed            — rich status for cross-container dashboard probing

 Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat,
 AnythingLLM, NextChat, ChatBox, etc.) can connect to hermes-agent
@@ -514,6 +515,8 @@ class APIServerAdapter(BasePlatformAdapter):
        session_id: Optional[str] = None,
        stream_delta_callback=None,
        tool_progress_callback=None,
+        tool_start_callback=None,
+        tool_complete_callback=None,
    ) -> Any:
        """
        Create an AIAgent instance using the gateway's runtime config.
@@ -552,6 +555,8 @@ class APIServerAdapter(BasePlatformAdapter):
            platform="api_server",
            stream_delta_callback=stream_delta_callback,
            tool_progress_callback=tool_progress_callback,
+            tool_start_callback=tool_start_callback,
+            tool_complete_callback=tool_complete_callback,
            session_db=self._ensure_session_db(),
            fallback_model=fallback_model,
        )
@@ -565,6 +570,27 @@ class APIServerAdapter(BasePlatformAdapter):
        """GET /health — simple health check."""
        return web.json_response({"status": "ok", "platform": "hermes-agent"})

+    async def _handle_health_detailed(self, request: "web.Request") -> "web.Response":
+        """GET /health/detailed — runtime snapshot for dashboards in other containers.
+
+        Combines the status returned by ``read_runtime_status()`` (gateway
+        state, platforms map, active-agent count, exit reason, last-update
+        timestamp) with this process's PID, so a dashboard does not need a
+        shared PID file or /proc access.  A missing status falls back to
+        defaults instead of failing.  No auth check is performed here
+        (contrast ``_handle_models``).
+
+        NOTE(review): no uptime field is returned; clients only get
+        ``updated_at`` — confirm that is sufficient for the dashboard.
+        """
+        from gateway.status import read_runtime_status
+
+        snapshot = read_runtime_status() or {}
+        body = {
+            "status": "ok",
+            "platform": "hermes-agent",
+            "gateway_state": snapshot.get("gateway_state"),
+            "platforms": snapshot.get("platforms", {}),
+            "active_agents": snapshot.get("active_agents", 0),
+            "exit_reason": snapshot.get("exit_reason"),
+            "updated_at": snapshot.get("updated_at"),
+            "pid": os.getpid(),
+        }
+        return web.json_response(body)
+
    async def _handle_models(self, request: "web.Request") -> "web.Response":
        """GET /v1/models — return hermes-agent as an available model."""
        auth_err = self._check_auth(request)
@@ -943,6 +969,427 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

+    async def _write_sse_responses(
+        self,
+        request: "web.Request",
+        response_id: str,
+        model: str,
+        created_at: int,
+        stream_q,
+        agent_task,
+        agent_ref,
+        conversation_history: List[Dict[str, str]],
+        user_message: str,
+        instructions: Optional[str],
+        conversation: Optional[str],
+        store: bool,
+        session_id: str,
+    ) -> "web.StreamResponse":
+        """Write an SSE stream for POST /v1/responses (OpenAI Responses API).
+
+        Emits spec-compliant event types as the agent runs:
+
+        - ``response.created`` — initial envelope (status=in_progress)
+        - ``response.output_text.delta`` / ``response.output_text.done`` —
+          streamed assistant text
+        - ``response.output_item.added`` / ``response.output_item.done``
+          with ``item.type == "function_call"`` — when the agent invokes a
+          tool (both events fire; the ``done`` event carries the finalized
+          ``arguments`` string)
+        - ``response.output_item.added`` / ``response.output_item.done`` with
+          ``item.type == "function_call_output"`` — tool result with
+          ``{call_id, output, status}`` (both events fire back to back)
+        - ``response.completed`` — terminal event carrying the full
+          response object with all output items + usage (same payload
+          shape as the non-streaming path for parity)
+        - ``response.failed`` — terminal event on agent error
+
+        If the client disconnects mid-stream, ``agent.interrupt()`` is
+        called so the agent stops issuing upstream LLM calls, then the
+        asyncio task is cancelled.  When ``store=True`` the full response
+        is persisted to the ResponseStore in a ``finally`` block so GET
+        /v1/responses/{id} and ``previous_response_id`` chaining work the
+        same as the batch path.
+        """
+        import queue as _q
+
+        sse_headers = {
+            "Content-Type": "text/event-stream",
+            "Cache-Control": "no-cache",
+            "X-Accel-Buffering": "no",
+        }
+        origin = request.headers.get("Origin", "")
+        cors = self._cors_headers_for_origin(origin) if origin else None
+        if cors:
+            sse_headers.update(cors)
+        if session_id:
+            sse_headers["X-Hermes-Session-Id"] = session_id
+        response = web.StreamResponse(status=200, headers=sse_headers)
+        await response.prepare(request)
+
+        # State accumulated during the stream
+        final_text_parts: List[str] = []
+        # Track open function_call items by call_id so we can emit a matching
+        # ``done`` event when the tool completes.  Order preserved.
+        pending_tool_calls: List[Dict[str, Any]] = []
+        # Output items we've emitted so far (used to build the terminal
+        # response.completed payload).  Kept in the order they appeared.
+        emitted_items: List[Dict[str, Any]] = []
+        # Monotonic counter for output_index (spec requires it).
+        output_index = 0
+        # Monotonic counter for call_id generation if the agent doesn't
+        # provide one (it doesn't, from tool_progress_callback).
+        call_counter = 0
+        # Canonical Responses SSE events include a monotonically increasing
+        # sequence_number. Add it server-side for every emitted event so
+        # clients that validate the OpenAI event schema can parse our stream.
+        sequence_number = 0
+        # Track the assistant message item id + content index for text
+        # delta events — the spec ties deltas to a specific item.
+        message_item_id = f"msg_{uuid.uuid4().hex[:24]}"
+        message_output_index: Optional[int] = None
+        message_opened = False
+
+        async def _write_event(event_type: str, data: Dict[str, Any]) -> None:
+            """Serialize one SSE frame and write it to the client.
+
+            Stamps ``data`` (in place) with the next ``sequence_number`` unless
+            the caller already set one; the counter advances either way.
+            """
+            nonlocal sequence_number
+            data.setdefault("sequence_number", sequence_number)
+            sequence_number += 1
+            frame = f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+            await response.write(frame.encode())
+
+        def _envelope(status: str) -> Dict[str, Any]:
+            """Return a fresh base response object carrying the given ``status``."""
+            return {
+                "id": response_id,
+                "object": "response",
+                "status": status,
+                "created_at": created_at,
+                "model": model,
+            }
+
+        final_response_text = ""
+        agent_error: Optional[str] = None
+        usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+
+        try:
+            # response.created — initial envelope, status=in_progress
+            created_env = _envelope("in_progress")
+            created_env["output"] = []
+            await _write_event("response.created", {
+                "type": "response.created",
+                "response": created_env,
+            })
+            last_activity = time.monotonic()
+
+            async def _open_message_item() -> None:
+                """Announce the assistant message item exactly once.
+
+                The first call claims the next ``output_index`` for the
+                message and emits ``response.output_item.added``; every later
+                call is a no-op.
+                """
+                nonlocal message_opened, message_output_index, output_index
+                if message_opened:
+                    return
+                message_opened = True
+                message_output_index, output_index = output_index, output_index + 1
+                await _write_event("response.output_item.added", {
+                    "type": "response.output_item.added",
+                    "output_index": message_output_index,
+                    "item": {
+                        "id": message_item_id,
+                        "type": "message",
+                        "status": "in_progress",
+                        "role": "assistant",
+                        "content": [],
+                    },
+                })
+
+            async def _emit_text_delta(delta_text: str) -> None:
+                """Stream one chunk of assistant text.
+
+                Opens the message item first if it has not been announced
+                yet, records the chunk for the final text, then emits
+                ``response.output_text.delta``.
+                """
+                await _open_message_item()
+                final_text_parts.append(delta_text)
+                delta_event = {
+                    "type": "response.output_text.delta",
+                    "item_id": message_item_id,
+                    "output_index": message_output_index,
+                    "content_index": 0,
+                    "delta": delta_text,
+                    "logprobs": [],
+                }
+                await _write_event("response.output_text.delta", delta_event)
+
+            async def _emit_tool_started(payload: Dict[str, Any]) -> str:
+                """Announce a tool invocation as an in-progress ``function_call`` item.
+
+                Uses the payload's ``tool_call_id`` when present and otherwise
+                synthesizes one from the response id and a per-stream counter.
+                The call is recorded as pending (so its completion can be
+                paired with it) and in the list of items for the terminal
+                response payload.
+
+                Returns the call id that was used.
+                """
+                nonlocal output_index, call_counter
+                call_counter += 1
+                call_id = payload.get("tool_call_id") or f"call_{response_id[5:]}_{call_counter}"
+                tool_name = payload.get("name", "")
+                raw_args = payload.get("arguments", {})
+                # Dict arguments are serialized as JSON; anything else is stringified as-is.
+                arguments_str = json.dumps(raw_args) if isinstance(raw_args, dict) else str(raw_args)
+                item_id = f"fc_{uuid.uuid4().hex[:24]}"
+                idx = output_index
+                output_index += 1
+                pending_tool_calls.append({
+                    "call_id": call_id,
+                    "name": tool_name,
+                    "arguments": arguments_str,
+                    "item_id": item_id,
+                    "output_index": idx,
+                })
+                emitted_items.append({
+                    "type": "function_call",
+                    "name": tool_name,
+                    "arguments": arguments_str,
+                    "call_id": call_id,
+                })
+                await _write_event("response.output_item.added", {
+                    "type": "response.output_item.added",
+                    "output_index": idx,
+                    "item": {
+                        "id": item_id,
+                        "type": "function_call",
+                        "status": "in_progress",
+                        "name": tool_name,
+                        "call_id": call_id,
+                        "arguments": arguments_str,
+                    },
+                })
+                return call_id
+
+            async def _emit_tool_completed(payload: Dict[str, Any]) -> None:
+                """Close out a pending tool call and publish its result.
+
+                Emits, in order: ``response.output_item.done`` for the pending
+                ``function_call``, then ``response.output_item.added`` and
+                ``response.output_item.done`` for a new
+                ``function_call_output`` item carrying the result.
+
+                The pending call is looked up by ``tool_call_id``.  When the
+                payload has no ``tool_call_id`` (``_emit_tool_started``
+                synthesizes a call id in that case, so an id lookup could
+                never succeed), the oldest pending call with the same tool
+                ``name`` is used instead.  A completion that matches nothing
+                is dropped so no orphaned ``done`` event is emitted.
+                """
+                nonlocal output_index
+                call_id = payload.get("tool_call_id")
+                result = payload.get("result", "")
+                if call_id:
+                    match_idx = next(
+                        (i for i, p in enumerate(pending_tool_calls) if p["call_id"] == call_id),
+                        None,
+                    )
+                else:
+                    # Fallback pairing for id-less payloads: oldest pending call
+                    # with the same tool name.
+                    # NOTE(review): assumes completion payloads carry ``name``
+                    # like start payloads do — confirm against the callback.
+                    tool_name = payload.get("name")
+                    match_idx = next(
+                        (
+                            i for i, p in enumerate(pending_tool_calls)
+                            if tool_name and p["name"] == tool_name
+                        ),
+                        None,
+                    )
+                if match_idx is None:
+                    # Completion without a matching start — skip to avoid
+                    # emitting orphaned done events.
+                    return
+                pending = pending_tool_calls.pop(match_idx)
+
+                # function_call done
+                done_item = {
+                    "id": pending["item_id"],
+                    "type": "function_call",
+                    "status": "completed",
+                    "name": pending["name"],
+                    "call_id": pending["call_id"],
+                    "arguments": pending["arguments"],
+                }
+                await _write_event("response.output_item.done", {
+                    "type": "response.output_item.done",
+                    "output_index": pending["output_index"],
+                    "item": done_item,
+                })
+
+                # function_call_output added (result); non-string results are
+                # serialized as JSON.
+                result_str = result if isinstance(result, str) else json.dumps(result)
+                output_parts = [{"type": "input_text", "text": result_str}]
+                output_item = {
+                    "id": f"fco_{uuid.uuid4().hex[:24]}",
+                    "type": "function_call_output",
+                    "call_id": pending["call_id"],
+                    "output": output_parts,
+                    "status": "completed",
+                }
+                idx = output_index
+                output_index += 1
+                emitted_items.append({
+                    "type": "function_call_output",
+                    "call_id": pending["call_id"],
+                    "output": output_parts,
+                })
+                await _write_event("response.output_item.added", {
+                    "type": "response.output_item.added",
+                    "output_index": idx,
+                    "item": output_item,
+                })
+                # The output item is complete as soon as it exists, so its
+                # ``done`` event follows immediately.
+                await _write_event("response.output_item.done", {
+                    "type": "response.output_item.done",
+                    "output_index": idx,
+                    "item": output_item,
+                })
+
+            # Main drain loop — thread-safe queue fed by agent callbacks.
+            async def _dispatch(it) -> None:
+                """Forward one queue item to the matching SSE emitter.
+
+                A plain string is a text delta.  A 2-tuple whose first
+                element is the string ``__tool_started__`` or
+                ``__tool_completed__`` is a tool lifecycle event with its
+                payload as the second element.  Anything else — unknown
+                tags included, for forward compatibility — is ignored.
+                """
+                if isinstance(it, str):
+                    await _emit_text_delta(it)
+                    return
+                if not (isinstance(it, tuple) and len(it) == 2 and isinstance(it[0], str)):
+                    return
+                tag, payload = it
+                if tag == "__tool_started__":
+                    await _emit_tool_started(payload)
+                elif tag == "__tool_completed__":
+                    await _emit_tool_completed(payload)
+
+            loop = asyncio.get_event_loop()
+            while True:
+                try:
+                    item = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
+                except _q.Empty:
+                    if agent_task.done():
+                        # Drain remaining
+                        while True:
+                            try:
+                                item = stream_q.get_nowait()
+                                if item is None:
+                                    break
+                                await _dispatch(item)
+                                last_activity = time.monotonic()
+                            except _q.Empty:
+                                break
+                        break
+                    if time.monotonic() - last_activity >= CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS:
+                        await response.write(b": keepalive\n\n")
+                        last_activity = time.monotonic()
+                    continue
+
+                if item is None:  # EOS sentinel
+                    break
+
+                await _dispatch(item)
+                last_activity = time.monotonic()
+
+            # Pick up agent result + usage from the completed task
+            try:
+                result, agent_usage = await agent_task
+                usage = agent_usage or usage
+                # If the agent produced a final_response but no text
+                # deltas were streamed (e.g. some providers only emit
+                # the full response at the end), emit a single fallback
+                # delta so Responses clients still receive a live text part.
+                agent_final = result.get("final_response", "") if isinstance(result, dict) else ""
+                if agent_final and not final_text_parts:
+                    await _emit_text_delta(agent_final)
+                if agent_final and not final_response_text:
+                    final_response_text = agent_final
+                if isinstance(result, dict) and result.get("error") and not final_response_text:
+                    agent_error = result["error"]
+            except Exception as e:  # noqa: BLE001
+                logger.error("Error running agent for streaming responses: %s", e, exc_info=True)
+                agent_error = str(e)
+
+            # Close the message item if it was opened
+            final_response_text = "".join(final_text_parts) or final_response_text
+            if message_opened:
+                await _write_event("response.output_text.done", {
+                    "type": "response.output_text.done",
+                    "item_id": message_item_id,
+                    "output_index": message_output_index,
+                    "content_index": 0,
+                    "text": final_response_text,
+                    "logprobs": [],
+                })
+                msg_done_item = {
+                    "id": message_item_id,
+                    "type": "message",
+                    "status": "completed",
+                    "role": "assistant",
+                    "content": [
+                        {"type": "output_text", "text": final_response_text}
+                    ],
+                }
+                await _write_event("response.output_item.done", {
+                    "type": "response.output_item.done",
+                    "output_index": message_output_index,
+                    "item": msg_done_item,
+                })
+
+            # Always append a final message item in the completed
+            # response envelope so clients that only parse the terminal
+            # payload still see the assistant text.  This mirrors the
+            # shape produced by _extract_output_items in the batch path.
+            final_items: List[Dict[str, Any]] = list(emitted_items)
+            final_items.append({
+                "type": "message",
+                "role": "assistant",
+                "content": [
+                    {"type": "output_text", "text": final_response_text or (agent_error or "")}
+                ],
+            })
+
+            if agent_error:
+                failed_env = _envelope("failed")
+                failed_env["output"] = final_items
+                failed_env["error"] = {"message": agent_error, "type": "server_error"}
+                failed_env["usage"] = {
+                    "input_tokens": usage.get("input_tokens", 0),
+                    "output_tokens": usage.get("output_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                }
+                await _write_event("response.failed", {
+                    "type": "response.failed",
+                    "response": failed_env,
+                })
+            else:
+                completed_env = _envelope("completed")
+                completed_env["output"] = final_items
+                completed_env["usage"] = {
+                    "input_tokens": usage.get("input_tokens", 0),
+                    "output_tokens": usage.get("output_tokens", 0),
+                    "total_tokens": usage.get("total_tokens", 0),
+                }
+                await _write_event("response.completed", {
+                    "type": "response.completed",
+                    "response": completed_env,
+                })
+
+                # Persist for future chaining / GET retrieval, mirroring
+                # the batch path behavior.
+                if store:
+                    full_history = list(conversation_history)
+                    full_history.append({"role": "user", "content": user_message})
+                    if isinstance(result, dict) and result.get("messages"):
+                        full_history.extend(result["messages"])
+                    else:
+                        full_history.append({"role": "assistant", "content": final_response_text})
+                    self._response_store.put(response_id, {
+                        "response": completed_env,
+                        "conversation_history": full_history,
+                        "instructions": instructions,
+                        "session_id": session_id,
+                    })
+                    if conversation:
+                        self._response_store.set_conversation(conversation, response_id)
+
+        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
+            # Client disconnected — interrupt the agent so it stops
+            # making upstream LLM calls, then cancel the task.
+            agent = agent_ref[0] if agent_ref else None
+            if agent is not None:
+                try:
+                    agent.interrupt("SSE client disconnected")
+                except Exception:
+                    pass
+            if not agent_task.done():
+                agent_task.cancel()
+                try:
+                    await agent_task
+                except (asyncio.CancelledError, Exception):
+                    pass
+            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
+
+        return response
+
    async def _handle_responses(self, request: "web.Request") -> "web.Response":
        """POST /v1/responses — OpenAI Responses API format."""
        auth_err = self._check_auth(request)
@@ -1013,11 +1460,13 @@ class APIServerAdapter(BasePlatformAdapter):
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

+        stored_session_id = None
        if not conversation_history and previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored is None:
                return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
            conversation_history = list(stored.get("conversation_history", []))
+            stored_session_id = stored.get("session_id")
            # If no instructions provided, carry forward from previous
            if instructions is None:
                instructions = stored.get("instructions")
@@ -1035,8 +1484,83 @@ class APIServerAdapter(BasePlatformAdapter):
        if body.get("truncation") == "auto" and len(conversation_history) > 100:
            conversation_history = conversation_history[-100:]

-        # Run the agent (with Idempotency-Key support)
-        session_id = str(uuid.uuid4())
+        # Reuse session from previous_response_id chain so the dashboard
+        # groups the entire conversation under one session entry.
+        session_id = stored_session_id or str(uuid.uuid4())
+
+        stream = bool(body.get("stream", False))
+        if stream:
+            # Streaming branch — emit OpenAI Responses SSE events as the
+            # agent runs so frontends can render text deltas and tool
+            # calls in real time.  See _write_sse_responses for details.
+            import queue as _q
+            _stream_q: _q.Queue = _q.Queue()
+
+            def _on_delta(delta):
+                # The agent emits None as a CLI box-close marker, but a None
+                # on the queue is read by the SSE writer as end-of-stream.
+                # Drop it here so the stream is not cut short; the writer is
+                # expected to notice completion through agent_task.done().
+                if delta is None:
+                    return
+                _stream_q.put(delta)
+
+            def _on_tool_progress(event_type, name, preview, args, **kwargs):
+                """Accept tool progress events and deliberately discard them.
+
+                The structured Responses stream is fed by
+                ``tool_start_callback`` and ``tool_complete_callback``, which
+                carry the tool call id, so nothing is queued from this hook.
+                """
+                return None
+
+            def _on_tool_start(tool_call_id, function_name, function_args):
+                """Enqueue a ``__tool_started__`` event describing the tool call."""
+                started = {
+                    "tool_call_id": tool_call_id,
+                    "name": function_name,
+                    # Falsy arguments (None, empty) are normalized to {}.
+                    "arguments": function_args or {},
+                }
+                _stream_q.put(("__tool_started__", started))
+
+            def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
+                """Enqueue a ``__tool_completed__`` event carrying the tool result."""
+                completed = {
+                    "tool_call_id": tool_call_id,
+                    "name": function_name,
+                    # Falsy arguments (None, empty) are normalized to {}.
+                    "arguments": function_args or {},
+                    "result": function_result,
+                }
+                _stream_q.put(("__tool_completed__", completed))
+
+            agent_ref = [None]
+            agent_task = asyncio.ensure_future(self._run_agent(
+                user_message=user_message,
+                conversation_history=conversation_history,
+                ephemeral_system_prompt=instructions,
+                session_id=session_id,
+                stream_delta_callback=_on_delta,
+                tool_progress_callback=_on_tool_progress,
+                tool_start_callback=_on_tool_start,
+                tool_complete_callback=_on_tool_complete,
+                agent_ref=agent_ref,
+            ))
+
+            response_id = f"resp_{uuid.uuid4().hex[:28]}"
+            model_name = body.get("model", self._model_name)
+            created_at = int(time.time())
+
+            return await self._write_sse_responses(
+                request=request,
+                response_id=response_id,
+                model=model_name,
+                created_at=created_at,
+                stream_q=_stream_q,
+                agent_task=agent_task,
+                agent_ref=agent_ref,
+                conversation_history=conversation_history,
+                user_message=user_message,
+                instructions=instructions,
+                conversation=conversation,
+                store=store,
+                session_id=session_id,
+            )

        async def _compute_response():
            return await self._run_agent(
@@ -1111,6 +1635,7 @@ class APIServerAdapter(BasePlatformAdapter):
                "response": response_data,
                "conversation_history": full_history,
                "instructions": instructions,
+                "session_id": session_id,
            })
            # Update conversation mapping so the next request with the same
            # conversation name automatically chains to this response
@@ -1464,6 +1989,8 @@ class APIServerAdapter(BasePlatformAdapter):
        session_id: Optional[str] = None,
        stream_delta_callback=None,
        tool_progress_callback=None,
+        tool_start_callback=None,
+        tool_complete_callback=None,
        agent_ref: Optional[list] = None,
    ) -> tuple:
        """
@@ -1485,6 +2012,8 @@ class APIServerAdapter(BasePlatformAdapter):
                session_id=session_id,
                stream_delta_callback=stream_delta_callback,
                tool_progress_callback=tool_progress_callback,
+                tool_start_callback=tool_start_callback,
+                tool_complete_callback=tool_complete_callback,
            )
            if agent_ref is not None:
                agent_ref[0] = agent
@@ -1621,10 +2150,12 @@ class APIServerAdapter(BasePlatformAdapter):
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

+        stored_session_id = None
        if not conversation_history and previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored:
                conversation_history = list(stored.get("conversation_history", []))
+                stored_session_id = stored.get("session_id")
                if instructions is None:
                    instructions = stored.get("instructions")

@@ -1643,7 +2174,7 @@ class APIServerAdapter(BasePlatformAdapter):
                        )
                    conversation_history.append({"role": msg["role"], "content": str(content)})

-        session_id = body.get("session_id") or run_id
+        session_id = body.get("session_id") or stored_session_id or run_id
        ephemeral_system_prompt = instructions

        async def _run_and_close():
@@ -1783,6 +2314,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app = web.Application(middlewares=mws)
            self._app["api_server_adapter"] = self
            self._app.router.add_get("/health", self._handle_health)
+            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
@@ -1696,6 +1696,10 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_update(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/update", "Update initiated~")

+        # Native /restart slash command: forwards the "/restart" command text
+        # through _run_simple_slash.  The third argument is presumably the
+        # acknowledgement shown to the invoking user (confirm in that helper).
+        @tree.command(name="restart", description="Gracefully restart the Hermes gateway")
+        async def slash_restart(interaction: discord.Interaction):
+            await self._run_simple_slash(interaction, "/restart", "Restart requested~")
+
        @tree.command(name="approve", description="Approve a pending dangerous command")
        @discord.app_commands.describe(scope="Optional: 'all', 'session', 'always', 'all session', 'all always'")
        async def slash_approve(interaction: discord.Interaction, scope: str = ""):
@@ -1736,46 +1740,90 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_btw(interaction: discord.Interaction, question: str):
            await self._run_simple_slash(interaction, f"/btw {question}")

-        # Register installed skills as native slash commands (parity with
-        # Telegram, which uses telegram_menu_commands() in commands.py).
-        # Discord allows up to 100 application commands globally.
-        _DISCORD_CMD_LIMIT = 100
+        # Register skills under a single /skill command group with category
+        # subcommand groups.  This uses 1 top-level slot instead of N,
+        # supporting up to 25 categories × 25 skills = 625 skills.
+        self._register_skill_group(tree)
+
+    def _register_skill_group(self, tree) -> None:
+        """Register a ``/skill`` command group with category subcommand groups.
+
+        Skills are organized by their directory category under ``SKILLS_DIR``.
+        Each category becomes a subcommand group; root-level skills become
+        direct subcommands.  Discord supports 25 subcommand groups × 25
+        subcommands each = 625 skills — well beyond the old 100-command cap.
+
+        ``tree`` is the command tree the group is attached to; only its
+        ``get_commands()`` and ``add_command()`` methods are used here.  Any
+        exception raised during registration is caught and logged as a
+        warning, so this method does not propagate failures to its caller.
+        """
        try:
-            from hermes_cli.commands import discord_skill_commands
+            from hermes_cli.commands import discord_skill_commands_by_category

-            existing_names = {cmd.name for cmd in tree.get_commands()}
-            remaining_slots = max(0, _DISCORD_CMD_LIMIT - len(existing_names))
+            # Best-effort lookup of the names already on the tree; if listing
+            # fails, continue with an empty reserved set.
+            existing_names = set()
+            try:
+                existing_names = {cmd.name for cmd in tree.get_commands()}
+            except Exception:
+                pass

-            skill_entries, skipped = discord_skill_commands(
-                max_slots=remaining_slots,
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
                reserved_names=existing_names,
            )

-            for discord_name, description, cmd_key in skill_entries:
-                # Closure factory to capture cmd_key per iteration
-                def _make_skill_handler(_key: str):
-                    async def _skill_slash(interaction: discord.Interaction, args: str = ""):
-                        await self._run_simple_slash(interaction, f"{_key} {args}".strip())
-                    return _skill_slash
+            # No skills to expose: skip creating an empty /skill group.
+            if not categories and not uncategorized:
+                return

-                handler = _make_skill_handler(cmd_key)
-                handler.__name__ = f"skill_{discord_name.replace('-', '_')}"
+            skill_group = discord.app_commands.Group(
+                name="skill",
+                description="Run a Hermes skill",
+            )

+            # ── Helper: build a callback for a skill command key ──
+            # A factory is used so every callback captures its own _key; a
+            # closure defined directly in the loops below would share one
+            # late-bound loop variable.
+            def _make_handler(_key: str):
+                @discord.app_commands.describe(args="Optional arguments for the skill")
+                async def _handler(interaction: discord.Interaction, args: str = ""):
+                    await self._run_simple_slash(interaction, f"{_key} {args}".strip())
+                # Distinct __name__ per skill, derived from the command key.
+                _handler.__name__ = f"skill_{_key.lstrip('/').replace('-', '_')}"
+                return _handler
+
+            # ── Uncategorized (root-level) skills → direct subcommands ──
+            for discord_name, description, cmd_key in uncategorized:
                cmd = discord.app_commands.Command(
                    name=discord_name,
-                    description=description,
-                    callback=handler,
+                    description=description or f"Run the {discord_name} skill",
+                    callback=_make_handler(cmd_key),
                )
-                discord.app_commands.describe(args="Optional arguments for the skill")(cmd)
-                tree.add_command(cmd)
+                skill_group.add_command(cmd)

-            if skipped:
+            # ── Category subcommand groups ──
+            for cat_name in sorted(categories):
+                cat_desc = f"{cat_name.replace('-', ' ').title()} skills"
+                # Cap the description at 100 characters (97 + "...").
+                if len(cat_desc) > 100:
+                    cat_desc = cat_desc[:97] + "..."
+                cat_group = discord.app_commands.Group(
+                    name=cat_name,
+                    description=cat_desc,
+                    parent=skill_group,
+                )
+                for discord_name, description, cmd_key in categories[cat_name]:
+                    cmd = discord.app_commands.Command(
+                        name=discord_name,
+                        description=description or f"Run the {discord_name} skill",
+                        callback=_make_handler(cmd_key),
+                    )
+                    cat_group.add_command(cmd)
+
+            tree.add_command(skill_group)
+
+            total = sum(len(v) for v in categories.values()) + len(uncategorized)
+            logger.info(
+                "[%s] Registered /skill group: %d skill(s) across %d categories"
+                " + %d uncategorized",
+                self.name, total, len(categories), len(uncategorized),
+            )
+            if hidden:
                logger.warning(
-                    "[%s] Discord slash command limit reached (%d): %d skill(s) not registered",
-                    self.name, _DISCORD_CMD_LIMIT, skipped,
+                    "[%s] %d skill(s) not registered (Discord subcommand limits)",
+                    self.name, hidden,
                )
        except Exception as exc:
-            logger.warning("[%s] Failed to register skill slash commands: %s", self.name, exc)
+            # Best-effort: a failure is logged and skill commands are skipped.
+            logger.warning("[%s] Failed to register /skill group: %s", self.name, exc)

    def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
        """Build a MessageEvent from a Discord slash command interaction."""
@@ -72,7 +72,10 @@ try:
        UpdateMessageRequestBody,
    )
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
-    from lark_oapi.event.callback.model.p2_card_action_trigger import P2CardActionTriggerResponse
+    from lark_oapi.event.callback.model.p2_card_action_trigger import (
+        CallBackCard,
+        P2CardActionTriggerResponse,
+    )
    from lark_oapi.event.dispatcher_handler import EventDispatcherHandler
    from lark_oapi.ws import Client as FeishuWSClient

@@ -80,6 +83,7 @@ try:
 except ImportError:
    FEISHU_AVAILABLE = False
    lark = None  # type: ignore[assignment]
+    CallBackCard = None  # type: ignore[assignment]
    P2CardActionTriggerResponse = None  # type: ignore[assignment]
    EventDispatcherHandler = None  # type: ignore[assignment]
    FeishuWSClient = None  # type: ignore[assignment]
@@ -169,6 +173,19 @@ _FEISHU_WEBHOOK_BODY_TIMEOUT_SECONDS = 30          # max seconds to read request
 _FEISHU_WEBHOOK_ANOMALY_THRESHOLD = 25             # consecutive error responses before WARNING log
 _FEISHU_WEBHOOK_ANOMALY_TTL_SECONDS = 6 * 60 * 60  # anomaly tracker TTL (6 hours) — matches openclaw
 _FEISHU_CARD_ACTION_DEDUP_TTL_SECONDS = 15 * 60    # card action token dedup window (15 min)
+
+# Maps the ``hermes_action`` value carried by an approval card button to the
+# approval choice; lookups fall back to "deny" for unrecognized actions.
+_APPROVAL_CHOICE_MAP: Dict[str, str] = {
+    "approve_once": "once",
+    "approve_session": "session",
+    "approve_always": "always",
+    "deny": "deny",
+}
+# Human-readable label for each approval choice, shown in the header title of
+# the resolved approval card; lookups fall back to "Resolved".
+_APPROVAL_LABEL_MAP: Dict[str, str] = {
+    "once": "Approved once",
+    "session": "Approved for session",
+    "always": "Approved permanently",
+    "deny": "Denied",
+}
 _FEISHU_BOT_MSG_TRACK_SIZE = 512                   # LRU size for tracking sent message IDs
 _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003})  # reply target withdrawn/missing → create fallback
 _FEISHU_ACK_EMOJI = "OK"
@@ -1490,14 +1507,12 @@ class FeishuAdapter(BasePlatformAdapter):
            logger.warning("[Feishu] send_exec_approval failed: %s", exc)
            return SendResult(success=False, error=str(exc))

-    async def _update_approval_card(
-        self, message_id: str, label: str, user_name: str, choice: str,
-    ) -> None:
-        """Replace the approval card with a resolved status card."""
-        if not self._client or not message_id:
-            return
+    @staticmethod
+    def _build_resolved_approval_card(*, choice: str, user_name: str) -> Dict[str, Any]:
+        """Build raw card JSON for a resolved approval action."""
        icon = "❌" if choice == "deny" else "✅"
-        card = {
+        label = _APPROVAL_LABEL_MAP.get(choice, "Resolved")
+        return {
            "config": {"wide_screen_mode": True},
            "header": {
                "title": {"content": f"{icon} {label}", "tag": "plain_text"},
@@ -1510,13 +1525,6 @@ class FeishuAdapter(BasePlatformAdapter):
                },
            ],
        }
-        try:
-            payload = json.dumps(card, ensure_ascii=False)
-            body = self._build_update_message_body(msg_type="interactive", content=payload)
-            request = self._build_update_message_request(message_id=message_id, request_body=body)
-            await asyncio.to_thread(self._client.im.v1.message.update, request)
-        except Exception as exc:
-            logger.warning("[Feishu] Failed to update approval card %s: %s", message_id, exc)

    async def send_voice(
        self,
@@ -1845,20 +1853,82 @@ class FeishuAdapter(BasePlatformAdapter):
        future.add_done_callback(self._log_background_failure)

    def _on_card_action_trigger(self, data: Any) -> Any:
-        """Schedule Feishu card actions on the adapter loop and acknowledge immediately."""
+        """Handle card-action callback from the Feishu SDK (synchronous).
+
+        For approval actions: parses the event once, returns the resolved card
+        inline (the only reliable way to sync all clients), and schedules a
+        lightweight async method to actually unblock the agent.
+
+        For other card actions: delegates to ``_handle_card_action_event``.
+        """
        loop = self._loop
-        if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
+        if not self._loop_accepts_callbacks(loop):
            logger.warning("[Feishu] Dropping card action before adapter loop is ready")
-        else:
-            future = asyncio.run_coroutine_threadsafe(
-                self._handle_card_action_event(data),
-                loop,
-            )
-            future.add_done_callback(self._log_background_failure)
+            # Still answer the callback: an empty response, or None when the
+            # SDK response class is unavailable.
+            return P2CardActionTriggerResponse() if P2CardActionTriggerResponse else None
+
+        # Walk data.event.action.value defensively; a missing level yields
+        # None (or {} for the value) instead of raising AttributeError.
+        event = getattr(data, "event", None)
+        action = getattr(event, "action", None)
+        action_value = getattr(action, "value", {}) or {}
+        hermes_action = action_value.get("hermes_action") if isinstance(action_value, dict) else None
+
+        # A truthy "hermes_action" marks an approval button; that path builds
+        # the callback response itself.
+        if hermes_action:
+            return self._handle_approval_card_action(event=event, action_value=action_value, loop=loop)
+
+        # Any other card action is processed asynchronously on the adapter loop.
+        self._submit_on_loop(loop, self._handle_card_action_event(data))
        if P2CardActionTriggerResponse is None:
            return None
        return P2CardActionTriggerResponse()

+    @staticmethod
+    def _loop_accepts_callbacks(loop: Any) -> bool:
+        """Tell whether ``loop`` exists and does not report itself as closed.
+
+        Objects without an ``is_closed`` attribute are treated as open.
+        """
+        if loop is None:
+            return False
+        closed_probe = getattr(loop, "is_closed", lambda: False)
+        is_closed = bool(closed_probe())
+        return not is_closed
+
+    def _submit_on_loop(self, loop: Any, coro: Any) -> None:
+        """Submit ``coro`` to ``loop`` thread-safely with the shared done-callback.
+
+        The future returned by ``asyncio.run_coroutine_threadsafe`` gets
+        ``_log_background_failure`` attached; nothing is returned to the caller.
+        """
+        asyncio.run_coroutine_threadsafe(coro, loop).add_done_callback(
+            self._log_background_failure
+        )
+
+    def _handle_approval_card_action(self, *, event: Any, action_value: Dict[str, Any], loop: Any) -> Any:
+        """Resolve an approval button press and answer the Feishu callback.
+
+        The approval itself is resolved asynchronously by ``_resolve_approval``
+        on ``loop``; the value returned here is the synchronous callback
+        response, carrying the resolved card when the SDK types are available.
+        Unrecognized ``hermes_action`` values are treated as ``"deny"``.
+        """
+        response_cls = P2CardActionTriggerResponse
+        approval_id = action_value.get("approval_id")
+        if approval_id is None:
+            logger.debug("[Feishu] Card action missing approval_id, ignoring")
+            return response_cls() if response_cls else None
+        hermes_action = action_value.get("hermes_action")
+        choice = _APPROVAL_CHOICE_MAP.get(hermes_action, "deny")
+
+        # Prefer a cached display name; fall back to the operator's open_id.
+        operator = getattr(event, "operator", None)
+        open_id = str(getattr(operator, "open_id", "") or "")
+        user_name = self._get_cached_sender_name(open_id) or open_id
+
+        self._submit_on_loop(loop, self._resolve_approval(approval_id, choice, user_name))
+
+        if response_cls is None:
+            return None
+        reply = response_cls()
+        if CallBackCard is None:
+            return reply
+        resolved_card = CallBackCard()
+        resolved_card.type = "raw"
+        resolved_card.data = self._build_resolved_approval_card(choice=choice, user_name=user_name)
+        reply.card = resolved_card
+        return reply
+
+    async def _resolve_approval(self, approval_id: Any, choice: str, user_name: str) -> None:
+        """Consume the pending approval entry and unblock the waiting agent.
+
+        The entry is popped from ``_approval_state`` first, so an approval that
+        is unknown or was already handled is only logged at debug level.
+        Failures while resolving are logged as errors rather than raised.
+        """
+        pending = self._approval_state.pop(approval_id, None)
+        if not pending:
+            logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
+            return
+        try:
+            # Imported lazily inside the try so an import failure is logged too.
+            from tools.approval import resolve_gateway_approval
+            session_key = pending["session_key"]
+            resolved = resolve_gateway_approval(session_key, choice)
+            logger.info(
+                "Feishu button resolved %d approval(s) for session %s (choice=%s, user=%s)",
+                resolved, session_key, choice, user_name,
+            )
+        except Exception as exc:
+            logger.error("Failed to resolve gateway approval from Feishu button: %s", exc)
+
    async def _handle_reaction_event(self, event_type: str, data: Any) -> None:
        """Fetch the reacted-to message; if it was sent by this bot, emit a synthetic text event."""
        if not self._client:
@@ -1950,51 +2020,6 @@ class FeishuAdapter(BasePlatformAdapter):
        action_tag = str(getattr(action, "tag", "") or "button")
        action_value = getattr(action, "value", {}) or {}

-        # --- Exec approval button intercept ---
-        hermes_action = action_value.get("hermes_action") if isinstance(action_value, dict) else None
-        if hermes_action:
-            approval_id = action_value.get("approval_id")
-            state = self._approval_state.pop(approval_id, None)
-            if not state:
-                logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id)
-                return
-
-            choice_map = {
-                "approve_once": "once",
-                "approve_session": "session",
-                "approve_always": "always",
-                "deny": "deny",
-            }
-            choice = choice_map.get(hermes_action, "deny")
-
-            label_map = {
-                "once": "Approved once",
-                "session": "Approved for session",
-                "always": "Approved permanently",
-                "deny": "Denied",
-            }
-            label = label_map.get(choice, "Resolved")
-
-            # Resolve sender name for the status card
-            sender_id = SimpleNamespace(open_id=open_id, user_id=None, union_id=None)
-            sender_profile = await self._resolve_sender_profile(sender_id)
-            user_name = sender_profile.get("user_name") or open_id
-
-            # Resolve the approval — unblocks the agent thread
-            try:
-                from tools.approval import resolve_gateway_approval
-                count = resolve_gateway_approval(state["session_key"], choice)
-                logger.info(
-                    "Feishu button resolved %d approval(s) for session %s (choice=%s, user=%s)",
-                    count, state["session_key"], choice, user_name,
-                )
-            except Exception as exc:
-                logger.error("Failed to resolve gateway approval from Feishu button: %s", exc)
-
-            # Update the card to show the decision
-            await self._update_approval_card(state.get("message_id", ""), label, user_name, choice)
-            return
-
        synthetic_text = f"/card {action_tag}"
        if action_value:
            try:
@@ -2897,6 +2922,19 @@ class FeishuAdapter(BasePlatformAdapter):
            "user_id_alt": union_id,
        }

+    def _get_cached_sender_name(self, sender_id: Optional[str]) -> Optional[str]:
+        """Look up ``sender_id`` in the name cache, honoring the entry's expiry.
+
+        Returns the cached name while the entry is unexpired.  An expired
+        entry is evicted and ``None`` is returned, as it is for an empty
+        ``sender_id`` or a cache miss.
+        """
+        if not sender_id:
+            return None
+        entry = self._sender_name_cache.get(sender_id)
+        if entry is None:
+            return None
+        cached_name, deadline = entry
+        still_fresh = time.time() < deadline
+        if not still_fresh:
+            # Stale entry: drop it so the cache does not keep expired names.
+            self._sender_name_cache.pop(sender_id, None)
+            return None
+        return cached_name
+
    async def _resolve_sender_name_from_api(self, sender_id: Optional[str]) -> Optional[str]:
        """Fetch the sender's display name from the Feishu contact API with a 10-minute cache.

@@ -2909,11 +2947,9 @@ class FeishuAdapter(BasePlatformAdapter):
        if not trimmed:
            return None
        now = time.time()
-        cached = self._sender_name_cache.get(trimmed)
-        if cached is not None:
-            name, expire_at = cached
-            if now < expire_at:
-                return name
+        cached_name = self._get_cached_sender_name(trimmed)
+        if cached_name is not None:
+            return cached_name
        try:
            from lark_oapi.api.contact.v3 import GetUserRequest  # lazy import
            if trimmed.startswith("ou_"):
@@ -573,6 +573,7 @@ class GatewayRunner:
        self._running_agents: Dict[str, Any] = {}
        self._running_agents_ts: Dict[str, float] = {}  # start timestamp per session
        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
+        self._busy_ack_ts: Dict[str, float] = {}  # last busy-ack timestamp per session (debounce)

        # Cache AIAgent instances per session to preserve prompt caching.
        # Without this, a new AIAgent is created per message, rebuilding the
@@ -1329,26 +1330,100 @@ class GatewayRunner:
        merge_pending_message_event(adapter._pending_messages, session_key, event)

    async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool:
+        """Handle a message that arrives while this session's agent is busy.
+
+        While the gateway is draining, a drain notice is sent to the chat
+        (the event is queued first when queue-during-drain is enabled).
+        Otherwise the event is merged into the adapter's pending messages,
+        the running agent is interrupted with the new text, and a status
+        acknowledgment is sent at most once per 30 seconds per session.
+
+        Returns:
+            True when the message was handled here.  False only in the
+            non-draining case when no adapter is registered for the
+            event's platform, so the caller can use its default path.
+        """
-        if not self._draining:
-            return False
+        # --- Draining case (gateway restarting/stopping) ---
+        if self._draining:
+            adapter = self.adapters.get(event.source.platform)
+            if not adapter:
+                return True
+
+            thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+            if self._queue_during_drain_enabled():
+                self._queue_or_replace_pending_event(session_key, event)
+                message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
+            else:
+                message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
+
+            await adapter._send_with_retry(
+                chat_id=event.source.chat_id,
+                content=message,
+                reply_to=event.message_id,
+                metadata=thread_meta,
+            )
+            return True
+
+        # --- Normal busy case (agent actively running a task) ---
+        # The user sent a message while the agent is working.  Interrupt the
+        # agent immediately so it stops the current tool-calling loop and
+        # processes the new message.  The pending message is stored in the
+        # adapter so the base adapter picks it up once the interrupted run
+        # returns.  A brief ack tells the user what's happening (debounced
+        # to avoid spam when they fire multiple messages quickly).

        adapter = self.adapters.get(event.source.platform)
        if not adapter:
-            return True
+            return False  # let default path handle it
+
+        # Store the message so it's processed as the next turn after the
+        # interrupt causes the current run to exit.
+        from gateway.platforms.base import merge_pending_message_event
+        merge_pending_message_event(adapter._pending_messages, session_key, event)
+
+        # Interrupt the running agent — this aborts in-flight tool calls and
+        # causes the agent loop to exit at the next check point.
+        running_agent = self._running_agents.get(session_key)
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+            try:
+                running_agent.interrupt(event.text)
+            except Exception:
+                pass  # don't let interrupt failure block the ack
+
+        # Debounce: only send an acknowledgment once every 30 seconds per session
+        # to avoid spamming the user when they send multiple messages quickly
+        _BUSY_ACK_COOLDOWN = 30
+        now = time.time()
+        last_ack = self._busy_ack_ts.get(session_key, 0)
+        if now - last_ack < _BUSY_ACK_COOLDOWN:
+            return True  # interrupt sent, ack already delivered recently
+
+        self._busy_ack_ts[session_key] = now
+
+        # Build a status-rich acknowledgment
+        status_parts = []
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+            try:
+                summary = running_agent.get_activity_summary()
+                iteration = summary.get("api_call_count", 0)
+                max_iter = summary.get("max_iterations", 0)
+                current_tool = summary.get("current_tool")
+                start_ts = self._running_agents_ts.get(session_key, 0)
+                if start_ts:
+                    elapsed_min = int((now - start_ts) / 60)
+                    if elapsed_min > 0:
+                        status_parts.append(f"{elapsed_min} min elapsed")
+                if max_iter:
+                    status_parts.append(f"iteration {iteration}/{max_iter}")
+                if current_tool:
+                    status_parts.append(f"running: {current_tool}")
+            except Exception:
+                pass
+
+        status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
+        message = (
+            f"⚡ Interrupting current task{status_detail}. "
+            f"I'll respond to your message shortly."
+        )

        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-        if self._queue_during_drain_enabled():
-            self._queue_or_replace_pending_event(session_key, event)
-            message = f"⏳ Gateway {self._status_action_gerund()} — queued for the next turn after it comes back."
-        else:
-            message = f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
+        try:
+            await adapter._send_with_retry(
+                chat_id=event.source.chat_id,
+                content=message,
+                reply_to=event.message_id,
+                metadata=thread_meta,
+            )
+        except Exception as e:
+            logger.debug("Failed to send busy-ack: %s", e)

-        await adapter._send_with_retry(
-            chat_id=event.source.chat_id,
-            content=message,
-            reply_to=event.message_id,
-            metadata=thread_meta,
-        )
        return True

    async def _drain_active_agents(self, timeout: float) -> tuple[Dict[str, Any], bool]:
@@ -1391,6 +1466,65 @@ class GatewayRunner:
            except Exception as e:
                logger.debug("Failed interrupting agent during shutdown: %s", e)

+    async def _notify_active_sessions_of_shutdown(self) -> None:
+        """Tell every chat that has a running agent that the gateway is going away.
+
+        stop() awaits this before draining, while adapters are still
+        connected.  Delivery is best-effort: a failed send is logged at debug
+        level and never propagates, so it cannot block the shutdown sequence.
+        """
+        running = self._snapshot_running_agents()
+        if not running:
+            return
+
+        if self._restart_requested:
+            action = "restarting"
+            hint = (
+                "Your current task will be interrupted. "
+                "Send any message after restart to resume where it left off."
+            )
+        else:
+            action = "shutting down"
+            hint = "Your current task will be interrupted."
+        msg = f"⚠️ Gateway {action} — {hint}"
+
+        # (platform, chat_id) pairs that already received the notice.
+        delivered: set = set()
+        for session_key in running:
+            # Key layout: agent:main:{platform}:{chat_type}:{chat_id}[:{extra}...]
+            fields = session_key.split(":")
+            if len(fields) < 5:
+                continue
+            platform_str, chat_id = fields[2], fields[4]
+
+            # One message per chat, even when several sessions
+            # (different users/threads) share that chat.
+            target = (platform_str, chat_id)
+            if target in delivered:
+                continue
+
+            try:
+                adapter = self.adapters.get(Platform(platform_str))
+                if not adapter:
+                    continue
+
+                # A sixth field, when present, is forwarded as thread_id so
+                # the message lands in the correct forum topic / thread.
+                thread_id = fields[5] if len(fields) > 5 else None
+                metadata = {"thread_id": thread_id} if thread_id else None
+
+                await adapter.send(chat_id, msg, metadata=metadata)
+                delivered.add(target)
+                logger.info(
+                    "Sent shutdown notification to %s:%s",
+                    platform_str, chat_id,
+                )
+            except Exception as e:
+                logger.debug(
+                    "Failed to send shutdown notification to %s:%s: %s",
+                    platform_str, chat_id, e,
+                )
+
    def _finalize_shutdown_agents(self, active_agents: Dict[str, Any]) -> None:
        for agent in active_agents.values():
            try:
@@ -1416,6 +1550,106 @@ class GatewayRunner:
            except Exception:
                pass

+    _STUCK_LOOP_THRESHOLD = 3  # restarts while active before auto-suspend
+    _STUCK_LOOP_FILE = ".restart_failure_counts"
+
+    def _increment_restart_failure_counts(self, active_session_keys: set) -> None:
+        """Bump the restart-failure counter of every session active at shutdown.
+
+        The counters are persisted as JSON in ``_hermes_home`` so they
+        survive restarts.  The file is rewritten with ONLY the sessions in
+        ``active_session_keys``: a session that is absent was not running at
+        this shutdown, so its streak of consecutive restarts is over and its
+        counter is dropped.
+
+        Args:
+            active_session_keys: Keys of the sessions whose agents were
+                still running when the gateway shut down.
+        """
+        import json
+
+        path = _hermes_home / self._STUCK_LOOP_FILE
+        try:
+            counts = json.loads(path.read_text()) if path.exists() else {}
+        except Exception:
+            counts = {}
+        # The file may hold valid JSON that is not an object (truncated or
+        # hand-edited).  Treat that like a missing file instead of raising.
+        if not isinstance(counts, dict):
+            counts = {}
+
+        new_counts = {}
+        for key in active_session_keys:
+            previous = counts.get(key, 0)
+            # Ignore non-integer garbage rather than failing on "+ 1".
+            if not isinstance(previous, int):
+                previous = 0
+            new_counts[key] = previous + 1
+
+        try:
+            path.write_text(json.dumps(new_counts))
+        except Exception as e:
+            # Best-effort: losing the counters only delays stuck-loop detection.
+            logger.debug("Failed to persist restart-failure counts: %s", e)
+
+    def _suspend_stuck_loop_sessions(self) -> int:
+        """Suspend sessions that have been active across too many restarts.
+
+        Called on gateway startup AFTER suspend_recently_active() to catch
+        the stuck-loop pattern: session loads → agent gets stuck → gateway
+        restarts → repeat.
+
+        Every session whose persisted counter reached
+        ``_STUCK_LOOP_THRESHOLD`` is marked suspended and its counter is
+        dropped.  Counters still below the threshold are written back:
+        deleting the whole file on every startup would reset each counter
+        before the next shutdown could increment it, so no session could
+        ever accumulate enough restarts to reach the threshold.
+
+        Returns:
+            The number of sessions newly suspended.
+        """
+        import json
+
+        path = _hermes_home / self._STUCK_LOOP_FILE
+        if not path.exists():
+            return 0
+
+        try:
+            counts = json.loads(path.read_text())
+        except Exception:
+            return 0
+        # Valid JSON that is not an object carries no usable counters.
+        if not isinstance(counts, dict):
+            counts = {}
+
+        suspended = 0
+        stuck_keys = [
+            k for k, v in counts.items()
+            if isinstance(v, int) and v >= self._STUCK_LOOP_THRESHOLD
+        ]
+
+        for session_key in stuck_keys:
+            try:
+                entry = self.session_store._entries.get(session_key)
+                if entry and not entry.suspended:
+                    entry.suspended = True
+                    suspended += 1
+                    logger.warning(
+                        "Auto-suspended stuck session %s (active across %d "
+                        "consecutive restarts — likely a stuck loop)",
+                        session_key[:30], counts[session_key],
+                    )
+            except Exception:
+                pass
+
+        if suspended:
+            try:
+                self.session_store._save()
+            except Exception:
+                pass
+
+        # Counters start fresh only for the sessions handled above; the
+        # sub-threshold ones must survive until the next shutdown.
+        remaining = {
+            k: v for k, v in counts.items()
+            if k not in stuck_keys and isinstance(v, int)
+        }
+        try:
+            if remaining:
+                path.write_text(json.dumps(remaining))
+            else:
+                path.unlink(missing_ok=True)
+        except Exception:
+            pass
+
+        return suspended
+
+    def _clear_restart_failure_count(self, session_key: str) -> None:
+        """Forget the restart-failure counter of a single session.
+
+        Invoked after an agent turn finishes successfully, which means the
+        session is no longer caught in a restart loop.  Any error while
+        reading or rewriting the counter file is ignored.
+        """
+        import json
+
+        counts_file = _hermes_home / self._STUCK_LOOP_FILE
+        if not counts_file.exists():
+            return
+        try:
+            tallies = json.loads(counts_file.read_text())
+            if session_key not in tallies:
+                return
+            del tallies[session_key]
+            if not tallies:
+                # Last counter gone: remove the file instead of writing "{}".
+                counts_file.unlink(missing_ok=True)
+                return
+            counts_file.write_text(json.dumps(tallies))
+        except Exception:
+            pass
+
    async def _launch_detached_restart_command(self) -> None:
        import shutil
        import subprocess
@@ -1559,6 +1793,17 @@ class GatewayRunner:
            except Exception as e:
                logger.warning("Session suspension on startup failed: %s", e)

+        # Stuck-loop detection (#7536): if a session has been active across
+        # 3+ consecutive restarts, it's probably stuck in a loop (the same
+        # history keeps causing the agent to hang).  Auto-suspend it so the
+        # user gets a clean slate on the next message.
+        try:
+            stuck = self._suspend_stuck_loop_sessions()
+            if stuck:
+                logger.warning("Auto-suspended %d stuck-loop session(s)", stuck)
+        except Exception as e:
+            logger.debug("Stuck-loop detection failed: %s", e)
+
        connected_count = 0
        enabled_platform_count = 0
        startup_nonretryable_errors: list[str] = []
@@ -2018,6 +2263,10 @@ class GatewayRunner:
            self._running = False
            self._draining = True

+            # Notify all chats with active agents BEFORE draining.
+            # Adapters are still connected here, so messages can be sent.
+            await self._notify_active_sessions_of_shutdown()
+
            timeout = self._restart_drain_timeout
            active_agents, timed_out = await self._drain_active_agents(timeout)
            if timed_out:
@@ -2063,6 +2312,8 @@ class GatewayRunner:
            self._running_agents.clear()
            self._pending_messages.clear()
            self._pending_approvals.clear()
+            if hasattr(self, '_busy_ack_ts'):
+                self._busy_ack_ts.clear()
            self._shutdown_event.set()

            # Global cleanup: kill any remaining tool subprocesses not tied
@@ -2088,12 +2339,31 @@ class GatewayRunner:

            # Write a clean-shutdown marker so the next startup knows this
            # wasn't a crash.  suspend_recently_active() only needs to run
-            # after unexpected exits — graceful shutdowns already drain
-            # active agents, so there's no stuck-session risk.
-            try:
-                (_hermes_home / ".clean_shutdown").touch()
-            except Exception:
-                pass
+            # after unexpected exits.  However, if the drain timed out and
+            # agents were force-interrupted, their sessions may be in an
+            # incomplete state (trailing tool response, no final assistant
+            # message).  Skip the marker in that case so the next startup
+            # suspends those sessions — giving users a clean slate instead
+            # of resuming a half-finished tool loop.
+            if not timed_out:
+                try:
+                    (_hermes_home / ".clean_shutdown").touch()
+                except Exception:
+                    pass
+            else:
+                logger.info(
+                    "Skipping .clean_shutdown marker — drain timed out with "
+                    "interrupted agents; next startup will suspend recently "
+                    "active sessions."
+                )
+
+            # Track sessions that were active at shutdown for stuck-loop
+            # detection (#7536).  On each restart, the counter increments
+            # for sessions that were running.  If a session hits the
+            # threshold (3 consecutive restarts while active), the next
+            # startup auto-suspends it — breaking the loop.
+            if active_agents:
+                self._increment_restart_failure_counts(set(active_agents.keys()))

            if self._restart_requested and self._restart_via_service:
                self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE
@@ -2528,6 +2798,7 @@ class GatewayRunner:
                )
                del self._running_agents[_quick_key]
                self._running_agents_ts.pop(_quick_key, None)
+                self._busy_ack_ts.pop(_quick_key, None)

        if _quick_key in self._running_agents:
            if event.get_command() == "status":
@@ -3593,6 +3864,12 @@ class GatewayRunner:
                _response_time, _api_calls, _resp_len,
            )

+            # Successful turn — clear any stuck-loop counter for this session.
+            # This ensures the counter only accumulates across CONSECUTIVE
+            # restarts where the session was active (never completed).
+            if session_key:
+                self._clear_restart_failure_count(session_key)
+
            # Surface error details when the agent failed silently (final_response=None)
            if not response and agent_result.get("failed"):
                error_detail = agent_result.get("error", "unknown error")
@@ -3699,14 +3976,11 @@ class GatewayRunner:
            # intermediate reasoning) so sessions can be resumed with full context
            # and transcripts are useful for debugging and training data.
            #
-            # IMPORTANT: When the agent failed before producing any response
-            # (e.g. context-overflow 400), do NOT persist the user's message.
+            # IMPORTANT: When the agent failed (e.g. context-overflow 400,
+            # compression exhausted), do NOT persist the user's message.
            # Persisting it would make the session even larger, causing the
-            # same failure on the next attempt — an infinite loop. (#1630)
-            agent_failed_early = (
-                agent_result.get("failed")
-                and not agent_result.get("final_response")
-            )
+            # same failure on the next attempt — an infinite loop. (#1630, #9893)
+            agent_failed_early = bool(agent_result.get("failed"))
            if agent_failed_early:
                logger.info(
                    "Skipping transcript persistence for failed request in "
@@ -3714,6 +3988,24 @@ class GatewayRunner:
                    session_entry.session_id,
                )

+            # When compression is exhausted, the session is permanently too
+            # large to process.  Auto-reset it so the next message starts
+            # fresh instead of replaying the same oversized context in an
+            # infinite fail loop.  (#9893)
+            if agent_result.get("compression_exhausted") and session_entry and session_key:
+                logger.info(
+                    "Auto-resetting session %s after compression exhaustion.",
+                    session_entry.session_id,
+                )
+                self.session_store.reset_session(session_key)
+                self._evict_cached_agent(session_key)
+                self._session_model_overrides.pop(session_key, None)
+                response = (response or "") + (
+                    "\n\n🔄 Session auto-reset — the conversation exceeded the "
+                    "maximum context size and could not be compressed further. "
+                    "Your next message will start a fresh session."
+                )
+
            ts = datetime.now().isoformat()
            
            # If this is a fresh session (no history), write the full tool
@@ -3821,6 +4113,8 @@ class GatewayRunner:
            _hist_len = len(history) if 'history' in locals() else 0
            if status_code == 401:
                status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
+            elif status_code == 402:
+                status_hint = " Your API balance or quota is exhausted. Check your provider dashboard."
            elif status_code == 429:
                # Check if this is a plan usage limit (resets on a schedule) vs a transient rate limit
                _err_body = getattr(e, "response", None)
@@ -8164,6 +8458,12 @@ class GatewayRunner:
                    cached = _cache.get(session_key)
                    if cached and cached[1] == _sig:
                        agent = cached[0]
+                        # Reset activity timestamp so the inactivity timeout
+                        # handler doesn't see stale idle time from the previous
+                        # turn and immediately kill this agent.  (#9051)
+                        agent._last_activity_ts = time.time()
+                        agent._last_activity_desc = "starting new turn (cached)"
+                        agent._api_call_count = 0
                        logger.debug("Reusing cached agent for session %s", session_key)

            if agent is None:
@@ -8376,6 +8676,21 @@ class GatewayRunner:
            if _msn:
                message = _msn + "\n\n" + message

+            # Auto-continue: if the loaded history ends with a tool result,
+            # the previous agent turn was interrupted mid-work (gateway
+            # restart, crash, SIGTERM).  Prepend a system note so the model
+            # finishes processing the pending tool results before addressing
+            # the user's new message.  (#4493)
+            if agent_history and agent_history[-1].get("role") == "tool":
+                message = (
+                    "[System note: Your previous turn was interrupted before you could "
+                    "process the last tool result(s). The conversation history contains "
+                    "tool outputs you haven't responded to yet. Please finish processing "
+                    "those results and summarize what was accomplished, then address the "
+                    "user's new message below.]\n\n"
+                    + message
+                )
+
            _approval_session_key = session_key or ""
            _approval_session_token = set_current_session_key(_approval_session_key)
            register_gateway_notify(_approval_session_key, _approval_notify_sync)
@@ -8410,6 +8725,8 @@ class GatewayRunner:
                    "final_response": error_msg,
                    "messages": result.get("messages", []),
                    "api_calls": result.get("api_calls", 0),
+                    "failed": result.get("failed", False),
+                    "compression_exhausted": result.get("compression_exhausted", False),
                    "tools": tools_holder[0] or [],
                    "history_offset": len(agent_history),
                    "last_prompt_tokens": _last_prompt_toks,
@@ -9187,8 +9504,41 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =

    runner = GatewayRunner(config)
    
+    # Track whether a signal initiated the shutdown (vs. internal request).
+    # When an unexpected SIGTERM kills the gateway, we exit non-zero so
+    # systemd's Restart=on-failure revives the process.  systemctl stop
+    # is safe: systemd tracks stop-requested state independently of exit
+    # code, so Restart= never fires for a deliberate stop.
+    _signal_initiated_shutdown = False
+
    # Set up signal handlers
    def shutdown_signal_handler():
+        nonlocal _signal_initiated_shutdown
+        _signal_initiated_shutdown = True
+        logger.info("Received SIGTERM/SIGINT — initiating shutdown")
+        # Diagnostic: log all hermes-related processes so we can identify
+        # what triggered the signal (hermes update, hermes gateway restart,
+        # a stale detached subprocess, etc.).
+        try:
+            import subprocess as _sp
+            _ps = _sp.run(
+                ["ps", "aux"],
+                capture_output=True, text=True, timeout=3,
+            )
+            _hermes_procs = [
+                line for line in _ps.stdout.splitlines()
+                if ("hermes" in line.lower() or "gateway" in line.lower())
+                and str(os.getpid()) not in line.split()[1:2]  # exclude self
+            ]
+            if _hermes_procs:
+                logger.warning(
+                    "Shutdown diagnostic — other hermes processes running:\n  %s",
+                    "\n  ".join(_hermes_procs),
+                )
+            else:
+                logger.info("Shutdown diagnostic — no other hermes processes found")
+        except Exception:
+            pass
        asyncio.create_task(runner.stop())

    def restart_signal_handler():
@@ -9258,6 +9608,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    if runner.exit_code is not None:
        raise SystemExit(runner.exit_code)

+    # When a signal (SIGTERM/SIGINT) caused the shutdown and it wasn't a
+    # planned restart (/restart, /update, SIGUSR1), exit non-zero so
+    # systemd's Restart=on-failure revives the process.  This covers:
+    #   - hermes update killing the gateway mid-work
+    #   - External kill commands
+    #   - WSL2/container runtime sending unexpected signals
+    # systemctl stop is safe: systemd tracks "stop requested" state
+    # independently of exit code, so Restart= never fires for it.
+    if _signal_initiated_shutdown and not runner._restart_requested:
+        logger.info(
+            "Exiting with code 1 (signal-initiated shutdown without restart "
+            "request) so systemd Restart=on-failure can revive the gateway."
+        )
+        return False  # → sys.exit(1) in the caller
+
    return True


@@ -266,9 +266,25 @@ def read_runtime_status() -> Optional[dict[str, Any]]:


 def remove_pid_file() -> None:
-    """Remove the gateway PID file if it exists."""
+    """Delete the gateway PID file unless another process owns it.
+
+    In a --replace handoff the old process's atexit handler may run AFTER
+    the new process has already written its own PID file.  Unlinking
+    unconditionally would erase the new process's record and leave the
+    gateway running without a PID file (invisible to ``get_running_pid()``).
+    A file that is unreadable or carries no usable ``pid`` is still removed.
+    """
    try:
-        _get_pid_path().unlink(missing_ok=True)
+        pid_path = _get_pid_path()
+        payload = _read_json_file(pid_path)
+        owner_pid = None
+        if payload is not None:
+            try:
+                owner_pid = int(payload["pid"])
+            except (KeyError, TypeError, ValueError):
+                owner_pid = None
+        # Only a parseable PID that differs from ours marks a foreign file.
+        foreign = owner_pid is not None and owner_pid != os.getpid()
+        if not foreign:
+            pid_path.unlink(missing_ok=True)
    except Exception:
        pass

@@ -383,13 +383,16 @@ def _resolve_api_key_provider_secret(
 # Z.AI has separate billing for general vs coding plans, and global vs China
 # endpoints.  A key that works on one may return "Insufficient balance" on
 # another.  We probe at setup time and store the working endpoint.
+# Each entry lists candidate models to try in order — newer coding plan accounts
+# may only have access to recent models (glm-5.1, glm-5v-turbo) while older
+# ones still use glm-4.7.

 ZAI_ENDPOINTS = [
-    # (id, base_url, default_model, label)
-    ("global",        "https://api.z.ai/api/paas/v4",        "glm-5",   "Global"),
-    ("cn",            "https://open.bigmodel.cn/api/paas/v4", "glm-5",   "China"),
-    ("coding-global", "https://api.z.ai/api/coding/paas/v4",  "glm-4.7", "Global (Coding Plan)"),
-    ("coding-cn",     "https://open.bigmodel.cn/api/coding/paas/v4", "glm-4.7", "China (Coding Plan)"),
+    # (id, base_url, probe_models, label)
+    ("global",        "https://api.z.ai/api/paas/v4",        ["glm-5"],   "Global"),
+    ("cn",            "https://open.bigmodel.cn/api/paas/v4", ["glm-5"],   "China"),
+    ("coding-global", "https://api.z.ai/api/coding/paas/v4",  ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "Global (Coding Plan)"),
+    ("coding-cn",     "https://open.bigmodel.cn/api/coding/paas/v4", ["glm-5.1", "glm-5v-turbo", "glm-4.7"], "China (Coding Plan)"),
 ]


@@ -397,35 +400,37 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
    """Probe z.ai endpoints to find one that accepts this API key.

    Returns {"id": ..., "base_url": ..., "model": ..., "label": ...} for the
-    first working endpoint, or None if all fail.
+    first working endpoint, or None if all fail.  For endpoints with multiple
+    candidate models, tries each in order and returns the first that succeeds.
    """
-    for ep_id, base_url, model, label in ZAI_ENDPOINTS:
-        try:
-            resp = httpx.post(
-                f"{base_url}/chat/completions",
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json",
-                },
-                json={
-                    "model": model,
-                    "stream": False,
-                    "max_tokens": 1,
-                    "messages": [{"role": "user", "content": "ping"}],
-                },
-                timeout=timeout,
-            )
-            if resp.status_code == 200:
-                logger.debug("Z.AI endpoint probe: %s (%s) OK", ep_id, base_url)
-                return {
-                    "id": ep_id,
-                    "base_url": base_url,
-                    "model": model,
-                    "label": label,
-                }
-            logger.debug("Z.AI endpoint probe: %s returned %s", ep_id, resp.status_code)
-        except Exception as exc:
-            logger.debug("Z.AI endpoint probe: %s failed: %s", ep_id, exc)
+    for ep_id, base_url, probe_models, label in ZAI_ENDPOINTS:
+        for model in probe_models:
+            try:
+                resp = httpx.post(
+                    f"{base_url}/chat/completions",
+                    headers={
+                        "Authorization": f"Bearer {api_key}",
+                        "Content-Type": "application/json",
+                    },
+                    json={
+                        "model": model,
+                        "stream": False,
+                        "max_tokens": 1,
+                        "messages": [{"role": "user", "content": "ping"}],
+                    },
+                    timeout=timeout,
+                )
+                if resp.status_code == 200:
+                    logger.debug("Z.AI endpoint probe: %s (%s) model=%s OK", ep_id, base_url, model)
+                    return {
+                        "id": ep_id,
+                        "base_url": base_url,
+                        "model": model,
+                        "label": label,
+                    }
+                logger.debug("Z.AI endpoint probe: %s model=%s returned %s", ep_id, model, resp.status_code)
+            except Exception as exc:
+                logger.debug("Z.AI endpoint probe: %s model=%s failed: %s", ep_id, model, exc)
    return None


@@ -75,12 +75,12 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
        if not hasattr(cli, "_secret_deadline"):
            cli._secret_deadline = 0
        try:
-            value = getpass.getpass(f"{prompt} (hidden, Enter to skip): ")
+            value = getpass.getpass(f"{prompt} (hidden, ESC or empty Enter to skip): ")
        except (EOFError, KeyboardInterrupt):
            value = ""

        if not value:
-            cprint(f"\n{_DIM}  ⏭ Secret entry cancelled{_RST}")
+            cprint(f"\n{_DIM}  ⏭ Secret entry skipped{_RST}")
            return {
                "success": True,
                "reason": "cancelled",
@@ -133,7 +133,7 @@ def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict:
                cli._app.invalidate()

            if not value:
-                cprint(f"\n{_DIM}  ⏭ Secret entry cancelled{_RST}")
+                cprint(f"\n{_DIM}  ⏭ Secret entry skipped{_RST}")
                return {
                    "success": True,
                    "reason": "cancelled",
@@ -582,6 +582,116 @@ def discord_skill_commands(
    )


+def discord_skill_commands_by_category(
+    reserved_names: set[str],
+) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]:
+    """Return skill entries organized by category for Discord ``/skill`` subcommand groups.
+
+    Skills whose directory is nested at least 2 levels under ``SKILLS_DIR``
+    (e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level
+    category.  Root-level skills (e.g. ``dogfood/SKILL.md``) are returned as
+    *uncategorized* — the caller should register them as direct subcommands
+    of the ``/skill`` group.
+
+    Skipped: skills outside ``SKILLS_DIR`` or under its ``.hub`` directory,
+    skills disabled for the ``discord`` platform, and skills whose name
+    (clamped to 32 chars) is in *reserved_names* or already taken by an
+    earlier skill.  Descriptions are truncated to 100 chars.  Loading is
+    best-effort: on any error, whatever was collected so far is returned.
+
+    Returns:
+        ``(categories, uncategorized, hidden_count)``
+
+        - *categories*: ``{category_name: [(name, description, cmd_key), ...]}``
+        - *uncategorized*: ``[(name, description, cmd_key), ...]``
+        - *hidden_count*: skills dropped due to Discord group limits
+          (25 subcommand groups, 25 subcommands per group; uncategorized
+          skills share the 25 top-level slots with the groups)
+    """
+    from pathlib import Path as _P
+
+    _platform_disabled: set[str] = set()
+    try:
+        from agent.skill_utils import get_disabled_skill_names
+        _platform_disabled = get_disabled_skill_names(platform="discord")
+    except Exception:
+        pass  # best-effort: without the helper nothing is treated as disabled
+
+    # Collect raw skill data --------------------------------------------------
+    categories: dict[str, list[tuple[str, str, str]]] = {}
+    uncategorized: list[tuple[str, str, str]] = []
+    _names_used: set[str] = set(reserved_names)
+    hidden = 0
+
+    try:
+        from agent.skill_commands import get_skill_commands
+        from tools.skills_tool import SKILLS_DIR
+        _skills_dir = SKILLS_DIR.resolve()
+        _hub_dir = (SKILLS_DIR / ".hub").resolve()
+        skill_cmds = get_skill_commands()
+
+        for cmd_key in sorted(skill_cmds):
+            info = skill_cmds[cmd_key]
+            skill_path = info.get("skill_md_path", "")
+            if not skill_path:
+                continue
+            sp = _P(skill_path).resolve()
+            # Skip skills outside SKILLS_DIR or from the hub.  Compare whole
+            # path components (not string prefixes) so a sibling directory
+            # such as ``skills-old`` is not mistaken for ``skills``, and so a
+            # rejected skill never reserves a name below.
+            if _skills_dir not in sp.parents or _hub_dir in sp.parents:
+                continue
+
+            skill_name = info.get("name", "")
+            if skill_name in _platform_disabled:
+                continue
+
+            raw_name = cmd_key.lstrip("/")
+            # Clamp to 32 chars (Discord limit)
+            discord_name = raw_name[:32]
+            if discord_name in _names_used:
+                continue
+            _names_used.add(discord_name)
+
+            desc = info.get("description", "")
+            if len(desc) > 100:
+                desc = desc[:97] + "..."
+
+            # Determine category from the relative path within SKILLS_DIR.
+            # e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art")
+            parts = sp.parent.relative_to(_skills_dir).parts
+            if len(parts) >= 2:
+                categories.setdefault(parts[0], []).append((discord_name, desc, cmd_key))
+            else:
+                uncategorized.append((discord_name, desc, cmd_key))
+    except Exception:
+        pass  # best-effort: return whatever was collected before the failure
+
+    # Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------
+    _MAX_GROUPS = 25
+    _MAX_PER_GROUP = 25
+
+    trimmed_categories: dict[str, list[tuple[str, str, str]]] = {}
+    group_count = 0
+    for cat in sorted(categories):
+        if group_count >= _MAX_GROUPS:
+            hidden += len(categories[cat])
+            continue
+        entries = categories[cat][:_MAX_PER_GROUP]
+        hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP)
+        trimmed_categories[cat] = entries
+        group_count += 1
+
+    # Uncategorized skills also count against the 25 top-level limit
+    remaining_slots = _MAX_GROUPS - group_count
+    if len(uncategorized) > remaining_slots:
+        hidden += len(uncategorized) - remaining_slots
+        uncategorized = uncategorized[:remaining_slots]
+
+    return trimmed_categories, uncategorized, hidden
+
+
 def slack_subcommand_map() -> dict[str, str]:
    """Return subcommand -> /command mapping for Slack /hermes handler.

@@ -734,8 +844,7 @@ class SlashCommandCompleter(Completer):
            return None
        return word

-    @staticmethod
-    def _context_completions(word: str, limit: int = 30):
+    def _context_completions(self, word: str, limit: int = 30):
        """Yield Claude Code-style @ context completions.

        Bare ``@`` or ``@partial`` shows static references and matching
@@ -2766,6 +2766,47 @@ def sanitize_env_file() -> int:
    return fixes


+def _check_non_ascii_credential(key: str, value: str) -> str:
+    """Return *value* with every non-ASCII character removed.
+
+    Credentials are sent as HTTP header values, which httpx/httpcore encode
+    as ASCII; a Unicode lookalike glyph (typically pasted from a PDF or
+    rich-text editor) would otherwise raise ``UnicodeEncodeError`` at
+    request time.  *key* is only used to name the credential in the warning.
+
+    Pure-ASCII input is returned untouched.  Otherwise a warning listing up
+    to five offending characters is printed to stderr before returning.
+    """
+    if value.isascii():
+        return value  # fast path: nothing to strip
+
+    import sys
+
+    # One human-readable entry per offending character.
+    offenders = [
+        f"  position {idx}: {char!r} (U+{ord(char):04X})"
+        for idx, char in enumerate(value)
+        if ord(char) > 127
+    ]
+    cleaned = "".join(char for char in value if ord(char) <= 127)
+
+    header = (
+        f"\n  Warning: {key} contains non-ASCII characters that will break API requests.\n"
+        f"  This usually happens when copy-pasting from a PDF, rich-text editor,\n"
+        f"  or web page that substitutes lookalike Unicode glyphs for ASCII letters.\n"
+        f"\n"
+    )
+    # Only the first five offenders are listed; a marker notes any remainder.
+    listing = "\n".join(f"  {entry}" for entry in offenders[:5])
+    overflow = "\n  ... and more" if len(offenders) > 5 else ""
+    footer = (
+        f"\n\n  The non-ASCII characters have been stripped automatically.\n"
+        f"  If authentication fails, re-copy the key from the provider's dashboard.\n"
+    )
+    print(header + listing + overflow + footer, file=sys.stderr)
+    return cleaned
+
+
 def save_env_value(key: str, value: str):
    """Save or update a value in ~/.hermes/.env."""
    if is_managed():
@@ -2774,6 +2815,8 @@ def save_env_value(key: str, value: str):
    if not _ENV_VAR_NAME_RE.match(key):
        raise ValueError(f"Invalid environment variable name: {key!r}")
    value = value.replace("\n", "").replace("\r", "")
+    # API keys / tokens must be ASCII — strip non-ASCII with a warning.
+    value = _check_non_ascii_credential(key, value)
    ensure_hermes_home()
    env_path = get_env_path()
    
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
 from hermes_constants import display_hermes_home
@@ -513,7 +514,87 @@ def run_doctor(args):
            pass

    _check_gateway_service_linger(issues)
-    
+
+    # =========================================================================
+    # Check: Command installation (hermes bin symlink)
+    # =========================================================================
+    if sys.platform != "win32":
+        print()
+        print(color("◆ Command Installation", Colors.CYAN, Colors.BOLD))
+
+        # Determine the venv entry point location
+        _venv_bin = None
+        for _venv_name in ("venv", ".venv"):
+            _candidate = PROJECT_ROOT / _venv_name / "bin" / "hermes"
+            if _candidate.exists():
+                _venv_bin = _candidate
+                break
+
+        # Determine the expected command link directory (mirrors install.sh logic)
+        _prefix = os.environ.get("PREFIX", "")
+        _is_termux_env = bool(os.environ.get("TERMUX_VERSION")) or "com.termux/files/usr" in _prefix
+        if _is_termux_env and _prefix:
+            _cmd_link_dir = Path(_prefix) / "bin"
+            _cmd_link_display = "$PREFIX/bin"
+        else:
+            _cmd_link_dir = Path.home() / ".local" / "bin"
+            _cmd_link_display = "~/.local/bin"
+        _cmd_link = _cmd_link_dir / "hermes"
+
+        if _venv_bin is None:
+            check_warn(
+                "Venv entry point not found",
+                "(hermes not in venv/bin/ or .venv/bin/ — reinstall with pip install -e '.[all]')"
+            )
+            manual_issues.append(
+                f"Reinstall entry point: cd {PROJECT_ROOT} && source venv/bin/activate && pip install -e '.[all]'"
+            )
+        else:
+            check_ok(f"Venv entry point exists ({_venv_bin.relative_to(PROJECT_ROOT)})")
+
+            # Check the symlink at the command link location
+            if _cmd_link.is_symlink():
+                _target = _cmd_link.resolve()
+                _expected = _venv_bin.resolve()
+                if _target == _expected:
+                    check_ok(f"{_cmd_link_display}/hermes → correct target")
+                else:
+                    check_warn(
+                        f"{_cmd_link_display}/hermes points to wrong target",
+                        f"(→ {_target}, expected → {_expected})"
+                    )
+                    if should_fix:
+                        _cmd_link.unlink()
+                        _cmd_link.symlink_to(_venv_bin)
+                        check_ok(f"Fixed symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                        fixed_count += 1
+                    else:
+                        issues.append(f"Broken symlink at {_cmd_link_display}/hermes — run 'hermes doctor --fix'")
+            elif _cmd_link.exists():
+                # It's a regular file, not a symlink — possibly a wrapper script
+                check_ok(f"{_cmd_link_display}/hermes exists (non-symlink)")
+            else:
+                check_fail(
+                    f"{_cmd_link_display}/hermes not found",
+                    "(hermes command may not work outside the venv)"
+                )
+                if should_fix:
+                    _cmd_link_dir.mkdir(parents=True, exist_ok=True)
+                    _cmd_link.symlink_to(_venv_bin)
+                    check_ok(f"Created symlink: {_cmd_link_display}/hermes → {_venv_bin}")
+                    fixed_count += 1
+
+                    # Check if the link dir is on PATH
+                    _path_dirs = os.environ.get("PATH", "").split(os.pathsep)
+                    if str(_cmd_link_dir) not in _path_dirs:
+                        check_warn(
+                            f"{_cmd_link_display} is not on your PATH",
+                            "(add it to your shell config: export PATH=\"$HOME/.local/bin:$PATH\")"
+                        )
+                        manual_issues.append(f"Add {_cmd_link_display} to your PATH")
+                else:
+                    issues.append(f"Missing {_cmd_link_display}/hermes symlink — run 'hermes doctor --fix'")
+
    # =========================================================================
    # Check: External tools
    # =========================================================================
@@ -8,11 +8,40 @@ from pathlib import Path
 from dotenv import load_dotenv


+# Env var name suffixes that indicate credential values.  These are the
+# only env vars whose values we sanitize on load — we must not silently
+# alter arbitrary user env vars, but credentials are known to require
+# pure ASCII (they become HTTP header values).
+_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
+
+
+def _sanitize_loaded_credentials() -> None:
+    """Drop non-ASCII characters from credential variables in ``os.environ``.
+
+    Runs after dotenv has populated the environment.  A variable is treated
+    as a credential when its name ends with one of ``_CREDENTIAL_SUFFIXES``;
+    all other variables are left untouched.
+    """
+    # Snapshot the items: entries are reassigned while we iterate.
+    for name, raw in list(os.environ.items()):
+        if not name.endswith(_CREDENTIAL_SUFFIXES):
+            continue
+        if raw.isascii():
+            continue
+        os.environ[name] = raw.encode("ascii", errors="ignore").decode("ascii")
+
+
 def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
    try:
        load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
    except UnicodeDecodeError:
        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
+    # Strip non-ASCII characters from credential env vars after each load.
+    # The pass scans all of os.environ (not only the keys this file set).
+    # API keys must be pure ASCII since they're sent as HTTP header values
+    # (httpx encodes headers as ASCII).  Non-ASCII chars typically come from
+    # pasted keys with Unicode lookalike glyphs (e.g. ʋ U+028B for v).
+    _sanitize_loaded_credentials()


 def _sanitize_env_file_if_needed(path: Path) -> None:
@@ -715,7 +715,9 @@ def _detect_venv_dir() -> Path | None:
    """Detect the active virtualenv directory.

    Checks ``sys.prefix`` first (works regardless of the directory name),
-    then falls back to probing common directory names under PROJECT_ROOT.
+    then ``VIRTUAL_ENV`` env var (covers uv-managed environments where
+    sys.prefix == sys.base_prefix), then falls back to probing common
+    directory names under PROJECT_ROOT.
    Returns ``None`` when no virtualenv can be found.
    """
    # If we're running inside a virtualenv, sys.prefix points to it.
@@ -724,6 +726,15 @@ def _detect_venv_dir() -> Path | None:
        if venv.is_dir():
            return venv

+    # uv and some other tools set VIRTUAL_ENV without changing sys.prefix.
+    # This catches `uv run` where sys.prefix == sys.base_prefix but the
+    # environment IS a venv.  (#8620)
+    _virtual_env = os.environ.get("VIRTUAL_ENV")
+    if _virtual_env:
+        venv = Path(_virtual_env)
+        if venv.is_dir():
+            return venv
+
    # Fallback: check common virtualenv directory names under the project root.
    for candidate in (".venv", "venv"):
        venv = PROJECT_ROOT / candidate
@@ -1128,7 +1139,62 @@ def systemd_restart(system: bool = False):

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
-        print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
+        # SIGUSR1 sent — the gateway will drain active agents, exit with
+        # code 75, and systemd will restart it after RestartSec (30s).
+        # Wait for the old process to die and the new one to become active
+        # so the CLI doesn't return while the service is still restarting.
+        import time
+        scope_label = _service_scope_label(system).capitalize()
+        svc = get_service_name()
+        scope_cmd = _systemctl_cmd(system)
+
+        # Phase 1: wait for old process to exit (drain + shutdown)
+        print(f"⏳ {scope_label} service draining active work...")
+        deadline = time.time() + 90
+        while time.time() < deadline:
+            try:
+                os.kill(pid, 0)
+                time.sleep(1)
+            except (ProcessLookupError, PermissionError):
+                break  # old process is gone
+        else:
+            print(f"⚠ Old process (PID {pid}) still alive after 90s")
+
+        # Phase 2: wait for systemd to start the new process
+        print(f"⏳ Waiting for {svc} to restart...")
+        deadline = time.time() + 60
+        while time.time() < deadline:
+            try:
+                result = subprocess.run(
+                    scope_cmd + ["is-active", svc],
+                    capture_output=True, text=True, timeout=5,
+                )
+                if result.stdout.strip() == "active":
+                    # Verify it's a NEW process, not the old one somehow
+                    new_pid = get_running_pid()
+                    if new_pid and new_pid != pid:
+                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                        return
+            except (subprocess.TimeoutExpired, FileNotFoundError):
+                pass
+            time.sleep(2)
+
+        # Timed out — check final state
+        try:
+            result = subprocess.run(
+                scope_cmd + ["is-active", svc],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip() == "active":
+                print(f"✓ {scope_label} service restarted")
+                return
+        except Exception:
+            pass
+        print(
+            f"⚠ {scope_label} service did not become active within 60s.\n"
+            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        )
        return
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")
@@ -2864,6 +2930,15 @@ def gateway_command(args):

    elif subcmd == "start":
        system = getattr(args, 'system', False)
+        start_all = getattr(args, 'all', False)
+
+        if start_all:
+            # Kill all stale gateway processes across all profiles before starting
+            killed = kill_gateway_processes(all_profiles=True)
+            if killed:
+                print(f"✓ Killed {killed} stale gateway process(es) across all profiles")
+                _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
+
        if is_termux():
            print("Gateway service start is not supported on Termux because there is no system service manager.")
            print("Run manually: hermes gateway")
@@ -2949,7 +3024,39 @@ def gateway_command(args):
        # Try service first, fall back to killing and restarting
        service_available = False
        system = getattr(args, 'system', False)
+        restart_all = getattr(args, 'all', False)
        service_configured = False
+
+        if restart_all:
+            # --all: stop every gateway process across all profiles, then start fresh
+            service_stopped = False
+            if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+                try:
+                    systemd_stop(system=system)
+                    service_stopped = True
+                except subprocess.CalledProcessError:
+                    pass
+            elif is_macos() and get_launchd_plist_path().exists():
+                try:
+                    launchd_stop()
+                    service_stopped = True
+                except subprocess.CalledProcessError:
+                    pass
+            killed = kill_gateway_processes(all_profiles=True)
+            total = killed + (1 if service_stopped else 0)
+            if total:
+                print(f"✓ Stopped {total} gateway process(es) across all profiles")
+            _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
+
+            # Start the current profile's service fresh
+            print("Starting gateway...")
+            if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+                systemd_start(system=system)
+            elif is_macos() and get_launchd_plist_path().exists():
+                launchd_start()
+            else:
+                run_gateway(verbose=0)
+            return
        
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
            service_configured = True
@@ -4036,7 +4036,40 @@ def cmd_update(args):
                                    capture_output=True, text=True, timeout=15,
                                )
                                if restart.returncode == 0:
-                                    restarted_services.append(svc_name)
+                                    # Verify the service actually survived the
+                                    # restart.  systemctl restart returns 0 even
+                                    # if the new process crashes immediately.
+                                    import time as _time
+                                    _time.sleep(3)
+                                    verify = subprocess.run(
+                                        scope_cmd + ["is-active", svc_name],
+                                        capture_output=True, text=True, timeout=5,
+                                    )
+                                    if verify.stdout.strip() == "active":
+                                        restarted_services.append(svc_name)
+                                    else:
+                                        # Retry once — transient startup failures
+                                        # (stale module cache, import race) often
+                                        # resolve on the second attempt.
+                                        print(f"  ⚠ {svc_name} died after restart, retrying...")
+                                        retry = subprocess.run(
+                                            scope_cmd + ["restart", svc_name],
+                                            capture_output=True, text=True, timeout=15,
+                                        )
+                                        _time.sleep(3)
+                                        verify2 = subprocess.run(
+                                            scope_cmd + ["is-active", svc_name],
+                                            capture_output=True, text=True, timeout=5,
+                                        )
+                                        if verify2.stdout.strip() == "active":
+                                            restarted_services.append(svc_name)
+                                            print(f"  ✓ {svc_name} recovered on retry")
+                                        else:
+                                            print(
+                                                f"  ✗ {svc_name} failed to stay running after restart.\n"
+                                                f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
+                                                f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
+                                            )
                                else:
                                    print(f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}")
                    except (FileNotFoundError, subprocess.TimeoutExpired):
@@ -4716,6 +4749,7 @@ For more help on a command:
    # gateway start
    gateway_start = gateway_subparsers.add_parser("start", help="Start the installed systemd/launchd background service")
    gateway_start.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
+    gateway_start.add_argument("--all", action="store_true", help="Kill ALL stale gateway processes across all profiles before starting")
    
    # gateway stop
    gateway_stop = gateway_subparsers.add_parser("stop", help="Stop gateway service")
@@ -4725,6 +4759,7 @@ For more help on a command:
    # gateway restart
    gateway_restart = gateway_subparsers.add_parser("restart", help="Restart gateway service")
    gateway_restart.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
+    gateway_restart.add_argument("--all", action="store_true", help="Kill ALL gateway processes across all profiles before restarting")
    
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
@@ -6011,7 +6046,37 @@ Examples:
        sys.exit(1)

    _processed_argv = _coalesce_session_name_args(sys.argv[1:])
-    args = parser.parse_args(_processed_argv)
+
+    # ── Defensive subparser routing (bpo-9338 workaround) ───────────
+    # On some Python versions (notably <3.11), argparse fails to route
+    # subcommand tokens when the parent parser has nargs='?' optional
+    # arguments (--continue).  The symptom: "unrecognized arguments: model"
+    # even though 'model' is a registered subcommand.
+    #
+    # Fix: when argv contains a token matching a known subcommand, set
+    # subparsers.required=True to force deterministic routing.  If that
+    # fails (e.g. 'hermes -c model' where 'model' is consumed as the
+    # session name for --continue), fall back to the default behaviour.
+    import io as _io
+    _known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
+    _has_cmd_token = any(t in _known_cmds for t in _processed_argv if not t.startswith("-"))
+
+    if _has_cmd_token:
+        subparsers.required = True
+        _saved_stderr = sys.stderr
+        try:
+            sys.stderr = _io.StringIO()
+            args = parser.parse_args(_processed_argv)
+            sys.stderr = _saved_stderr
+        except SystemExit:
+            sys.stderr = _saved_stderr
+            # Subcommand name was consumed as a flag value (e.g. -c model).
+            # Fall back to optional subparsers so argparse handles it normally.
+            subparsers.required = False
+            args = parser.parse_args(_processed_argv)
+    else:
+        subparsers.required = False
+        args = parser.parse_args(_processed_argv)

    # Handle --version flag
    if args.version:
@@ -44,6 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("minimax/minimax-m2.7",            ""),
    ("minimax/minimax-m2.5",            ""),
    ("z-ai/glm-5.1",                    ""),
+    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
    ("moonshotai/kimi-k2.5",            ""),
    ("x-ai/grok-4.20",                  ""),
@@ -89,6 +90,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
        "z-ai/glm-5.1",
+        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "moonshotai/kimi-k2.5",
        "x-ai/grok-4.20-beta",
@@ -134,6 +136,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    "zai": [
        "glm-5.1",
        "glm-5",
+        "glm-5v-turbo",
        "glm-5-turbo",
        "glm-4.7",
        "glm-4.5",
@@ -63,6 +63,7 @@ CONFIGURABLE_TOOLSETS = [
    ("clarify",         "❓ Clarifying Questions",      "clarify"),
    ("delegation",      "👥 Task Delegation",           "delegate_task"),
    ("cronjob",         "⏰ Cron Jobs",                 "create/list/update/pause/resume/run, with optional attached skills"),
+    ("messaging",       "📨 Cross-Platform Messaging",  "send_message"),
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
 ]
@@ -121,6 +122,7 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription",
+                "badge": "subscription",
                "tag": "Managed OpenAI TTS billed to your subscription",
                "env_vars": [],
                "tts_provider": "openai",
@@ -130,13 +132,15 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Microsoft Edge TTS",
-                "tag": "Free - no API key needed",
+                "badge": "★ recommended · free",
+                "tag": "Good quality, no API key needed",
                "env_vars": [],
                "tts_provider": "edge",
            },
            {
                "name": "OpenAI TTS",
-                "tag": "Premium - high quality voices",
+                "badge": "paid",
+                "tag": "High quality voices",
                "env_vars": [
                    {"key": "VOICE_TOOLS_OPENAI_KEY", "prompt": "OpenAI API key", "url": "https://platform.openai.com/api-keys"},
                ],
@@ -144,7 +148,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "ElevenLabs",
-                "tag": "Premium - most natural voices",
+                "badge": "paid",
+                "tag": "Most natural voices",
                "env_vars": [
                    {"key": "ELEVENLABS_API_KEY", "prompt": "ElevenLabs API key", "url": "https://elevenlabs.io/app/settings/api-keys"},
                ],
@@ -152,7 +157,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Mistral (Voxtral TTS)",
-                "tag": "Multilingual, native Opus, needs MISTRAL_API_KEY",
+                "badge": "paid",
+                "tag": "Multilingual, native Opus",
                "env_vars": [
                    {"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
                ],
@@ -168,6 +174,7 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription",
+                "badge": "subscription",
                "tag": "Managed Firecrawl billed to your subscription",
                "web_backend": "firecrawl",
                "env_vars": [],
@@ -177,7 +184,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Firecrawl Cloud",
-                "tag": "Hosted service - search, extract, and crawl",
+                "badge": "★ recommended",
+                "tag": "Full-featured search, extract, and crawl",
                "web_backend": "firecrawl",
                "env_vars": [
                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
@@ -185,7 +193,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Exa",
-                "tag": "AI-native search and contents",
+                "badge": "paid",
+                "tag": "Neural search with semantic understanding",
                "web_backend": "exa",
                "env_vars": [
                    {"key": "EXA_API_KEY", "prompt": "Exa API key", "url": "https://exa.ai"},
@@ -193,7 +202,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Parallel",
-                "tag": "AI-native search and extract",
+                "badge": "paid",
+                "tag": "AI-powered search and extract",
                "web_backend": "parallel",
                "env_vars": [
                    {"key": "PARALLEL_API_KEY", "prompt": "Parallel API key", "url": "https://parallel.ai"},
@@ -201,7 +211,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Tavily",
-                "tag": "AI-native search, extract, and crawl",
+                "badge": "free tier",
+                "tag": "Search, extract, and crawl — 1000 free searches/mo",
                "web_backend": "tavily",
                "env_vars": [
                    {"key": "TAVILY_API_KEY", "prompt": "Tavily API key", "url": "https://app.tavily.com/home"},
@@ -209,7 +220,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Firecrawl Self-Hosted",
-                "tag": "Free - run your own instance",
+                "badge": "free · self-hosted",
+                "tag": "Run your own Firecrawl instance (Docker)",
                "web_backend": "firecrawl",
                "env_vars": [
                    {"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
@@ -223,6 +235,7 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription",
+                "badge": "subscription",
                "tag": "Managed FAL image generation billed to your subscription",
                "env_vars": [],
                "requires_nous_auth": True,
@@ -231,6 +244,7 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "FAL.ai",
+                "badge": "paid",
                "tag": "FLUX 2 Pro with auto-upscaling",
                "env_vars": [
                    {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
@@ -244,6 +258,7 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Nous Subscription (Browser Use cloud)",
+                "badge": "subscription",
                "tag": "Managed Browser Use billed to your subscription",
                "env_vars": [],
                "browser_provider": "browser-use",
@@ -254,14 +269,16 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Local Browser",
-                "tag": "Free headless Chromium (no API key needed)",
+                "badge": "★ recommended · free",
+                "tag": "Headless Chromium, no API key needed",
                "env_vars": [],
                "browser_provider": "local",
                "post_setup": "agent_browser",
            },
            {
                "name": "Browserbase",
-                "tag": "Cloud browser with stealth & proxies",
+                "badge": "paid",
+                "tag": "Cloud browser with stealth and proxies",
                "env_vars": [
                    {"key": "BROWSERBASE_API_KEY", "prompt": "Browserbase API key", "url": "https://browserbase.com"},
                    {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
@@ -271,6 +288,7 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Browser Use",
+                "badge": "paid",
                "tag": "Cloud browser with remote execution",
                "env_vars": [
                    {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
@@ -280,6 +298,7 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Firecrawl",
+                "badge": "paid",
                "tag": "Cloud browser with remote execution",
                "env_vars": [
                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
@@ -289,7 +308,8 @@ TOOL_CATEGORIES = {
            },
            {
                "name": "Camofox",
-                "tag": "Local anti-detection browser (Firefox/Camoufox)",
+                "badge": "free · local",
+                "tag": "Anti-detection browser (Firefox/Camoufox)",
                "env_vars": [
                    {"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377",
                     "url": "https://github.com/jo-inc/camofox-browser"},
@@ -838,7 +858,8 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
        # Plain text labels only (no ANSI codes in menu items)
        provider_choices = []
        for p in providers:
-            tag = f" ({p['tag']})" if p.get("tag") else ""
+            badge = f" [{p['badge']}]" if p.get("badge") else ""
+            tag = f" — {p['tag']}" if p.get("tag") else ""
            configured = ""
            env_vars = p.get("env_vars", [])
            if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
@@ -848,7 +869,7 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
                    configured = ""
                else:
                    configured = " [configured]"
-            provider_choices.append(f"{p['name']}{tag}{configured}")
+            provider_choices.append(f"{p['name']}{badge}{tag}{configured}")

        # Add skip option
        provider_choices.append("Skip — keep defaults / configure later")
@@ -1104,7 +1125,8 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):

        provider_choices = []
        for p in providers:
-            tag = f" ({p['tag']})" if p.get("tag") else ""
+            badge = f" [{p['badge']}]" if p.get("badge") else ""
+            tag = f" — {p['tag']}" if p.get("tag") else ""
            configured = ""
            env_vars = p.get("env_vars", [])
            if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
@@ -1114,7 +1136,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
                    configured = ""
                else:
                    configured = " [configured]"
-            provider_choices.append(f"{p['name']}{tag}{configured}")
+            provider_choices.append(f"{p['name']}{badge}{tag}{configured}")

        default_idx = _detect_active_provider_index(providers, config)

@@ -13,6 +13,7 @@ import asyncio
 import hmac
 import json
 import logging
+import os
 import secrets
 import sys
 import threading
@@ -319,12 +320,68 @@ class EnvVarReveal(BaseModel):
    key: str


+_GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")  # unset/empty: remote probe is skipped
+_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT") or "3")  # seconds; blank value falls back to 3
+
+
+def _probe_gateway_health() -> tuple[bool, dict | None]:
+    """Probe the gateway via its HTTP health endpoint (cross-container).
+
+    Tries ``/health/detailed`` first (returns full state), then the simpler
+    ``/health``.  Returns ``(is_alive, body)``; ``body`` is a dict or None.
+
+    Accepts any of these as ``GATEWAY_HEALTH_URL``:
+    - ``http://gateway:8642``                (base URL — recommended)
+    - ``http://gateway:8642/health``         (explicit health path)
+    - ``http://gateway:8642/health/detailed`` (explicit detailed path)
+
+    This is a **blocking** call — run via ``run_in_executor`` from async code.
+    """
+    if not _GATEWAY_HEALTH_URL:
+        return False, None
+    import urllib.request  # local: don't rely on a module-level import
+
+    # Normalise to base URL so we always probe the right paths regardless of
+    # whether the user included /health or /health/detailed in the env var.
+    base = _GATEWAY_HEALTH_URL.rstrip("/")
+    if base.endswith("/health/detailed"):
+        base = base[: -len("/health/detailed")]
+    elif base.endswith("/health"):
+        base = base[: -len("/health")]
+
+    for url in (f"{base}/health/detailed", f"{base}/health"):
+        try:
+            with urllib.request.urlopen(url, timeout=_GATEWAY_HEALTH_TIMEOUT) as resp:
+                body = json.loads(resp.read()) if resp.status == 200 else None
+            if isinstance(body, dict):  # callers call .get() on the body
+                return True, body
+        except Exception:  # unreachable / timeout / bad JSON: try next path
+            continue
+    return False, None
+
+
@app.get("/api/status")
 async def get_status():
    current_ver, latest_ver = check_config_version()

+    # --- Gateway liveness detection ---
+    # Try local PID check first (same-host).  If that fails and a remote
+    # GATEWAY_HEALTH_URL is configured, probe the gateway over HTTP so the
+    # dashboard works when the gateway runs in a separate container.
    gateway_pid = get_running_pid()
    gateway_running = gateway_pid is not None
+    remote_health_body: dict | None = None
+
+    if not gateway_running and _GATEWAY_HEALTH_URL:
+        loop = asyncio.get_event_loop()
+        alive, remote_health_body = await loop.run_in_executor(
+            None, _probe_gateway_health
+        )
+        if alive:
+            gateway_running = True
+            # PID from the remote container (display only — not locally valid)
+            if remote_health_body:
+                gateway_pid = remote_health_body.get("pid")

    gateway_state = None
    gateway_platforms: dict = {}
@@ -341,7 +398,12 @@ async def get_status():
    except Exception:
        configured_gateway_platforms = None

+    # Prefer the detailed health endpoint response (has full state) when the
+    # local runtime status file is absent or stale (cross-container).
    runtime = read_runtime_status()
+    if runtime is None and remote_health_body and remote_health_body.get("gateway_state"):
+        runtime = remote_health_body
+
    if runtime:
        gateway_state = runtime.get("gateway_state")
        gateway_platforms = runtime.get("platforms") or {}
@@ -356,6 +418,17 @@ async def get_status():
        if not gateway_running:
            gateway_state = gateway_state if gateway_state in ("stopped", "startup_failed") else "stopped"
            gateway_platforms = {}
+        elif gateway_running and remote_health_body is not None:
+            # The health probe confirmed the gateway is alive, but the local
+            # runtime status file may be stale (cross-container).  Override
+            # stopped/None state so the dashboard shows the correct badge.
+            if gateway_state in (None, "stopped"):
+                gateway_state = "running"
+
+    # If there was no runtime info at all but the health probe confirmed alive,
+    # ensure we still report the gateway as running (no shared volume scenario).
+    if gateway_running and gateway_state is None and remote_health_body is not None:
+        gateway_state = "running"

    active_sessions = 0
    try:
@@ -1904,7 +1977,8 @@ async def update_config_raw(body: RawConfigUpdate):
@app.get("/api/analytics/usage")
 async def get_usage_analytics(days: int = 30):
    from hermes_state import SessionDB
-    db = SessionDB()
+    from hermes_constants import get_hermes_home
+    db = SessionDB(db_path=get_hermes_home() / "state.db")
    try:
        cutoff = time.time() - (days * 86400)
        cur = db._conn.execute("""
@@ -1912,10 +1986,12 @@ async def get_usage_analytics(days: int = 30):
                   SUM(input_tokens) as input_tokens,
                   SUM(output_tokens) as output_tokens,
                   SUM(cache_read_tokens) as cache_read_tokens,
+                   SUM(cache_write_tokens) as cache_write_tokens,
                   SUM(reasoning_tokens) as reasoning_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ?
            GROUP BY day ORDER BY day
        """, (cutoff,))
@@ -1925,10 +2001,13 @@ async def get_usage_analytics(days: int = 30):
            SELECT model,
                   SUM(input_tokens) as input_tokens,
                   SUM(output_tokens) as output_tokens,
+                   SUM(cache_read_tokens) as cache_read_tokens,
+                   SUM(cache_write_tokens) as cache_write_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ? AND model IS NOT NULL
-            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
+            GROUP BY model ORDER BY SUM(input_tokens) + SUM(cache_read_tokens) + SUM(cache_write_tokens) + SUM(output_tokens) DESC
        """, (cutoff,))
        by_model = [dict(r) for r in cur2.fetchall()]

@@ -1936,10 +2015,12 @@ async def get_usage_analytics(days: int = 30):
            SELECT SUM(input_tokens) as total_input,
                   SUM(output_tokens) as total_output,
                   SUM(cache_read_tokens) as total_cache_read,
+                   SUM(cache_write_tokens) as total_cache_write,
                   SUM(reasoning_tokens) as total_reasoning,
                   COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
-                   COUNT(*) as total_sessions
+                   COUNT(*) as total_sessions,
+                   SUM(COALESCE(api_call_count, 0)) as total_api_calls
            FROM sessions WHERE started_at > ?
        """, (cutoff,))
        totals = dict(cur3.fetchone())
@@ -358,6 +358,7 @@ def _add_rotating_handler(
    path.parent.mkdir(parents=True, exist_ok=True)
    handler = _ManagedRotatingFileHandler(
        str(path), maxBytes=max_bytes, backupCount=backup_count,
+        encoding="utf-8",
    )
    handler.setLevel(level)
    handler.setFormatter(formatter)
@@ -31,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 6
+SCHEMA_VERSION = 7

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions (
    cost_source TEXT,
    pricing_version TEXT,
    title TEXT,
+    api_call_count INTEGER DEFAULT 0,
    FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
 );

@@ -329,6 +330,17 @@ class SessionDB:
                    except sqlite3.OperationalError:
                        pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 6")
+            if current_version < 7:
+                # v7: add api_call_count column to sessions — tracks the number
+                # of individual LLM API calls made within a session (as opposed
+                # to the session count itself).
+                try:
+                    cursor.execute(
+                        'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
+                    )
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 7")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -426,6 +438,7 @@ class SessionDB:
        billing_provider: Optional[str] = None,
        billing_base_url: Optional[str] = None,
        billing_mode: Optional[str] = None,
+        api_call_count: int = 0,
        absolute: bool = False,
    ) -> None:
        """Update token counters and backfill model if not already set.
@@ -455,7 +468,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = ?
                   WHERE id = ?"""
        else:
            sql = """UPDATE sessions SET
@@ -475,7 +489,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = COALESCE(api_call_count, 0) + ?
                   WHERE id = ?"""
        params = (
            input_tokens,
@@ -493,6 +508,7 @@ class SessionDB:
            billing_base_url,
            billing_mode,
            model,
+            api_call_count,
            session_id,
        )
        def _do(conn):
@@ -26,7 +26,7 @@ import logging
 import threading
 from typing import Dict, Any, List, Optional, Tuple

-from tools.registry import registry
+from tools.registry import discover_builtin_tools, registry
 from toolsets import resolve_toolset, validate_toolset

 logger = logging.getLogger(__name__)
@@ -129,45 +129,7 @@ def _run_async(coro):
 # Tool Discovery  (importing each module triggers its registry.register calls)
 # =============================================================================

-def _discover_tools():
-    """Import all tool modules to trigger their registry.register() calls.
-
-    Wrapped in a function so import errors in optional tools (e.g., fal_client
-    not installed) don't prevent the rest from loading.
-    """
-    _modules = [
-        "tools.web_tools",
-        "tools.terminal_tool",
-        "tools.file_tools",
-        "tools.vision_tools",
-        "tools.mixture_of_agents_tool",
-        "tools.image_generation_tool",
-        "tools.skills_tool",
-        "tools.skill_manager_tool",
-        "tools.browser_tool",
-        "tools.cronjob_tools",
-        "tools.rl_training_tool",
-        "tools.tts_tool",
-        "tools.todo_tool",
-        "tools.memory_tool",
-        "tools.session_search_tool",
-        "tools.clarify_tool",
-        "tools.code_execution_tool",
-        "tools.delegate_tool",
-        "tools.process_registry",
-        "tools.send_message_tool",
-        # "tools.honcho_tools",  # Removed — Honcho is now a memory provider plugin
-        "tools.homeassistant_tool",
-    ]
-    import importlib
-    for mod_name in _modules:
-        try:
-            importlib.import_module(mod_name)
-        except Exception as e:
-            logger.warning("Could not import tool module %s: %s", mod_name, e)
-
-
-_discover_tools()
+discover_builtin_tools()

 # MCP tool discovery (external MCP servers from config)
 try:
@@ -78,13 +78,13 @@ dingtalk = ["dingtalk-stream>=0.1.0,<1"]
 feishu = ["lark-oapi>=1.5.3,<2"]
 web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
 rl = [
-  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
-  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git",
+  "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
+  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git@30517b667f18a3dfb7ef33fb56cf686d5820ba2b",
  "fastapi>=0.104.0,<1",
  "uvicorn[standard]>=0.24.0,<1",
  "wandb>=0.15.0,<1",
 ]
-yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git ; python_version >= '3.12'"]
+yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
 all = [
  "hermes-agent[modal]",
  "hermes-agent[daytona]",
@@ -1268,6 +1268,19 @@ class AIAgent:
            try:
                _config_context_length = int(_config_context_length)
            except (TypeError, ValueError):
+                logger.warning(
+                    "Invalid model.context_length in config.yaml: %r — "
+                    "must be a plain integer (e.g. 256000, not '256K'). "
+                    "Falling back to auto-detection.",
+                    _config_context_length,
+                )
+                import sys
+                print(
+                    f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n"
+                    f"  Must be a plain integer (e.g. 256000, not '256K').\n"
+                    f"  Falling back to auto-detected context window.\n",
+                    file=sys.stderr,
+                )
                _config_context_length = None

        # Store for reuse in switch_model (so config override persists across model switches)
@@ -1296,7 +1309,20 @@ class AIAgent:
                                try:
                                    _config_context_length = int(_cp_ctx)
                                except (TypeError, ValueError):
-                                    pass
+                                    logger.warning(
+                                        "Invalid context_length for model %r in "
+                                        "custom_providers: %r — must be a plain "
+                                        "integer (e.g. 256000, not '256K'). "
+                                        "Falling back to auto-detection.",
+                                        self.model, _cp_ctx,
+                                    )
+                                    import sys
+                                    print(
+                                        f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
+                                        f"  Must be a plain integer (e.g. 256000, not '256K').\n"
+                                        f"  Falling back to auto-detected context window.\n",
+                                        file=sys.stderr,
+                                    )
                    break
        
        # Select context engine: config-driven (like memory providers).
@@ -6975,6 +7001,31 @@ class AIAgent:
                skip_pre_tool_call_hook=True,
            )

+    @staticmethod
+    def _wrap_verbose(label: str, text: str, indent: str = "     ") -> str:
+        """Word-wrap verbose tool output to fit the terminal width.
+
+        Each newline-separated line of *text* is wrapped on its own, so
+        intentional breaks (e.g. pretty-printed JSON) survive.  *label* is
+        counted against the first line's width; every line gets *indent*.
+        """
+        import shutil as _shutil
+        import textwrap as _tw
+        cols = _shutil.get_terminal_size((120, 24)).columns
+        wrap_width = max(40, cols - len(indent))  # never wrap below 40 cols
+        out_lines: list[str] = []
+        for idx, raw_line in enumerate(text.split("\n")):
+            # Only the first output line carries the label.
+            prefix = label if idx == 0 else ""
+            if len(prefix) + len(raw_line) <= wrap_width:
+                out_lines.append(prefix + raw_line)
+                continue
+            wrapped = _tw.wrap(raw_line, width=wrap_width,
+                               initial_indent=prefix, break_long_words=True,
+                               break_on_hyphens=False)
+            out_lines.extend(wrapped or [prefix + raw_line])
+        return indent + ("\n" + indent).join(out_lines)
+
    def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
        """Execute multiple tool calls concurrently using a thread pool.

@@ -7045,7 +7096,7 @@ class AIAgent:
                args_str = json.dumps(args, ensure_ascii=False)
                if self.verbose_logging:
                    print(f"  📞 Tool {i}: {name}({list(args.keys())})")
-                    print(f"     Args: {args_str}")
+                    print(self._wrap_verbose("Args: ", json.dumps(args, indent=2, ensure_ascii=False)))
                else:
                    args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
                    print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
@@ -7143,7 +7194,7 @@ class AIAgent:
            elif not self.quiet_mode:
                if self.verbose_logging:
                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s")
-                    print(f"     Result: {function_result}")
+                    print(self._wrap_verbose("Result: ", function_result))
                else:
                    response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
@@ -7236,7 +7287,7 @@ class AIAgent:
                args_str = json.dumps(function_args, ensure_ascii=False)
                if self.verbose_logging:
                    print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
-                    print(f"     Args: {args_str}")
+                    print(self._wrap_verbose("Args: ", json.dumps(function_args, indent=2, ensure_ascii=False)))
                else:
                    args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
                    print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
@@ -7524,7 +7575,7 @@ class AIAgent:
            if not self.quiet_mode:
                if self.verbose_logging:
                    print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s")
-                    print(f"     Result: {function_result}")
+                    print(self._wrap_verbose("Result: ", function_result))
                else:
                    response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
                    print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
@@ -7807,6 +7858,7 @@ class AIAgent:
        self._incomplete_scratchpad_retries = 0
        self._codex_incomplete_retries = 0
        self._thinking_prefill_retries = 0
+        self._post_tool_empty_retried = False
        self._last_content_with_tools = None
        self._mute_post_response = False
        self._unicode_sanitization_passes = 0
@@ -7987,6 +8039,15 @@ class AIAgent:
                    # skipping them because conversation_history is still the
                    # pre-compression length.
                    conversation_history = None
+                    # Fix: reset retry counters after compression so the model
+                    # gets a fresh budget on the compressed context.  Without
+                    # this, pre-compression retries carry over and the model
+                    # hits "(empty)" immediately after compression-induced
+                    # context loss.
+                    self._empty_content_retries = 0
+                    self._thinking_prefill_retries = 0
+                    self._last_content_with_tools = None
+                    self._mute_post_response = False
                    # Re-estimate after compression
                    _preflight_tokens = estimate_request_tokens_rough(
                        messages,
@@ -8856,6 +8917,7 @@ class AIAgent:
                                    billing_mode="subscription_included"
                                    if cost_result.status == "included" else None,
                                    model=self.model,
+                                    api_call_count=1,
                                )
                            except Exception:
                                pass  # never block the agent loop
@@ -8962,12 +9024,40 @@ class AIAgent:
                            if isinstance(_default_headers, dict):
                                _headers_sanitized = _sanitize_structure_non_ascii(_default_headers)

+                            # Sanitize the API key — non-ASCII characters in
+                            # credentials (e.g. ʋ instead of v from a bad
+                            # copy-paste) cause httpx to fail when encoding
+                            # the Authorization header as ASCII.  This is the
+                            # most common cause of persistent UnicodeEncodeError
+                            # that survives message/tool sanitization (#6843).
+                            _credential_sanitized = False
+                            _raw_key = getattr(self, "api_key", None) or ""
+                            if _raw_key:
+                                _clean_key = _strip_non_ascii(_raw_key)
+                                if _clean_key != _raw_key:
+                                    self.api_key = _clean_key
+                                    if isinstance(getattr(self, "_client_kwargs", None), dict):
+                                        self._client_kwargs["api_key"] = _clean_key
+                                    # Also update the live client — it holds its
+                                    # own copy of api_key which auth_headers reads
+                                    # dynamically on every request.
+                                    if getattr(self, "client", None) is not None and hasattr(self.client, "api_key"):
+                                        self.client.api_key = _clean_key
+                                    _credential_sanitized = True
+                                    self._vprint(
+                                        f"{self.log_prefix}⚠️  API key contained non-ASCII characters "
+                                        f"(bad copy-paste?) — stripped them. If auth fails, "
+                                        f"re-copy the key from your provider's dashboard.",
+                                        force=True,
+                                    )
+
                            if (
                                _messages_sanitized
                                or _prefill_sanitized
                                or _tools_sanitized
                                or _system_sanitized
                                or _headers_sanitized
+                                or _credential_sanitized
                            ):
                                self._unicode_sanitization_passes += 1
                                self._vprint(
@@ -9255,7 +9345,9 @@ class AIAgent:
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
-                                "partial": True
+                                "partial": True,
+                                "failed": True,
+                                "compression_exhausted": True,
                            }
                        self._emit_status(f"⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")

@@ -9284,7 +9376,9 @@ class AIAgent:
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": "Request payload too large (413). Cannot compress further.",
-                                "partial": True
+                                "partial": True,
+                                "failed": True,
+                                "compression_exhausted": True,
                            }

                    # Check for context-length errors BEFORE generic 4xx handler.
@@ -9335,7 +9429,9 @@ class AIAgent:
                                    "completed": False,
                                    "api_calls": api_call_count,
                                    "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
-                                    "partial": True
+                                    "partial": True,
+                                    "failed": True,
+                                    "compression_exhausted": True,
                                }
                            restart_with_compressed_messages = True
                            break
@@ -9385,7 +9481,9 @@ class AIAgent:
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
-                                "partial": True
+                                "partial": True,
+                                "failed": True,
+                                "compression_exhausted": True,
                            }
                        self._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")

@@ -9416,7 +9514,9 @@ class AIAgent:
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
-                                "partial": True
+                                "partial": True,
+                                "failed": True,
+                                "compression_exhausted": True,
                            }

                    # Check for non-retryable client errors.  The classifier
@@ -10038,6 +10138,10 @@ class AIAgent:
                    if _had_prefill:
                        self._thinking_prefill_retries = 0
                        self._empty_content_retries = 0
+                    # Successful tool execution — reset the post-tool nudge
+                    # flag so it can fire again if the model goes empty on
+                    # a LATER tool round.
+                    self._post_tool_empty_retried = False

                    messages.append(assistant_msg)
                    self._emit_interim_assistant_message(assistant_msg)
@@ -10154,6 +10258,13 @@ class AIAgent:
                    # No tool calls - this is the final response
                    final_response = assistant_message.content or ""
                    
+                    # Fix: unmute output when entering the no-tool-call branch
+                    # so the user can see empty-response warnings and recovery
+                    # status messages.  _mute_post_response was set during a
+                    # prior housekeeping tool turn and should not silence the
+                    # final response path.
+                    self._mute_post_response = False
+                    
                    # Check if response only has think block with no actual content after it
                    if not self._has_content_after_think_block(final_response):
                        # ── Partial stream recovery ─────────────────────
@@ -10191,20 +10302,56 @@ class AIAgent:
                            self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
                            self._last_content_with_tools = None
                            self._empty_content_retries = 0
-                            for i in range(len(messages) - 1, -1, -1):
-                                msg = messages[i]
-                                if msg.get("role") == "assistant" and msg.get("tool_calls"):
-                                    tool_names = []
-                                    for tc in msg["tool_calls"]:
-                                        if not tc or not isinstance(tc, dict): continue
-                                        fn = tc.get("function", {})
-                                        tool_names.append(fn.get("name", "unknown"))
-                                    msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
-                                    break
+                            # Do NOT modify the assistant message content — the
+                            # old code injected "Calling the X tools..." which
+                            # poisoned the conversation history.  Just use the
+                            # fallback text as the final response and break.
                            final_response = self._strip_think_blocks(fallback).strip()
                            self._response_was_previewed = True
                            break

+                        # ── Post-tool-call empty response nudge ───────────
+                        # The model returned empty after executing tool calls
+                        # but there's no prior-turn content to fall back on.
+                        # Instead of giving up, nudge the model to continue by
+                        # appending a user-level hint.  This is the #9400 case:
+                        # weaker models (GLM-5, etc.) sometimes return empty
+                        # after tool results instead of continuing to the next
+                        # step.  One retry with a nudge usually fixes it.
+                        _prior_was_tool = any(
+                            m.get("role") == "tool"
+                            for m in messages[-5:]  # check recent messages
+                        )
+                        if (
+                            _prior_was_tool
+                            and not getattr(self, "_post_tool_empty_retried", False)
+                        ):
+                            self._post_tool_empty_retried = True
+                            logger.info(
+                                "Empty response after tool calls — nudging model "
+                                "to continue processing"
+                            )
+                            self._emit_status(
+                                "⚠️ Model returned empty after tool calls — "
+                                "nudging to continue"
+                            )
+                            # Append the empty assistant message first so the
+                            # message sequence stays valid:
+                            #   tool(result) → assistant("(empty)") → user(nudge)
+                            # Without this, we'd have tool → user which most
+                            # APIs reject as an invalid sequence.
+                            assistant_msg["content"] = "(empty)"
+                            messages.append(assistant_msg)
+                            messages.append({
+                                "role": "user",
+                                "content": (
+                                    "You just executed tool calls but returned an "
+                                    "empty response. Please process the tool "
+                                    "results above and continue with the task."
+                                ),
+                            })
+                            continue
+
                        # ── Thinking-only prefill continuation ──────────
                        # The model produced structured reasoning (via API
                        # fields) but no visible text content.  Rather than
@@ -62,6 +62,7 @@ AUTHOR_MAP = {
    "258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
    "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
    "259807879+Bartok9@users.noreply.github.com": "Bartok9",
+    "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
    # contributors (manual mapping from git names)
    "dmayhem93@gmail.com": "dmahan93",
    "samherring99@gmail.com": "samherring99",
@@ -94,7 +95,9 @@ AUTHOR_MAP = {
    "vincentcharlebois@gmail.com": "vincentcharlebois",
    "aryan@synvoid.com": "aryansingh",
    "johnsonblake1@gmail.com": "blakejohnson",
+    "greer.guthrie@gmail.com": "g-guthrie",
    "kennyx102@gmail.com": "bobashopcashier",
+    "shokatalishaikh95@gmail.com": "areu01or00",
    "bryan@intertwinesys.com": "bryanyoung",
    "christo.mitov@gmail.com": "christomitov",
    "hermes@nousresearch.com": "NousResearch",
@@ -114,6 +117,8 @@ AUTHOR_MAP = {
    "m@statecraft.systems": "mbierling",
    "balyan.sid@gmail.com": "balyansid",
    "oluwadareab12@gmail.com": "bennytimz",
+    "simon@simonmarcus.org": "simon-marcus",
+    "1243352777@qq.com": "zons-zhaozhy",
    # ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
    #    crossref, and GH contributor list matching (April 2026 audit) ──
    "1115117931@qq.com": "aaronagent",
@@ -8,7 +8,7 @@
    "start": "node bridge.js"
  },
  "dependencies": {
-    "@whiskeysockets/baileys": "WhiskeySockets/Baileys#fix/abprops-abt-fetch",
+    "@whiskeysockets/baileys": "WhiskeySockets/Baileys#01047debd81beb20da7b7779b08edcb06aa03770",
    "express": "^4.21.0",
    "qrcode-terminal": "^0.12.0",
    "pino": "^9.0.0"
@@ -650,9 +650,9 @@ registry.register(
 )
 ```

-**2. Add import** in `model_tools.py` → `_discover_tools()` list.
+**2. Add to `toolsets.py`** → `_HERMES_CORE_TOOLS` list.

-**3. Add to `toolsets.py`** → `_HERMES_CORE_TOOLS` list.
+Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list in `model_tools.py` is needed.

 All handlers must return JSON strings. Use `get_hermes_home()` for paths, never hardcode `~/.hermes`.

@@ -0,0 +1,129 @@
+---
+name: architecture-diagram
+description: Generate professional dark-themed system architecture diagrams as standalone HTML/SVG files. Self-contained output with no external dependencies. Based on Cocoon AI's architecture-diagram-generator (MIT).
+version: 1.0.0
+author: Cocoon AI (hello@cocoon-ai.com), ported by Hermes Agent
+license: MIT
+dependencies: []
+metadata:
+  hermes:
+    tags: [architecture, diagrams, SVG, HTML, visualization, infrastructure, cloud]
+    related_skills: [excalidraw]
+---
+
+# Architecture Diagram Skill
+
+Generate professional, dark-themed technical architecture diagrams as standalone HTML files with inline SVG graphics. No external tools, no API keys, no rendering libraries — just write the HTML file and open it in a browser.
+
+Based on [Cocoon AI's architecture-diagram-generator](https://github.com/Cocoon-AI/architecture-diagram-generator) (MIT).
+
+## Workflow
+
+1. User describes their system architecture (components, connections, technologies)
+2. Generate the HTML file following the design system below
+3. Save with `write_file` to a `.html` file (e.g. `./my-project-architecture.html`, matching the default output location below)
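+4. User opens in any browser — no build step or tooling needed; offline, only the Google Fonts webfont is unavailable and text falls back to the system monospace font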
+
+### Output Location
+
+Save diagrams to a user-specified path, or default to the current working directory:
+```
+./[project-name]-architecture.html
+```
+
+### Preview
+
+After saving, suggest the user open it:
+```bash
+# macOS
+open ./my-architecture.html
+# Linux
+xdg-open ./my-architecture.html
+```
+
+## Design System & Visual Language
+
+### Color Palette (Semantic Mapping)
+
+Use specific `rgba` fills and hex strokes to categorize components:
+
+| Component Type | Fill (rgba) | Stroke (Hex) |
+| :--- | :--- | :--- |
+| **Frontend** | `rgba(8, 51, 68, 0.4)` | `#22d3ee` (cyan-400) |
+| **Backend** | `rgba(6, 78, 59, 0.4)` | `#34d399` (emerald-400) |
+| **Database** | `rgba(76, 29, 149, 0.4)` | `#a78bfa` (violet-400) |
+| **AWS/Cloud** | `rgba(120, 53, 15, 0.3)` | `#fbbf24` (amber-400) |
+| **Security** | `rgba(136, 19, 55, 0.4)` | `#fb7185` (rose-400) |
+| **Message Bus** | `rgba(251, 146, 60, 0.3)` | `#fb923c` (orange-400) |
+| **External** | `rgba(30, 41, 59, 0.5)` | `#94a3b8` (slate-400) |
+
+### Typography & Background
+- **Font:** JetBrains Mono (Monospace), loaded from Google Fonts
+- **Sizes:** 12px (Names), 9px (Sublabels), 8px (Annotations), 7px (Tiny labels)
+- **Background:** Slate-950 (`#020617`) with a subtle 40px grid pattern
+
+```svg
+<!-- Background Grid Pattern -->
+<pattern id="grid" width="40" height="40" patternUnits="userSpaceOnUse">
+  <path d="M 40 0 L 0 0 0 40" fill="none" stroke="#1e293b" stroke-width="0.5"/>
+</pattern>
+```
+
+## Technical Implementation Details
+
+### Component Rendering
+Components are rounded rectangles (`rx="6"`) with 1.5px strokes. To prevent arrows from showing through semi-transparent fills, use a **double-rect masking technique**:
+1. Draw an opaque background rect (`#0f172a`)
+2. Draw the semi-transparent styled rect on top
+
+### Connection Rules
+- **Z-Order:** Draw arrows *early* in the SVG (after the grid) so they render behind component boxes
+- **Arrowheads:** Defined via SVG markers
+- **Security Flows:** Use dashed lines in rose color (`#fb7185`)
+- **Boundaries:**
+  - *Security Groups:* Dashed (`4,4`), rose color
+  - *Regions:* Large dashed (`8,4`), amber color, `rx="12"`
+
+### Spacing & Layout Logic
+- **Standard Height:** 60px (Services); 80-120px (Large components)
+- **Vertical Gap:** Minimum 40px between components
+- **Message Buses:** Must be placed *in the gap* between services, not overlapping them
+- **Legend Placement:** **CRITICAL.** Must be placed outside all boundary boxes. Calculate the lowest Y-coordinate of all boundaries and place the legend at least 20px below it.
+
+## Document Structure
+
+The generated HTML file follows a four-part layout:
+1. **Header:** Title with a pulsing dot indicator and subtitle
+2. **Main SVG:** The diagram contained within a rounded border card
+3. **Summary Cards:** A grid of three cards below the diagram for high-level details
+4. **Footer:** Minimal metadata
+
+### Info Card Pattern
+```html
+<div class="card">
+  <div class="card-header">
+    <div class="card-dot cyan"></div>
+    <h3>Title</h3>
+  </div>
+  <ul>
+    <li>• Item one</li>
+    <li>• Item two</li>
+  </ul>
+</div>
+```
+
+## Output Requirements
+- **Single File:** One self-contained `.html` file
+- **No External Dependencies:** All CSS and SVG must be inline (except Google Fonts)
+- **No JavaScript:** Use pure CSS for any animations (like pulsing dots)
+- **Compatibility:** Must render correctly in any modern web browser
+
+## Template Reference
+
+Load the full HTML template for the exact structure, CSS, and SVG component examples:
+
+```
+skill_view(name="architecture-diagram", file_path="templates/template.html")
+```
+
+The template contains working examples of most component types (external, security, cloud, backend, database, frontend), arrow styles (solid straight, dashed curved), security groups, region boundaries, and the legend — use it as your structural reference when generating diagrams. It does not include a message-bus example; style message buses with the Message Bus colors from the palette above.
@@ -0,0 +1,319 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>[PROJECT NAME] Architecture Diagram</title>
+  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&display=swap" rel="stylesheet">
+  <style>
+    * {
+      margin: 0;
+      padding: 0;
+      box-sizing: border-box;
+    }
+    
+    body {
+      font-family: 'JetBrains Mono', monospace;
+      background: #020617;
+      min-height: 100vh;
+      padding: 2rem;
+      color: white;
+    }
+    
+    .container {
+      max-width: 1200px;
+      margin: 0 auto;
+    }
+    
+    .header {
+      margin-bottom: 2rem;
+    }
+    
+    .header-row {
+      display: flex;
+      align-items: center;
+      gap: 1rem;
+      margin-bottom: 0.5rem;
+    }
+    
+    .pulse-dot {
+      width: 12px;
+      height: 12px;
+      background: #22d3ee;
+      border-radius: 50%;
+      animation: pulse 2s infinite;
+    }
+    
+    @keyframes pulse {
+      0%, 100% { opacity: 1; }
+      50% { opacity: 0.5; }
+    }
+    
+    h1 {
+      font-size: 1.5rem;
+      font-weight: 700;
+      letter-spacing: -0.025em;
+    }
+    
+    .subtitle {
+      color: #94a3b8;
+      font-size: 0.875rem;
+      margin-left: 1.75rem;
+    }
+    
+    .diagram-container {
+      background: rgba(15, 23, 42, 0.5);
+      border-radius: 1rem;
+      border: 1px solid #1e293b;
+      padding: 1.5rem;
+      overflow-x: auto;
+    }
+    
+    svg {
+      width: 100%;
+      min-width: 900px;
+      display: block;
+    }
+    
+    .cards {
+      display: grid;
+      grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+      gap: 1rem;
+      margin-top: 2rem;
+    }
+    
+    .card {
+      background: rgba(15, 23, 42, 0.5);
+      border-radius: 0.75rem;
+      border: 1px solid #1e293b;
+      padding: 1.25rem;
+    }
+    
+    .card-header {
+      display: flex;
+      align-items: center;
+      gap: 0.5rem;
+      margin-bottom: 0.75rem;
+    }
+    
+    .card-dot {
+      width: 8px;
+      height: 8px;
+      border-radius: 50%;
+    }
+    
+    .card-dot.cyan { background: #22d3ee; }
+    .card-dot.emerald { background: #34d399; }
+    .card-dot.violet { background: #a78bfa; }
+    .card-dot.amber { background: #fbbf24; }
+    .card-dot.rose { background: #fb7185; }
+    
+    .card h3 {
+      font-size: 0.875rem;
+      font-weight: 600;
+    }
+    
+    .card ul {
+      list-style: none;
+      color: #94a3b8;
+      font-size: 0.75rem;
+    }
+    
+    .card li {
+      margin-bottom: 0.375rem;
+    }
+    
+    .footer {
+      text-align: center;
+      margin-top: 1.5rem;
+      color: #475569;
+      font-size: 0.75rem;
+    }
+  </style>
+</head>
+<body>
+  <div class="container">
+    <!-- Header -->
+    <div class="header">
+      <div class="header-row">
+        <div class="pulse-dot"></div>
+        <h1>[PROJECT NAME] Architecture</h1>
+      </div>
+      <p class="subtitle">[Subtitle description]</p>
+    </div>
+
+    <!-- Main Diagram -->
+    <div class="diagram-container">
+      <svg viewBox="0 0 1000 680">
+        <!-- Definitions -->
+        <defs>
+          <marker id="arrowhead" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
+            <polygon points="0 0, 10 3.5, 0 7" fill="#64748b" />
+          </marker>
+          <pattern id="grid" width="40" height="40" patternUnits="userSpaceOnUse">
+            <path d="M 40 0 L 0 0 0 40" fill="none" stroke="#1e293b" stroke-width="0.5"/>
+          </pattern>
+        </defs>
+
+        <!-- Background Grid -->
+        <rect width="100%" height="100%" fill="url(#grid)" />
+
+        <!-- =================================================================
+             COMPONENT EXAMPLES - Copy and customize these patterns
+             ================================================================= -->
+
+        <!-- External/Generic Component -->
+        <rect x="30" y="280" width="100" height="50" rx="6" fill="rgba(30, 41, 59, 0.5)" stroke="#94a3b8" stroke-width="1.5"/>
+        <text x="80" y="300" fill="white" font-size="11" font-weight="600" text-anchor="middle">Users</text>
+        <text x="80" y="316" fill="#94a3b8" font-size="9" text-anchor="middle">Browser/Mobile</text>
+
+        <!-- Security Component -->
+        <rect x="30" y="80" width="100" height="60" rx="6" fill="rgba(136, 19, 55, 0.4)" stroke="#fb7185" stroke-width="1.5"/>
+        <text x="80" y="105" fill="white" font-size="11" font-weight="600" text-anchor="middle">Auth Provider</text>
+        <text x="80" y="121" fill="#94a3b8" font-size="9" text-anchor="middle">OAuth 2.0</text>
+
+        <!-- Region/Cloud Boundary -->
+        <rect x="160" y="40" width="820" height="620" rx="12" fill="rgba(251, 191, 36, 0.05)" stroke="#fbbf24" stroke-width="1" stroke-dasharray="8,4"/>
+        <text x="172" y="58" fill="#fbbf24" font-size="10" font-weight="600">AWS Region: us-west-2</text>
+
+        <!-- AWS/Cloud Service -->
+        <rect x="200" y="280" width="110" height="50" rx="6" fill="rgba(120, 53, 15, 0.3)" stroke="#fbbf24" stroke-width="1.5"/>
+        <text x="255" y="300" fill="white" font-size="11" font-weight="600" text-anchor="middle">CloudFront</text>
+        <text x="255" y="316" fill="#94a3b8" font-size="9" text-anchor="middle">CDN</text>
+
+        <!-- Multi-line AWS Component (S3 Buckets example) -->
+        <rect x="200" y="380" width="110" height="100" rx="6" fill="rgba(120, 53, 15, 0.3)" stroke="#fbbf24" stroke-width="1.5"/>
+        <text x="255" y="400" fill="white" font-size="11" font-weight="600" text-anchor="middle">S3 Buckets</text>
+        <text x="255" y="420" fill="#94a3b8" font-size="8" text-anchor="middle">• bucket-one</text>
+        <text x="255" y="434" fill="#94a3b8" font-size="8" text-anchor="middle">• bucket-two</text>
+        <text x="255" y="448" fill="#94a3b8" font-size="8" text-anchor="middle">• bucket-three</text>
+        <text x="255" y="466" fill="#fbbf24" font-size="7" text-anchor="middle">OAI Protected</text>
+
+        <!-- Security Group (dashed boundary) -->
+        <rect x="350" y="265" width="120" height="80" rx="8" fill="transparent" stroke="#fb7185" stroke-width="1" stroke-dasharray="4,4"/>
+        <text x="358" y="279" fill="#fb7185" font-size="8">sg-name :port</text>
+        
+        <!-- Component inside security group -->
+        <rect x="360" y="280" width="100" height="50" rx="6" fill="rgba(120, 53, 15, 0.3)" stroke="#fbbf24" stroke-width="1.5"/>
+        <text x="410" y="300" fill="white" font-size="11" font-weight="600" text-anchor="middle">Load Balancer</text>
+        <text x="410" y="316" fill="#94a3b8" font-size="9" text-anchor="middle">HTTPS :443</text>
+
+        <!-- Backend Component -->
+        <rect x="510" y="280" width="110" height="50" rx="6" fill="rgba(6, 78, 59, 0.4)" stroke="#34d399" stroke-width="1.5"/>
+        <text x="565" y="300" fill="white" font-size="11" font-weight="600" text-anchor="middle">API Server</text>
+        <text x="565" y="316" fill="#94a3b8" font-size="9" text-anchor="middle">FastAPI :8000</text>
+
+        <!-- Database Component -->
+        <rect x="700" y="280" width="120" height="50" rx="6" fill="rgba(76, 29, 149, 0.4)" stroke="#a78bfa" stroke-width="1.5"/>
+        <text x="760" y="300" fill="white" font-size="11" font-weight="600" text-anchor="middle">Database</text>
+        <text x="760" y="316" fill="#94a3b8" font-size="9" text-anchor="middle">PostgreSQL</text>
+
+        <!-- Frontend Component -->
+        <rect x="200" y="520" width="200" height="110" rx="8" fill="rgba(8, 51, 68, 0.4)" stroke="#22d3ee" stroke-width="1.5"/>
+        <text x="300" y="545" fill="white" font-size="12" font-weight="600" text-anchor="middle">Frontend</text>
+        <text x="300" y="565" fill="#94a3b8" font-size="9" text-anchor="middle">React + TypeScript</text>
+        <text x="300" y="580" fill="#94a3b8" font-size="9" text-anchor="middle">Additional detail</text>
+        <text x="300" y="595" fill="#94a3b8" font-size="9" text-anchor="middle">More info</text>
+        <text x="300" y="615" fill="#22d3ee" font-size="8" text-anchor="middle">domain.example.com</text>
+
+        <!-- =================================================================
+             ARROW EXAMPLES
+             ================================================================= -->
+
+        <!-- Standard arrow with label -->
+        <line x1="130" y1="305" x2="198" y2="305" stroke="#22d3ee" stroke-width="1.5" marker-end="url(#arrowhead)"/>
+        <text x="164" y="299" fill="#94a3b8" font-size="9" text-anchor="middle">HTTPS</text>
+        
+        <!-- Simple arrow (no label) -->
+        <line x1="310" y1="305" x2="358" y2="305" stroke="#22d3ee" stroke-width="1.5" marker-end="url(#arrowhead)"/>
+        
+        <!-- Vertical arrow -->
+        <line x1="255" y1="330" x2="255" y2="378" stroke="#fbbf24" stroke-width="1.5" marker-end="url(#arrowhead)"/>
+        <text x="270" y="358" fill="#94a3b8" font-size="9">OAI</text>
+        
+        <!-- Solid arrows between components (load balancer → API server → database) -->
+        <line x1="460" y1="305" x2="508" y2="305" stroke="#34d399" stroke-width="1.5" marker-end="url(#arrowhead)"/>
+        <line x1="620" y1="305" x2="698" y2="305" stroke="#a78bfa" stroke-width="1.5" marker-end="url(#arrowhead)"/>
+        <text x="655" y="299" fill="#94a3b8" font-size="9">TLS</text>
+
+        <!-- Dashed, curved path for auth/security flows (stroke-dasharray, no arrowhead marker) -->
+        <path d="M 80 140 L 80 200 Q 80 220 100 220 L 200 220 Q 220 220 220 240 L 220 278" fill="none" stroke="#fb7185" stroke-width="1.5" stroke-dasharray="5,5"/>
+        <text x="150" y="210" fill="#fb7185" font-size="8">JWT + PKCE</text>
+
+        <!-- =================================================================
+             LEGEND
+             ================================================================= -->
+        <text x="720" y="70" fill="white" font-size="10" font-weight="600">Legend</text>
+        
+        <rect x="720" y="82" width="16" height="10" rx="2" fill="rgba(8, 51, 68, 0.4)" stroke="#22d3ee" stroke-width="1"/>
+        <text x="742" y="90" fill="#94a3b8" font-size="8">Frontend</text>
+        
+        <rect x="720" y="98" width="16" height="10" rx="2" fill="rgba(6, 78, 59, 0.4)" stroke="#34d399" stroke-width="1"/>
+        <text x="742" y="106" fill="#94a3b8" font-size="8">Backend</text>
+        
+        <rect x="720" y="114" width="16" height="10" rx="2" fill="rgba(120, 53, 15, 0.3)" stroke="#fbbf24" stroke-width="1"/>
+        <text x="742" y="122" fill="#94a3b8" font-size="8">Cloud Service</text>
+        
+        <rect x="720" y="130" width="16" height="10" rx="2" fill="rgba(76, 29, 149, 0.4)" stroke="#a78bfa" stroke-width="1"/>
+        <text x="742" y="138" fill="#94a3b8" font-size="8">Database</text>
+        
+        <rect x="720" y="146" width="16" height="10" rx="2" fill="rgba(136, 19, 55, 0.4)" stroke="#fb7185" stroke-width="1"/>
+        <text x="742" y="154" fill="#94a3b8" font-size="8">Security</text>
+        
+        <line x1="720" y1="168" x2="736" y2="168" stroke="#fb7185" stroke-width="1" stroke-dasharray="3,3"/>
+        <text x="742" y="171" fill="#94a3b8" font-size="8">Auth Flow</text>
+        
+        <rect x="720" y="178" width="16" height="10" rx="2" fill="transparent" stroke="#fb7185" stroke-width="1" stroke-dasharray="3,3"/>
+        <text x="742" y="186" fill="#94a3b8" font-size="8">Security Group</text>
+      </svg>
+    </div>
+
+    <!-- Info Cards -->
+    <div class="cards">
+      <div class="card">
+        <div class="card-header">
+          <div class="card-dot rose"></div>
+          <h3>Card Title 1</h3>
+        </div>
+        <ul>
+          <li>• Item one</li>
+          <li>• Item two</li>
+          <li>• Item three</li>
+          <li>• Item four</li>
+        </ul>
+      </div>
+
+      <div class="card">
+        <div class="card-header">
+          <div class="card-dot amber"></div>
+          <h3>Card Title 2</h3>
+        </div>
+        <ul>
+          <li>• Item one</li>
+          <li>• Item two</li>
+          <li>• Item three</li>
+          <li>• Item four</li>
+        </ul>
+      </div>
+
+      <div class="card">
+        <div class="card-header">
+          <div class="card-dot violet"></div>
+          <h3>Card Title 3</h3>
+        </div>
+        <ul>
+          <li>• Item one</li>
+          <li>• Item two</li>
+          <li>• Item three</li>
+          <li>• Item four</li>
+        </ul>
+      </div>
+    </div>
+
+    <!-- Footer -->
+    <p class="footer">
+      [Project Name] • [Additional metadata]
+    </p>
+  </div>
+</body>
+</html>
@@ -1,35 +1,19 @@
 ---
 name: google-workspace
-description: Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration via gws CLI (googleworkspace/cli). Uses OAuth2 with automatic token refresh via bridge script. Requires gws binary.
-version: 2.0.0
+description: Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses Hermes-managed OAuth2 setup, prefers the Google Workspace CLI (`gws`) when available for broader API coverage, and falls back to the Python client libraries otherwise.
+version: 1.0.0
 author: Nous Research
 license: MIT
-required_credential_files:
-  - path: google_token.json
-    description: Google OAuth2 token (created by setup script)
-  - path: google_client_secret.json
-    description: Google OAuth2 client credentials (downloaded from Google Cloud Console)
 metadata:
  hermes:
-    tags: [Google, Gmail, Calendar, Drive, Sheets, Docs, Contacts, Email, OAuth, gws]
+    tags: [Google, Gmail, Calendar, Drive, Sheets, Docs, Contacts, Email, OAuth]
    homepage: https://github.com/NousResearch/hermes-agent
    related_skills: [himalaya]
 ---

 # Google Workspace

-Gmail, Calendar, Drive, Contacts, Sheets, and Docs — powered by `gws` (Google's official Rust CLI). The skill provides a backward-compatible Python wrapper that handles OAuth token refresh and delegates to `gws`.
-
-## Architecture
-
-```
-google_api.py  →  gws_bridge.py  →  gws CLI
-(argparse compat)  (token refresh)    (Google APIs)
-```
-
- `setup.py` handles OAuth2 (headless-compatible, works on CLI/Telegram/Discord)
- `gws_bridge.py` refreshes the Hermes token and injects it into `gws` via `GOOGLE_WORKSPACE_CLI_TOKEN`
- `google_api.py` provides the same CLI interface as v1 but delegates to `gws`
+Gmail, Calendar, Drive, Contacts, Sheets, and Docs — through Hermes-managed OAuth and a thin CLI wrapper. When `gws` is installed, the skill uses it as the execution backend for broader Google Workspace coverage; otherwise it falls back to the bundled Python client implementation.

 ## References

@@ -38,22 +22,7 @@ google_api.py  →  gws_bridge.py  →  gws CLI
 ## Scripts

 - `scripts/setup.py` — OAuth2 setup (run once to authorize)
- `scripts/gws_bridge.py` — Token refresh bridge to gws CLI
- `scripts/google_api.py` — Backward-compatible API wrapper (delegates to gws)
-
-## Prerequisites
-
-Install `gws`:
-
-```bash
-cargo install google-workspace-cli
-# or via npm (recommended, downloads prebuilt binary):
-npm install -g @googleworkspace/cli
-# or via Homebrew:
-brew install googleworkspace-cli
-```
-
-Verify: `gws --version`
+- `scripts/google_api.py` — compatibility wrapper CLI. It prefers `gws` for operations when available, while preserving Hermes' existing JSON output contract.

 ## First-Time Setup

@@ -63,13 +32,7 @@ on CLI, Telegram, Discord, or any platform.
 Define a shorthand first:

 ```bash
-HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
-GWORKSPACE_SKILL_DIR="$HERMES_HOME/skills/productivity/google-workspace"
-PYTHON_BIN="${HERMES_PYTHON:-python3}"
-if [ -x "$HERMES_HOME/hermes-agent/venv/bin/python" ]; then
-  PYTHON_BIN="$HERMES_HOME/hermes-agent/venv/bin/python"
-fi
-GSETUP="$PYTHON_BIN $GWORKSPACE_SKILL_DIR/scripts/setup.py"
+GSETUP="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/setup.py"
 ```

 ### Step 0: Check if already set up
@@ -82,88 +45,166 @@ If it prints `AUTHENTICATED`, skip to Usage — setup is already done.

 ### Step 1: Triage — ask the user what they need

+Before starting OAuth setup, ask the user TWO questions:
+
 **Question 1: "What Google services do you need? Just email, or also
 Calendar/Drive/Sheets/Docs?"**

- **Email only** → Use the `himalaya` skill instead — simpler setup.
- **Calendar, Drive, Sheets, Docs (or email + these)** → Continue below.
+- **Email only** → They don't need this skill at all. Use the `himalaya` skill
+  instead — it works with a Gmail App Password (Settings → Security → App
+  Passwords) and takes 2 minutes to set up. No Google Cloud project needed.
+  Load the himalaya skill and follow its setup instructions.

-**Partial scopes**: Users can authorize only a subset of services. The setup
-script accepts partial scopes and warns about missing ones.
+- **Email + Calendar** → Continue with this skill, but use
+  `--services email,calendar` during auth so the consent screen only asks for
+  the scopes they actually need.

-**Question 2: "Does your Google account use Advanced Protection?"**
+- **Calendar/Drive/Sheets/Docs only** → Continue with this skill and use a
+  narrower `--services` set like `calendar,drive,sheets,docs`.

- **No / Not sure** → Normal setup.
- **Yes** → Workspace admin must add the OAuth client ID to allowed apps first.
+- **Full Workspace access** → Continue with this skill and use the default
+  `all` service set.
+
+**Question 2: "Does your Google account use Advanced Protection (hardware
+security keys required to sign in)? If you're not sure, you probably don't
+— it's something you would have explicitly enrolled in."**
+
+- **No / Not sure** → Normal setup. Continue below.
+- **Yes** → Their Workspace admin must add the OAuth client ID to the org's
+  allowed apps list before Step 4 will work. Let them know upfront.

 ### Step 2: Create OAuth credentials (one-time, ~5 minutes)

 Tell the user:

-> 1. Go to https://console.cloud.google.com/apis/credentials
-> 2. Create a project (or use an existing one)
-> 3. Enable the APIs you need (Gmail, Calendar, Drive, Sheets, Docs, People)
-> 4. Credentials → Create Credentials → OAuth 2.0 Client ID → Desktop app
-> 5. Download JSON and tell me the file path
+> You need a Google Cloud OAuth client. This is a one-time setup:
+>
+> 1. Create or select a project:
+>    https://console.cloud.google.com/projectselector2/home/dashboard
+> 2. Enable the required APIs from the API Library:
+>    https://console.cloud.google.com/apis/library
+>    Enable: Gmail API, Google Calendar API, Google Drive API,
+>    Google Sheets API, Google Docs API, People API
+> 3. Create the OAuth client here:
+>    https://console.cloud.google.com/apis/credentials
+>    Credentials → Create Credentials → OAuth 2.0 Client ID
+> 4. Application type: "Desktop app" → Create
+> 5. If the app is still in Testing, add the user's Google account as a test user here:
+>    https://console.cloud.google.com/auth/audience
+>    Audience → Test users → Add users
+> 6. Download the JSON file and tell me the file path
+>
+> Important Hermes CLI note: if the file path starts with `/`, do NOT send only the bare path as its own message in the CLI, because it can be mistaken for a slash command. Send it in a sentence instead, like:
+> `The JSON file path is: /home/user/Downloads/client_secret_....json`
+
+Once they provide the path:

 ```bash
 $GSETUP --client-secret /path/to/client_secret.json
 ```

+If they paste the raw client ID / client secret values instead of a file path,
+write a valid Desktop OAuth JSON file for them yourself, save it somewhere
+explicit (for example `~/Downloads/hermes-google-client-secret.json`), then run
+`--client-secret` against that file.
+
 ### Step 3: Get authorization URL

+Use the service set chosen in Step 1. Examples:
+
 ```bash
-$GSETUP --auth-url
+$GSETUP --auth-url --services email,calendar --format json
+$GSETUP --auth-url --services calendar,drive,sheets,docs --format json
+$GSETUP --auth-url --services all --format json
 ```

-Send the URL to the user. After authorizing, they paste back the redirect URL or code.
+This returns JSON with an `auth_url` field and also saves the exact URL to
+`~/.hermes/google_oauth_last_url.txt`.
+
+Agent rules for this step:
+- Extract the `auth_url` field and send that exact URL to the user as a single line.
+- Tell the user that the browser will likely fail on `http://localhost:1` after approval, and that this is expected.
+- Tell them to copy the ENTIRE redirected URL from the browser address bar.
+- If the user gets `Error 403: access_denied`, send them directly to `https://console.cloud.google.com/auth/audience` to add themselves as a test user.

 ### Step 4: Exchange the code

+The user will paste back either a URL like `http://localhost:1/?code=4/0A...&scope=...`
+or just the code string. Either works. The `--auth-url` step stores a temporary
+pending OAuth session locally so `--auth-code` can complete the PKCE exchange
+later, even on headless systems:
+
 ```bash
-$GSETUP --auth-code "THE_URL_OR_CODE_THE_USER_PASTED"
+$GSETUP --auth-code "THE_URL_OR_CODE_THE_USER_PASTED" --format json
 ```

+If `--auth-code` fails because the code expired, was already used, or came from
+an older browser tab, it returns a new authorization URL in the `fresh_auth_url`
+field. In that case, immediately send the new URL to the user and have them
+retry using only the redirect from the newest browser tab.
+
 ### Step 5: Verify

 ```bash
 $GSETUP --check
 ```

-Should print `AUTHENTICATED`. Token refreshes automatically from now on.
+Should print `AUTHENTICATED`. Setup is complete — token refreshes automatically from now on.
+
+### Notes
+
+- Token is stored at `~/.hermes/google_token.json` and auto-refreshes.
+- Pending OAuth session state/verifier are stored temporarily at `~/.hermes/google_oauth_pending.json` until exchange completes.
+- If `gws` is installed, `google_api.py` points it at the same `~/.hermes/google_token.json` credentials file. Users do not need to run a separate `gws auth login` flow.
+- To revoke: `$GSETUP --revoke`

 ## Usage

-All commands go through the API script:
+All commands go through the API script. Set `GAPI` as a shorthand:

 ```bash
-HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
-GWORKSPACE_SKILL_DIR="$HERMES_HOME/skills/productivity/google-workspace"
-PYTHON_BIN="${HERMES_PYTHON:-python3}"
-if [ -x "$HERMES_HOME/hermes-agent/venv/bin/python" ]; then
-  PYTHON_BIN="$HERMES_HOME/hermes-agent/venv/bin/python"
-fi
-GAPI="$PYTHON_BIN $GWORKSPACE_SKILL_DIR/scripts/google_api.py"
+GAPI="python ${HERMES_HOME:-$HOME/.hermes}/skills/productivity/google-workspace/scripts/google_api.py"
 ```

 ### Gmail

 ```bash
+# Search (returns JSON array with id, from, subject, date, snippet)
 $GAPI gmail search "is:unread" --max 10
+$GAPI gmail search "from:boss@company.com newer_than:1d"
+$GAPI gmail search "has:attachment filename:pdf newer_than:7d"
+
+# Read full message (returns JSON with body text)
 $GAPI gmail get MESSAGE_ID
+
+# Send
 $GAPI gmail send --to user@example.com --subject "Hello" --body "Message text"
-$GAPI gmail send --to user@example.com --subject "Report" --body "<h1>Q4</h1>" --html
+$GAPI gmail send --to user@example.com --subject "Report" --body "<h1>Q4</h1><p>Details...</p>" --html
+$GAPI gmail send --to user@example.com --subject "Hello" --from '"Research Agent" <user@example.com>' --body "Message text"
+
+# Reply (automatically threads and sets In-Reply-To)
 $GAPI gmail reply MESSAGE_ID --body "Thanks, that works for me."
+$GAPI gmail reply MESSAGE_ID --from '"Support Bot" <user@example.com>' --body "Thanks"
+
+# Labels
 $GAPI gmail labels
 $GAPI gmail modify MESSAGE_ID --add-labels LABEL_ID
+$GAPI gmail modify MESSAGE_ID --remove-labels UNREAD
 ```

 ### Calendar

 ```bash
+# List events (defaults to next 7 days)
 $GAPI calendar list
-$GAPI calendar create --summary "Standup" --start 2026-03-01T10:00:00+01:00 --end 2026-03-01T10:30:00+01:00
-$GAPI calendar create --summary "Review" --start ... --end ... --attendees "alice@co.com,bob@co.com"
+$GAPI calendar list --start 2026-03-01T00:00:00Z --end 2026-03-07T23:59:59Z
+
+# Create event (ISO 8601 with timezone required)
+$GAPI calendar create --summary "Team Standup" --start 2026-03-01T10:00:00-06:00 --end 2026-03-01T10:30:00-06:00
+$GAPI calendar create --summary "Lunch" --start 2026-03-01T12:00:00Z --end 2026-03-01T13:00:00Z --location "Cafe"
+$GAPI calendar create --summary "Review" --start 2026-03-01T14:00:00Z --end 2026-03-01T15:00:00Z --attendees "alice@co.com,bob@co.com"
+
+# Delete event
 $GAPI calendar delete EVENT_ID
 ```

@@ -183,8 +224,13 @@ $GAPI contacts list --max 20
 ### Sheets

 ```bash
+# Read
 $GAPI sheets get SHEET_ID "Sheet1!A1:D10"
+
+# Write
 $GAPI sheets update SHEET_ID "Sheet1!A1:B2" --values '[["Name","Score"],["Alice","95"]]'
+
+# Append rows
 $GAPI sheets append SHEET_ID "Sheet1!A:C" --values '[["new","row","data"]]'
 ```

@@ -194,52 +240,37 @@ $GAPI sheets append SHEET_ID "Sheet1!A:C" --values '[["new","row","data"]]'
 $GAPI docs get DOC_ID
 ```

-### Direct gws access (advanced)
-
-For operations not covered by the wrapper, use `gws_bridge.py` directly:
-
-```bash
-GBRIDGE="$PYTHON_BIN $GWORKSPACE_SKILL_DIR/scripts/gws_bridge.py"
-$GBRIDGE calendar +agenda --today --format table
-$GBRIDGE gmail +triage --labels --format json
-$GBRIDGE drive +upload ./report.pdf
-$GBRIDGE sheets +read --spreadsheet SHEET_ID --range "Sheet1!A1:D10"
-```
-
 ## Output Format

-All commands return JSON via `gws --format json`. Key output shapes:
+All commands return JSON. Parse with `jq` or read directly. Key fields:

- **Gmail search/triage**: Array of message summaries (sender, subject, date, snippet)
- **Gmail get/read**: Message object with headers and body text
- **Gmail send/reply**: Confirmation with message ID
- **Calendar list/agenda**: Array of event objects (summary, start, end, location)
- **Calendar create**: Confirmation with event ID and htmlLink
- **Drive search**: Array of file objects (id, name, mimeType, webViewLink)
- **Sheets get/read**: 2D array of cell values
- **Docs get**: Full document JSON (use `body.content` for text extraction)
- **Contacts list**: Array of person objects with names, emails, phones
-
-Parse output with `jq` or read JSON directly.
+- **Gmail search**: `[{id, threadId, from, to, subject, date, snippet, labels}]`
+- **Gmail get**: `{id, threadId, from, to, subject, date, labels, body}`
+- **Gmail send/reply**: `{status: "sent", id, threadId}`
+- **Calendar list**: `[{id, summary, start, end, location, description, htmlLink}]`
+- **Calendar create**: `{status: "created", id, summary, htmlLink}`
+- **Drive search**: `[{id, name, mimeType, modifiedTime, webViewLink}]`
+- **Contacts list**: `[{name, emails: [...], phones: [...]}]`
+- **Sheets get**: `[[cell, cell, ...], ...]`

 ## Rules

-1. **Never send email or create/delete events without confirming with the user first.**
-2. **Check auth before first use** — run `setup.py --check`.
-3. **Use the Gmail search syntax reference** for complex queries.
-4. **Calendar times must include timezone** — ISO 8601 with offset or UTC.
-5. **Respect rate limits** — avoid rapid-fire sequential API calls.
+1. **Never send email or create/delete events without confirming with the user first.** Show the draft content and ask for approval.
+2. **Check auth before first use** — run `setup.py --check`. If it fails, guide the user through setup.
+3. **Use the Gmail search syntax reference** for complex queries — load it with `skill_view("google-workspace", file_path="references/gmail-search-syntax.md")`.
+4. **Calendar times must include timezone** — always use ISO 8601 with offset (e.g., `2026-03-01T10:00:00-06:00`) or UTC (`Z`).
+5. **Respect rate limits** — avoid rapid-fire sequential API calls. Batch reads when possible.

 ## Troubleshooting

 | Problem | Fix |
 |---------|-----|
-| `NOT_AUTHENTICATED` | Run setup Steps 2-5 |
-| `REFRESH_FAILED` | Token revoked — redo Steps 3-5 |
-| `gws: command not found` | Install: `npm install -g @googleworkspace/cli` |
-| `HttpError 403` | Missing scope — `$GSETUP --revoke` then redo Steps 3-5 |
-| `HttpError 403: Access Not Configured` | Enable API in Google Cloud Console |
-| Advanced Protection blocks auth | Admin must allowlist the OAuth client ID |
+| `NOT_AUTHENTICATED` | Run setup Steps 2-5 above |
+| `REFRESH_FAILED` | Token revoked or expired — redo Steps 3-5 |
+| `HttpError 403: Insufficient Permission` | Missing API scope — `$GSETUP --revoke` then redo Steps 3-5 |
+| `HttpError 403: Access Not Configured` | API not enabled — user needs to enable it in Google Cloud Console |
+| `ModuleNotFoundError` | Run `$GSETUP --install-deps` |
+| Advanced Protection blocks auth | Workspace admin must allowlist the OAuth client ID |

 ## Revoking Access

@@ -1,17 +1,17 @@
 #!/usr/bin/env python3
 """Google Workspace API CLI for Hermes Agent.

-Thin wrapper that delegates to gws (googleworkspace/cli) via gws_bridge.py.
-Maintains the same CLI interface for backward compatibility with Hermes skills.
+Uses the Google Workspace CLI (`gws`) when available, but preserves the
+existing Hermes-facing JSON contract and falls back to the Python client
+libraries if `gws` is not installed.

 Usage:
  python google_api.py gmail search "is:unread" [--max 10]
  python google_api.py gmail get MESSAGE_ID
  python google_api.py gmail send --to user@example.com --subject "Hi" --body "Hello"
  python google_api.py gmail reply MESSAGE_ID --body "Thanks"
-  python google_api.py calendar list [--start DATE] [--end DATE] [--calendar primary]
+  python google_api.py calendar list [--start DATE] [--end DATE] [--calendar primary]
  python google_api.py calendar create --summary "Meeting" --start DATETIME --end DATETIME
-  python google_api.py calendar delete EVENT_ID
  python google_api.py drive search "budget report" [--max 10]
  python google_api.py contacts list [--max 20]
  python google_api.py sheets get SHEET_ID RANGE
@@ -21,47 +21,396 @@ Usage:
 """

 import argparse
+import base64
 import json
 import os
+import shutil
 import subprocess
 import sys
+from datetime import datetime, timedelta, timezone
+from email.mime.text import MIMEText
 from pathlib import Path

-BRIDGE = Path(__file__).parent / "gws_bridge.py"
-PYTHON = sys.executable
+HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+TOKEN_PATH = HERMES_HOME / "google_token.json"
+CLIENT_SECRET_PATH = HERMES_HOME / "google_client_secret.json"
+
+SCOPES = [
+    "https://www.googleapis.com/auth/gmail.readonly",
+    "https://www.googleapis.com/auth/gmail.send",
+    "https://www.googleapis.com/auth/gmail.modify",
+    "https://www.googleapis.com/auth/calendar",
+    "https://www.googleapis.com/auth/drive.readonly",
+    "https://www.googleapis.com/auth/contacts.readonly",
+    "https://www.googleapis.com/auth/spreadsheets",
+    "https://www.googleapis.com/auth/documents.readonly",
+]


-def gws(*args: str) -> None:
-    """Call gws via the bridge and exit with its return code."""
+def _ensure_authenticated():
+    if not TOKEN_PATH.exists():
+        print("Not authenticated. Run the setup script first:", file=sys.stderr)
+        print(f"  python {Path(__file__).parent / 'setup.py'}", file=sys.stderr)
+        sys.exit(1)
+
+
+def _stored_token_scopes() -> list[str]:
+    try:
+        data = json.loads(TOKEN_PATH.read_text())
+    except Exception:
+        return list(SCOPES)
+    scopes = data.get("scopes")
+    if isinstance(scopes, list) and scopes:
+        return scopes
+    return list(SCOPES)
+
+
+def _gws_binary() -> str | None:
+    override = os.getenv("HERMES_GWS_BIN")
+    if override:
+        return override
+    return shutil.which("gws")
+
+
+def _gws_env() -> dict[str, str]:
+    env = os.environ.copy()
+    env["GOOGLE_WORKSPACE_CLI_CREDENTIALS_FILE"] = str(TOKEN_PATH)
+    return env
+
+
+def _run_gws(parts: list[str], *, params: dict | None = None, body: dict | None = None):
+    binary = _gws_binary()
+    if not binary:
+        raise RuntimeError("gws not installed")
+
+    _ensure_authenticated()
+
+    cmd = [binary, *parts]
+    if params is not None:
+        cmd.extend(["--params", json.dumps(params)])
+    if body is not None:
+        cmd.extend(["--json", json.dumps(body)])
+
    result = subprocess.run(
-        [PYTHON, str(BRIDGE)] + list(args),
-        env={**os.environ, "HERMES_HOME": os.environ.get("HERMES_HOME", str(Path.home() / ".hermes"))},
+        cmd,
+        capture_output=True,
+        text=True,
+        env=_gws_env(),
    )
-    sys.exit(result.returncode)
+    if result.returncode != 0:
+        err = result.stderr.strip() or result.stdout.strip() or "Unknown gws error"
+        print(err, file=sys.stderr)
+        sys.exit(result.returncode or 1)
+
+    stdout = result.stdout.strip()
+    if not stdout:
+        return {}
+
+    try:
+        return json.loads(stdout)
+    except json.JSONDecodeError:
+        print("ERROR: Unexpected non-JSON output from gws:", file=sys.stderr)
+        print(stdout, file=sys.stderr)
+        sys.exit(1)


-# -- Gmail --
+def _headers_dict(msg: dict) -> dict[str, str]:
+    return {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])}
+
+
+def _extract_message_body(msg: dict) -> str:
+    body = ""
+    payload = msg.get("payload", {})
+    if payload.get("body", {}).get("data"):
+        body = base64.urlsafe_b64decode(payload["body"]["data"]).decode("utf-8", errors="replace")
+    elif payload.get("parts"):
+        for part in payload["parts"]:
+            if part.get("mimeType") == "text/plain" and part.get("body", {}).get("data"):
+                body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8", errors="replace")
+                break
+        if not body:
+            for part in payload["parts"]:
+                if part.get("mimeType") == "text/html" and part.get("body", {}).get("data"):
+                    body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8", errors="replace")
+                    break
+    return body
+
+
+def _extract_doc_text(doc: dict) -> str:
+    text_parts = []
+    for element in doc.get("body", {}).get("content", []):
+        paragraph = element.get("paragraph", {})
+        for pe in paragraph.get("elements", []):
+            text_run = pe.get("textRun", {})
+            if text_run.get("content"):
+                text_parts.append(text_run["content"])
+    return "".join(text_parts)
+
+
+def _datetime_with_timezone(value: str) -> str:
+    if not value:
+        return value
+    if "T" not in value:
+        return value
+    if value.endswith("Z"):
+        return value
+    tail = value[10:]
+    if "+" in tail or "-" in tail:
+        return value
+    return value + "Z"
+
+
+def get_credentials():
+    """Load and refresh credentials from token file."""
+    _ensure_authenticated()
+
+    from google.oauth2.credentials import Credentials
+    from google.auth.transport.requests import Request
+
+    creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), _stored_token_scopes())
+    if creds.expired and creds.refresh_token:
+        creds.refresh(Request())
+        TOKEN_PATH.write_text(creds.to_json())
+    if not creds.valid:
+        print("Token is invalid. Re-run setup.", file=sys.stderr)
+        sys.exit(1)
+    return creds
+
+
+def build_service(api, version):
+    from googleapiclient.discovery import build
+
+    return build(api, version, credentials=get_credentials())
+
+
+# =========================================================================
+# Gmail
+# =========================================================================
+

 def gmail_search(args):
-    cmd = ["gmail", "+triage", "--query", args.query, "--max", str(args.max), "--format", "json"]
-    gws(*cmd)
+    if _gws_binary():
+        results = _run_gws(
+            ["gmail", "users", "messages", "list"],
+            params={"userId": "me", "q": args.query, "maxResults": args.max},
+        )
+        messages = results.get("messages", [])
+        output = []
+        for msg_meta in messages:
+            msg = _run_gws(
+                ["gmail", "users", "messages", "get"],
+                params={
+                    "userId": "me",
+                    "id": msg_meta["id"],
+                    "format": "metadata",
+                    "metadataHeaders": ["From", "To", "Subject", "Date"],
+                },
+            )
+            headers = _headers_dict(msg)
+            output.append(
+                {
+                    "id": msg["id"],
+                    "threadId": msg["threadId"],
+                    "from": headers.get("From", ""),
+                    "to": headers.get("To", ""),
+                    "subject": headers.get("Subject", ""),
+                    "date": headers.get("Date", ""),
+                    "snippet": msg.get("snippet", ""),
+                    "labels": msg.get("labelIds", []),
+                }
+            )
+        print(json.dumps(output, indent=2, ensure_ascii=False))
+        return
+
+    service = build_service("gmail", "v1")
+    results = service.users().messages().list(
+        userId="me", q=args.query, maxResults=args.max
+    ).execute()
+    messages = results.get("messages", [])
+    if not messages:
+        print("No messages found.")
+        return
+
+    output = []
+    for msg_meta in messages:
+        msg = service.users().messages().get(
+            userId="me", id=msg_meta["id"], format="metadata",
+            metadataHeaders=["From", "To", "Subject", "Date"],
+        ).execute()
+        headers = _headers_dict(msg)
+        output.append({
+            "id": msg["id"],
+            "threadId": msg["threadId"],
+            "from": headers.get("From", ""),
+            "to": headers.get("To", ""),
+            "subject": headers.get("Subject", ""),
+            "date": headers.get("Date", ""),
+            "snippet": msg.get("snippet", ""),
+            "labels": msg.get("labelIds", []),
+        })
+    print(json.dumps(output, indent=2, ensure_ascii=False))
+
+

 def gmail_get(args):
-    gws("gmail", "+read", "--id", args.message_id, "--headers", "--format", "json")
+    if _gws_binary():
+        msg = _run_gws(
+            ["gmail", "users", "messages", "get"],
+            params={"userId": "me", "id": args.message_id, "format": "full"},
+        )
+        headers = _headers_dict(msg)
+        result = {
+            "id": msg["id"],
+            "threadId": msg["threadId"],
+            "from": headers.get("From", ""),
+            "to": headers.get("To", ""),
+            "subject": headers.get("Subject", ""),
+            "date": headers.get("Date", ""),
+            "labels": msg.get("labelIds", []),
+            "body": _extract_message_body(msg),
+        }
+        print(json.dumps(result, indent=2, ensure_ascii=False))
+        return
+
+    service = build_service("gmail", "v1")
+    msg = service.users().messages().get(
+        userId="me", id=args.message_id, format="full"
+    ).execute()
+
+    headers = _headers_dict(msg)
+    result = {
+        "id": msg["id"],
+        "threadId": msg["threadId"],
+        "from": headers.get("From", ""),
+        "to": headers.get("To", ""),
+        "subject": headers.get("Subject", ""),
+        "date": headers.get("Date", ""),
+        "labels": msg.get("labelIds", []),
+        "body": _extract_message_body(msg),
+    }
+    print(json.dumps(result, indent=2, ensure_ascii=False))
+
+

 def gmail_send(args):
-    cmd = ["gmail", "+send", "--to", args.to, "--subject", args.subject, "--body", args.body, "--format", "json"]
+    if _gws_binary():
+        message = MIMEText(args.body, "html" if args.html else "plain")
+        message["to"] = args.to
+        message["subject"] = args.subject
+        if args.cc:
+            message["cc"] = args.cc
+        if args.from_header:
+            message["from"] = args.from_header
+
+        raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
+        body = {"raw": raw}
+        if args.thread_id:
+            body["threadId"] = args.thread_id
+
+        result = _run_gws(
+            ["gmail", "users", "messages", "send"],
+            params={"userId": "me"},
+            body=body,
+        )
+        print(json.dumps({"status": "sent", "id": result["id"], "threadId": result.get("threadId", "")}, indent=2))
+        return
+
+    service = build_service("gmail", "v1")
+    message = MIMEText(args.body, "html" if args.html else "plain")
+    message["to"] = args.to
+    message["subject"] = args.subject
    if args.cc:
-        cmd += ["--cc", args.cc]
-    if args.html:
-        cmd.append("--html")
-    gws(*cmd)
+        message["cc"] = args.cc
+    if args.from_header:
+        message["from"] = args.from_header
+
+    raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
+    body = {"raw": raw}
+
+    if args.thread_id:
+        body["threadId"] = args.thread_id
+
+    result = service.users().messages().send(userId="me", body=body).execute()
+    print(json.dumps({"status": "sent", "id": result["id"], "threadId": result.get("threadId", "")}, indent=2))
+
+

 def gmail_reply(args):
-    gws("gmail", "+reply", "--message-id", args.message_id, "--body", args.body, "--format", "json")
+    if _gws_binary():
+        original = _run_gws(
+            ["gmail", "users", "messages", "get"],
+            params={
+                "userId": "me",
+                "id": args.message_id,
+                "format": "metadata",
+                "metadataHeaders": ["From", "Subject", "Message-ID"],
+            },
+        )
+        headers = _headers_dict(original)
+
+        subject = headers.get("Subject", "")
+        if not subject.startswith("Re:"):
+            subject = f"Re: {subject}"
+
+        message = MIMEText(args.body)
+        message["to"] = headers.get("From", "")
+        message["subject"] = subject
+        if args.from_header:
+            message["from"] = args.from_header
+        if headers.get("Message-ID"):
+            message["In-Reply-To"] = headers["Message-ID"]
+            message["References"] = headers["Message-ID"]
+
+        raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
+        result = _run_gws(
+            ["gmail", "users", "messages", "send"],
+            params={"userId": "me"},
+            body={"raw": raw, "threadId": original["threadId"]},
+        )
+        print(json.dumps({"status": "sent", "id": result["id"], "threadId": result.get("threadId", "")}, indent=2))
+        return
+
+    service = build_service("gmail", "v1")
+    original = service.users().messages().get(
+        userId="me", id=args.message_id, format="metadata",
+        metadataHeaders=["From", "Subject", "Message-ID"],
+    ).execute()
+    headers = _headers_dict(original)
+
+    subject = headers.get("Subject", "")
+    if not subject.startswith("Re:"):
+        subject = f"Re: {subject}"
+
+    message = MIMEText(args.body)
+    message["to"] = headers.get("From", "")
+    message["subject"] = subject
+    if args.from_header:
+        message["from"] = args.from_header
+    if headers.get("Message-ID"):
+        message["In-Reply-To"] = headers["Message-ID"]
+        message["References"] = headers["Message-ID"]
+
+    raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
+    body = {"raw": raw, "threadId": original["threadId"]}
+
+    result = service.users().messages().send(userId="me", body=body).execute()
+    print(json.dumps({"status": "sent", "id": result["id"], "threadId": result.get("threadId", "")}, indent=2))
+
+

 def gmail_labels(args):
-    gws("gmail", "users", "labels", "list", "--params", json.dumps({"userId": "me"}), "--format", "json")
+    if _gws_binary():
+        results = _run_gws(["gmail", "users", "labels", "list"], params={"userId": "me"})
+        labels = [{"id": l["id"], "name": l["name"], "type": l.get("type", "")} for l in results.get("labels", [])]
+        print(json.dumps(labels, indent=2))
+        return
+
+    service = build_service("gmail", "v1")
+    results = service.users().labels().list(userId="me").execute()
+    labels = [{"id": l["id"], "name": l["name"], "type": l.get("type", "")} for l in results.get("labels", [])]
+    print(json.dumps(labels, indent=2))
+
+

 def gmail_modify(args):
    body = {}
@@ -69,145 +418,310 @@ def gmail_modify(args):
        body["addLabelIds"] = args.add_labels.split(",")
    if args.remove_labels:
        body["removeLabelIds"] = args.remove_labels.split(",")
-    gws(
-        "gmail", "users", "messages", "modify",
-        "--params", json.dumps({"userId": "me", "id": args.message_id}),
-        "--json", json.dumps(body),
-        "--format", "json",
-    )
+
+    if _gws_binary():
+        result = _run_gws(
+            ["gmail", "users", "messages", "modify"],
+            params={"userId": "me", "id": args.message_id},
+            body=body,
+        )
+        print(json.dumps({"id": result["id"], "labels": result.get("labelIds", [])}, indent=2))
+        return
+
+    service = build_service("gmail", "v1")
+    result = service.users().messages().modify(userId="me", id=args.message_id, body=body).execute()
+    print(json.dumps({"id": result["id"], "labels": result.get("labelIds", [])}, indent=2))


-# -- Calendar --
+# =========================================================================
+# Calendar
+# =========================================================================
+

 def calendar_list(args):
-    if args.start or args.end:
-        # Specific date range — use raw Calendar API for precise timeMin/timeMax
-        from datetime import datetime, timedelta, timezone as tz
-        now = datetime.now(tz.utc)
-        time_min = args.start or now.isoformat()
-        time_max = args.end or (now + timedelta(days=7)).isoformat()
-        gws(
-            "calendar", "events", "list",
-            "--params", json.dumps({
+    """Print the events between ``args.start`` and ``args.end`` as a JSON array.
+
+    An empty start defaults to the current UTC time and an empty end to seven
+    days after the current UTC time; both bounds are passed through
+    ``_datetime_with_timezone`` before being sent. Events come from the
+    ``gws`` CLI when ``_gws_binary()`` is truthy, otherwise from the Calendar
+    v3 client built by ``build_service``.
+    """
+    now = datetime.now(timezone.utc)
+    time_min = _datetime_with_timezone(args.start or now.isoformat())
+    time_max = _datetime_with_timezone(args.end or (now + timedelta(days=7)).isoformat())
+
+    if _gws_binary():
+        response = _run_gws(
+            ["calendar", "events", "list"],
+            params={
                "calendarId": args.calendar,
                "timeMin": time_min,
                "timeMax": time_max,
                "maxResults": args.max,
                "singleEvents": True,
                "orderBy": "startTime",
-            }),
-            "--format", "json",
+            },
        )
-    else:
-        # No date range — use +agenda helper (defaults to 7 days)
-        cmd = ["calendar", "+agenda", "--days", "7", "--format", "json"]
-        if args.calendar != "primary":
-            cmd += ["--calendar", args.calendar]
-        gws(*cmd)
+    else:
+        calendar_api = build_service("calendar", "v3")
+        response = calendar_api.events().list(
+            calendarId=args.calendar, timeMin=time_min, timeMax=time_max,
+            maxResults=args.max, singleEvents=True, orderBy="startTime",
+        ).execute()
+
+    def _boundary(event, key):
+        # Prefer "dateTime"; fall back to "date", then to an empty string.
+        slot = event.get(key, {})
+        return slot.get("dateTime", slot.get("date", ""))
+
+    events = [
+        {
+            "id": item["id"],
+            "summary": item.get("summary", "(no title)"),
+            "start": _boundary(item, "start"),
+            "end": _boundary(item, "end"),
+            "location": item.get("location", ""),
+            "description": item.get("description", ""),
+            "status": item.get("status", ""),
+            "htmlLink": item.get("htmlLink", ""),
+        }
+        for item in response.get("items", [])
+    ]
+    print(json.dumps(events, indent=2, ensure_ascii=False))
+
+

 def calendar_create(args):
-    cmd = [
-        "calendar", "+insert",
-        "--summary", args.summary,
-        "--start", args.start,
-        "--end", args.end,
-        "--format", "json",
-    ]
+    """Create a calendar event and print its id, summary and link as JSON.
+
+    Location, description and attendees are added only when the matching
+    argument is non-empty; ``args.attendees`` is a comma-separated list of
+    e-mail addresses. The event is inserted through the ``gws`` CLI when
+    ``_gws_binary()`` is truthy, otherwise through the Calendar v3 client.
+    """
+    # NOTE(review): start/end are sent exactly as given, whereas calendar_list
+    # runs its bounds through _datetime_with_timezone — confirm callers always
+    # supply a UTC offset here.
+    event = {
+        "summary": args.summary,
+        "start": {"dateTime": args.start},
+        "end": {"dateTime": args.end},
+    }
    if args.location:
-        cmd += ["--location", args.location]
+        event["location"] = args.location
    if args.description:
-        cmd += ["--description", args.description]
+        event["description"] = args.description
    if args.attendees:
-        for email in args.attendees.split(","):
-            cmd += ["--attendee", email.strip()]
-    if args.calendar != "primary":
-        cmd += ["--calendar", args.calendar]
-    gws(*cmd)
+        # Blank entries (e.g. from a trailing comma) are dropped.
+        addresses = (part.strip() for part in args.attendees.split(","))
+        event["attendees"] = [{"email": address} for address in addresses if address]
+
+    if _gws_binary():
+        created = _run_gws(
+            ["calendar", "events", "insert"],
+            params={"calendarId": args.calendar},
+            body=event,
+        )
+    else:
+        calendar_api = build_service("calendar", "v3")
+        created = calendar_api.events().insert(calendarId=args.calendar, body=event).execute()
+
+    report = {
+        "status": "created",
+        "id": created["id"],
+        "summary": created.get("summary", ""),
+        "htmlLink": created.get("htmlLink", ""),
+    }
+    print(json.dumps(report, indent=2))
+
+

 def calendar_delete(args):
-    gws(
-        "calendar", "events", "delete",
-        "--params", json.dumps({"calendarId": args.calendar, "eventId": args.event_id}),
-        "--format", "json",
-    )
+    """Delete event ``args.event_id`` from ``args.calendar`` and print a JSON confirmation.
+
+    The delete goes through the ``gws`` CLI when ``_gws_binary()`` is truthy,
+    otherwise through the Calendar v3 client. The backend's response is
+    ignored.
+    """
+    if _gws_binary():
+        _run_gws(["calendar", "events", "delete"], params={"calendarId": args.calendar, "eventId": args.event_id})
+    else:
+        calendar_api = build_service("calendar", "v3")
+        calendar_api.events().delete(calendarId=args.calendar, eventId=args.event_id).execute()
+
+    # Reached only when the delete call above returned normally.
+    print(json.dumps({"status": "deleted", "eventId": args.event_id}))


-# -- Drive --
+# =========================================================================
+# Drive
+# =========================================================================
+

 def drive_search(args):
-    query = args.query if args.raw_query else f"fullText contains '{args.query}'"
-    gws(
-        "drive", "files", "list",
-        "--params", json.dumps({
-            "q": query,
-            "pageSize": args.max,
-            "fields": "files(id,name,mimeType,modifiedTime,webViewLink)",
-        }),
-        "--format", "json",
-    )
+    """Search Drive and print the matching files as a JSON array.
+
+    With ``args.raw_query`` set, ``args.query`` is sent unchanged as the Drive
+    ``q`` expression. Otherwise it is treated as plain text and wrapped in a
+    ``fullText contains '...'`` clause. At most ``args.max`` files are
+    requested, each with id, name, mimeType, modifiedTime and webViewLink.
+    """
+    if args.raw_query:
+        query = args.query
+    else:
+        # The text sits inside a single-quoted Drive query literal, so
+        # backslashes and single quotes must be backslash-escaped; otherwise
+        # a search such as "Valentine's Day" produces a malformed query.
+        escaped = args.query.replace("\\", "\\\\").replace("'", "\\'")
+        query = f"fullText contains '{escaped}'"
+
+    fields = "files(id, name, mimeType, modifiedTime, webViewLink)"
+
+    if _gws_binary():
+        results = _run_gws(
+            ["drive", "files", "list"],
+            params={"q": query, "pageSize": args.max, "fields": fields},
+        )
+    else:
+        service = build_service("drive", "v3")
+        results = service.files().list(q=query, pageSize=args.max, fields=fields).execute()
+
+    print(json.dumps(results.get("files", []), indent=2, ensure_ascii=False))


-# -- Contacts --
+# =========================================================================
+# Contacts
+# =========================================================================
+

 def contacts_list(args):
-    gws(
-        "people", "people", "connections", "list",
-        "--params", json.dumps({
-            "resourceName": "people/me",
-            "pageSize": args.max,
-            "personFields": "names,emailAddresses,phoneNumbers",
-        }),
-        "--format", "json",
-    )
+    """Print up to ``args.max`` contacts as a JSON array of ``{name, emails, phones}``.
+
+    Connections of ``people/me`` are fetched through the ``gws`` CLI when
+    ``_gws_binary()`` is truthy, otherwise through the People v1 client.
+    """
+    if _gws_binary():
+        response = _run_gws(
+            ["people", "people", "connections", "list"],
+            params={
+                "resourceName": "people/me",
+                "pageSize": args.max,
+                "personFields": "names,emailAddresses,phoneNumbers",
+            },
+        )
+    else:
+        people_api = build_service("people", "v1")
+        response = people_api.people().connections().list(
+            resourceName="people/me",
+            pageSize=args.max,
+            personFields="names,emailAddresses,phoneNumbers",
+        ).execute()
+
+    contacts = []
+    for entry in response.get("connections", []):
+        name_records = entry.get("names", [{}])
+        # Only the first name record is used; an empty list yields "".
+        display_name = name_records[0].get("displayName", "") if name_records else ""
+        contacts.append({
+            "name": display_name,
+            "emails": [item.get("value", "") for item in entry.get("emailAddresses", [])],
+            "phones": [item.get("value", "") for item in entry.get("phoneNumbers", [])],
+        })
+    print(json.dumps(contacts, indent=2, ensure_ascii=False))


-# -- Sheets --
+# =========================================================================
+# Sheets
+# =========================================================================
+

 def sheets_get(args):
-    gws(
-        "sheets", "+read",
-        "--spreadsheet", args.sheet_id,
-        "--range", args.range,
-        "--format", "json",
-    )
+    """Print the cell values of ``args.range`` in spreadsheet ``args.sheet_id`` as JSON.
+
+    Values are read through the ``gws`` CLI when ``_gws_binary()`` is truthy,
+    otherwise through the Sheets v4 client. A response without a ``values``
+    key prints ``[]``.
+    """
+    if _gws_binary():
+        response = _run_gws(
+            ["sheets", "spreadsheets", "values", "get"],
+            params={"spreadsheetId": args.sheet_id, "range": args.range},
+        )
+    else:
+        sheets_api = build_service("sheets", "v4")
+        response = sheets_api.spreadsheets().values().get(
+            spreadsheetId=args.sheet_id, range=args.range,
+        ).execute()
+
+    print(json.dumps(response.get("values", []), indent=2, ensure_ascii=False))
+
+

 def sheets_update(args):
+    """Write ``args.values`` (a JSON string) into ``args.range`` and print the update summary.
+
+    The write uses ``valueInputOption="USER_ENTERED"`` and goes through the
+    ``gws`` CLI when ``_gws_binary()`` is truthy, otherwise through the
+    Sheets v4 client. Prints ``updatedCells`` and ``updatedRange`` as JSON.
+    """
    values = json.loads(args.values)
-    gws(
-        "sheets", "spreadsheets", "values", "update",
-        "--params", json.dumps({
-            "spreadsheetId": args.sheet_id,
-            "range": args.range,
-            "valueInputOption": "USER_ENTERED",
-        }),
-        "--json", json.dumps({"values": values}),
-        "--format", "json",
-    )
+    payload = {"values": values}
+
+    if _gws_binary():
+        outcome = _run_gws(
+            ["sheets", "spreadsheets", "values", "update"],
+            params={
+                "spreadsheetId": args.sheet_id,
+                "range": args.range,
+                "valueInputOption": "USER_ENTERED",
+            },
+            body=payload,
+        )
+    else:
+        sheets_api = build_service("sheets", "v4")
+        outcome = sheets_api.spreadsheets().values().update(
+            spreadsheetId=args.sheet_id, range=args.range,
+            valueInputOption="USER_ENTERED", body=payload,
+        ).execute()
+
+    report = {"updatedCells": outcome.get("updatedCells", 0), "updatedRange": outcome.get("updatedRange", "")}
+    print(json.dumps(report, indent=2))
+
+

 def sheets_append(args):
+    """Append ``args.values`` (a JSON string) as new rows and print the number of cells written.
+
+    The request targets ``args.range`` with ``valueInputOption="USER_ENTERED"``
+    and ``insertDataOption="INSERT_ROWS"``. It goes through the ``gws`` CLI
+    when ``_gws_binary()`` is truthy, otherwise through the Sheets v4 client.
+    """
    values = json.loads(args.values)
-    gws(
-        "sheets", "+append",
-        "--spreadsheet", args.sheet_id,
-        "--json-values", json.dumps(values),
-        "--format", "json",
-    )
+    payload = {"values": values}
+
+    if _gws_binary():
+        outcome = _run_gws(
+            ["sheets", "spreadsheets", "values", "append"],
+            params={
+                "spreadsheetId": args.sheet_id,
+                "range": args.range,
+                "valueInputOption": "USER_ENTERED",
+                "insertDataOption": "INSERT_ROWS",
+            },
+            body=payload,
+        )
+    else:
+        sheets_api = build_service("sheets", "v4")
+        outcome = sheets_api.spreadsheets().values().append(
+            spreadsheetId=args.sheet_id, range=args.range,
+            valueInputOption="USER_ENTERED", insertDataOption="INSERT_ROWS", body=payload,
+        ).execute()
+
+    # The cell count is nested under "updates" in the append response.
+    cell_count = outcome.get("updates", {}).get("updatedCells", 0)
+    print(json.dumps({"updatedCells": cell_count}, indent=2))


-# -- Docs --
+# =========================================================================
+# Docs
+# =========================================================================
+

 def docs_get(args):
-    gws(
-        "docs", "documents", "get",
-        "--params", json.dumps({"documentId": args.doc_id}),
-        "--format", "json",
-    )
+    """Print the title, id and extracted text of document ``args.doc_id`` as JSON.
+
+    The document is fetched through the ``gws`` CLI when ``_gws_binary()`` is
+    truthy, otherwise through the Docs v1 client; the ``body`` field is
+    whatever ``_extract_doc_text`` returns for it.
+    """
+    if _gws_binary():
+        document = _run_gws(["docs", "documents", "get"], params={"documentId": args.doc_id})
+    else:
+        docs_api = build_service("docs", "v1")
+        document = docs_api.documents().get(documentId=args.doc_id).execute()
+
+    summary = {
+        "title": document.get("title", ""),
+        "documentId": document.get("documentId", ""),
+        "body": _extract_doc_text(document),
+    }
+    print(json.dumps(summary, indent=2, ensure_ascii=False))


-# -- CLI parser (backward-compatible interface) --
+# =========================================================================
+# CLI parser
+# =========================================================================
+

 def main():
-    parser = argparse.ArgumentParser(description="Google Workspace API for Hermes Agent (gws backend)")
+    parser = argparse.ArgumentParser(description="Google Workspace API for Hermes Agent")
    sub = parser.add_subparsers(dest="service", required=True)

    # --- Gmail ---
@@ -228,13 +742,15 @@ def main():
    p.add_argument("--subject", required=True)
    p.add_argument("--body", required=True)
    p.add_argument("--cc", default="")
+    p.add_argument("--from", dest="from_header", default="", help="Custom From header (e.g. '\"Agent Name\" <user@example.com>')")
    p.add_argument("--html", action="store_true", help="Send body as HTML")
-    p.add_argument("--thread-id", default="", help="Thread ID (unused with gws, kept for compat)")
+    p.add_argument("--thread-id", default="", help="Thread ID for threading")
    p.set_defaults(func=gmail_send)

    p = gmail_sub.add_parser("reply")
    p.add_argument("message_id", help="Message ID to reply to")
    p.add_argument("--body", required=True)
+    p.add_argument("--from", dest="from_header", default="", help="Custom From header (e.g. '\"Agent Name\" <user@example.com>')")
    p.set_defaults(func=gmail_reply)

    p = gmail_sub.add_parser("labels")
@@ -93,6 +93,12 @@ def make_restart_runner(
    runner._running_agent_count = GatewayRunner._running_agent_count.__get__(
        runner, GatewayRunner
    )
+    runner._snapshot_running_agents = GatewayRunner._snapshot_running_agents.__get__(
+        runner, GatewayRunner
+    )
+    runner._notify_active_sessions_of_shutdown = (
+        GatewayRunner._notify_active_sessions_of_shutdown.__get__(runner, GatewayRunner)
+    )
    runner._launch_detached_restart_command = GatewayRunner._launch_detached_restart_command.__get__(
        runner, GatewayRunner
    )
@@ -220,6 +220,7 @@ def _create_app(adapter: APIServerAdapter) -> web.Application:
    app = web.Application(middlewares=mws)
    app["api_server_adapter"] = adapter
    app.router.add_get("/health", adapter._handle_health)
+    app.router.add_get("/health/detailed", adapter._handle_health_detailed)
    app.router.add_get("/v1/health", adapter._handle_health)
    app.router.add_get("/v1/models", adapter._handle_models)
    app.router.add_post("/v1/chat/completions", adapter._handle_chat_completions)
@@ -277,6 +278,58 @@ class TestHealthEndpoint:
            assert data["platform"] == "hermes-agent"


+# ---------------------------------------------------------------------------
+# /health/detailed endpoint
+# ---------------------------------------------------------------------------
+
+
+class TestHealthDetailedEndpoint:
+    """Tests for GET /health/detailed with ``gateway.status.read_runtime_status`` patched."""
+
+    @pytest.mark.asyncio
+    async def test_health_detailed_returns_ok(self, adapter):
+        """GET /health/detailed returns status, platform, and runtime fields."""
+        app = _create_app(adapter)
+        # The patched runtime status is what the gateway_state, platforms and
+        # active_agents assertions below are compared against.
+        with patch("gateway.status.read_runtime_status", return_value={
+            "gateway_state": "running",
+            "platforms": {"telegram": {"state": "connected"}},
+            "active_agents": 2,
+            "exit_reason": None,
+            "updated_at": "2026-04-14T00:00:00Z",
+        }):
+            async with TestClient(TestServer(app)) as cli:
+                resp = await cli.get("/health/detailed")
+                assert resp.status == 200
+                data = await resp.json()
+                assert data["status"] == "ok"
+                assert data["platform"] == "hermes-agent"
+                assert data["gateway_state"] == "running"
+                assert data["platforms"] == {"telegram": {"state": "connected"}}
+                assert data["active_agents"] == 2
+                assert isinstance(data["pid"], int)
+                assert "updated_at" in data
+
+    @pytest.mark.asyncio
+    async def test_health_detailed_no_runtime_status(self, adapter):
+        """When read_runtime_status() returns None, gateway_state is None and platforms is {}."""
+        app = _create_app(adapter)
+        with patch("gateway.status.read_runtime_status", return_value=None):
+            async with TestClient(TestServer(app)) as cli:
+                resp = await cli.get("/health/detailed")
+                assert resp.status == 200
+                data = await resp.json()
+                assert data["status"] == "ok"
+                assert data["gateway_state"] is None
+                assert data["platforms"] == {}
+
+    @pytest.mark.asyncio
+    async def test_health_detailed_does_not_require_auth(self, auth_adapter):
+        """Health detailed endpoint should be accessible without auth, like /health."""
+        app = _create_app(auth_adapter)
+        with patch("gateway.status.read_runtime_status", return_value=None):
+            async with TestClient(TestServer(app)) as cli:
+                # No auth headers are sent with this request.
+                resp = await cli.get("/health/detailed")
+                assert resp.status == 200
+
+
 # ---------------------------------------------------------------------------
 # /v1/models endpoint
 # ---------------------------------------------------------------------------
@@ -963,6 +1016,47 @@ class TestResponsesEndpoint:
            assert len(call_kwargs["conversation_history"]) > 0
            assert call_kwargs["user_message"] == "Now add 1 more"

+    @pytest.mark.asyncio
+    async def test_previous_response_id_preserves_session(self, adapter):
+        """Chained responses via previous_response_id reuse the same session_id."""
+        # Canned (result, usage) pair returned by the patched _run_agent for
+        # both requests.
+        mock_result = {
+            "final_response": "ok",
+            "messages": [{"role": "assistant", "content": "ok"}],
+            "api_calls": 1,
+        }
+        usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            # First request — establishes a session
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (mock_result, usage)
+                resp1 = await cli.post(
+                    "/v1/responses",
+                    json={"model": "hermes-agent", "input": "Hello"},
+                )
+            assert resp1.status == 200
+            # call_args is read after the patch context has exited; the mock
+            # object still holds the call it recorded.
+            first_session_id = mock_run.call_args.kwargs["session_id"]
+            data1 = await resp1.json()
+            response_id = data1["id"]
+
+            # Second request — chains from the first
+            with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (mock_result, usage)
+                resp2 = await cli.post(
+                    "/v1/responses",
+                    json={
+                        "model": "hermes-agent",
+                        "input": "Follow up",
+                        "previous_response_id": response_id,
+                    },
+                )
+            assert resp2.status == 200
+            second_session_id = mock_run.call_args.kwargs["session_id"]
+
+            # Session must be the same across the chain
+            assert first_session_id == second_session_id
+
    @pytest.mark.asyncio
    async def test_invalid_previous_response_id_returns_404(self, adapter):
        app = _create_app(adapter)
@@ -1062,6 +1156,134 @@ class TestResponsesEndpoint:
            assert resp.status == 400


+class TestResponsesStreaming:
+    """Tests for POST /v1/responses with ``"stream": True`` and a patched ``_run_agent``."""
+
+    @pytest.mark.asyncio
+    async def test_stream_true_returns_responses_sse(self, adapter):
+        """A streamed request returns text/event-stream with created, delta, done and completed events."""
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            # Stand-in for _run_agent: pushes two text deltas through the
+            # stream callback, then returns the final (result, usage) pair.
+            async def _mock_run_agent(**kwargs):
+                cb = kwargs.get("stream_delta_callback")
+                if cb:
+                    cb("Hello")
+                    cb(" world")
+                return (
+                    {"final_response": "Hello world", "messages": [], "api_calls": 1},
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+
+            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
+                resp = await cli.post(
+                    "/v1/responses",
+                    json={"model": "hermes-agent", "input": "hi", "stream": True},
+                )
+                assert resp.status == 200
+                assert "text/event-stream" in resp.headers.get("Content-Type", "")
+                body = await resp.text()
+                assert "event: response.created" in body
+                assert "event: response.output_text.delta" in body
+                assert "event: response.output_text.done" in body
+                assert "event: response.completed" in body
+                assert '"sequence_number":' in body
+                assert '"logprobs": []' in body
+                assert "Hello" in body
+                assert " world" in body
+
+    @pytest.mark.asyncio
+    async def test_stream_emits_function_call_and_output_items(self, adapter):
+        """Tool start/complete callbacks surface as function_call and function_call_output items."""
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            # Stand-in for _run_agent: reports one tool call starting and
+            # completing, then one text delta, before returning.
+            async def _mock_run_agent(**kwargs):
+                start_cb = kwargs.get("tool_start_callback")
+                complete_cb = kwargs.get("tool_complete_callback")
+                text_cb = kwargs.get("stream_delta_callback")
+                if start_cb:
+                    start_cb("call_123", "read_file", {"path": "/tmp/test.txt"})
+                if complete_cb:
+                    complete_cb("call_123", "read_file", {"path": "/tmp/test.txt"}, '{"content":"hello"}')
+                if text_cb:
+                    text_cb("Done.")
+                return (
+                    {
+                        "final_response": "Done.",
+                        "messages": [
+                            {
+                                "role": "assistant",
+                                "tool_calls": [
+                                    {
+                                        "id": "call_123",
+                                        "function": {
+                                            "name": "read_file",
+                                            "arguments": '{"path":"/tmp/test.txt"}',
+                                        },
+                                    }
+                                ],
+                            },
+                            {
+                                "role": "tool",
+                                "tool_call_id": "call_123",
+                                "content": '{"content":"hello"}',
+                            },
+                        ],
+                        "api_calls": 1,
+                    },
+                    {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+                )
+
+            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
+                resp = await cli.post(
+                    "/v1/responses",
+                    json={"model": "hermes-agent", "input": "read the file", "stream": True},
+                )
+                assert resp.status == 200
+                body = await resp.text()
+                assert "event: response.output_item.added" in body
+                assert "event: response.output_item.done" in body
+                # At least two items must complete.
+                assert body.count("event: response.output_item.done") >= 2
+                assert '"type": "function_call"' in body
+                assert '"type": "function_call_output"' in body
+                assert '"call_id": "call_123"' in body
+                assert '"name": "read_file"' in body
+                assert '"output": [{"type": "input_text", "text": "{\\"content\\":\\"hello\\"}"}]' in body
+
+    @pytest.mark.asyncio
+    async def test_streamed_response_is_stored_for_get(self, adapter):
+        """A response produced by a streamed request can be fetched via GET /v1/responses/{id}."""
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            async def _mock_run_agent(**kwargs):
+                cb = kwargs.get("stream_delta_callback")
+                if cb:
+                    cb("Stored response")
+                return (
+                    {"final_response": "Stored response", "messages": [], "api_calls": 1},
+                    {"input_tokens": 1, "output_tokens": 2, "total_tokens": 3},
+                )
+
+            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
+                resp = await cli.post(
+                    "/v1/responses",
+                    json={"model": "hermes-agent", "input": "store this", "stream": True},
+                )
+                body = await resp.text()
+                # Scan the SSE "data: " lines for the response.completed
+                # payload and take the response id from it; lines that are
+                # not valid JSON are skipped.
+                response_id = None
+                for line in body.splitlines():
+                    if line.startswith("data: "):
+                        try:
+                            payload = json.loads(line[len("data: "):])
+                        except json.JSONDecodeError:
+                            continue
+                        if payload.get("type") == "response.completed":
+                            response_id = payload["response"]["id"]
+                            break
+                assert response_id
+
+                get_resp = await cli.get(f"/v1/responses/{response_id}")
+                assert get_resp.status == 200
+                data = await get_resp.json()
+                assert data["id"] == response_id
+                assert data["status"] == "completed"
+                assert data["output"][-1]["content"][0]["text"] == "Stored response"
+
+
 # ---------------------------------------------------------------------------
 # Auth on endpoints
 # ---------------------------------------------------------------------------
@@ -0,0 +1,95 @@
+"""Tests for the auto-continue feature (#4493).
+
+When the gateway restarts mid-agent-work, the session transcript ends on a
+tool result that the agent never processed.  The auto-continue logic detects
+this and prepends a system note to the next user message so the model
+finishes the interrupted work before addressing the new input.
+"""
+
+import pytest
+
+
+def _simulate_auto_continue(agent_history: list, user_message: str) -> str:
+    """Return ``user_message``, prefixed with the interruption note when needed.
+
+    Test-local stand-in for the auto-continue injection in ``_run_agent()``
+    (gateway/run.py), so detection and message rewriting can be checked
+    without a full gateway runner. The note is prepended only when the last
+    history entry has ``role == "tool"``.
+    """
+    interrupted = bool(agent_history) and agent_history[-1].get("role") == "tool"
+    if not interrupted:
+        return user_message
+
+    system_note = (
+        "[System note: Your previous turn was interrupted before you could "
+        "process the last tool result(s). The conversation history contains "
+        "tool outputs you haven't responded to yet. Please finish processing "
+        "those results and summarize what was accomplished, then address the "
+        "user's new message below.]\n\n"
+    )
+    return system_note + user_message
+
+
+class TestAutoDetection:
+    """Test that trailing tool results are correctly detected."""
+
+    def test_trailing_tool_result_triggers_note(self):
+        """History ending on a tool result gets the note, with the user's text kept."""
+        history = [
+            {"role": "user", "content": "deploy the app"},
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "call_1", "function": {"name": "terminal", "arguments": "{}"}}
+            ]},
+            {"role": "tool", "tool_call_id": "call_1", "content": "deployed successfully"},
+        ]
+        result = _simulate_auto_continue(history, "what happened?")
+        assert "[System note:" in result
+        assert "interrupted" in result
+        assert "what happened?" in result
+
+    def test_trailing_assistant_message_no_note(self):
+        """History ending on an assistant message leaves the user's text unchanged."""
+        history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "Hi there!"},
+        ]
+        result = _simulate_auto_continue(history, "how are you?")
+        assert "[System note:" not in result
+        assert result == "how are you?"
+
+    def test_empty_history_no_note(self):
+        """An empty history leaves the user's text unchanged."""
+        result = _simulate_auto_continue([], "hello")
+        assert result == "hello"
+
+    def test_trailing_user_message_no_note(self):
+        """Shouldn't happen in practice, but ensure no false positive."""
+        history = [
+            {"role": "user", "content": "hello"},
+        ]
+        result = _simulate_auto_continue(history, "hello again")
+        assert result == "hello again"
+
+    def test_multiple_tool_results_still_triggers(self):
+        """Multiple tool calls in a row — last one is still role=tool."""
+        history = [
+            {"role": "user", "content": "search and read"},
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "call_1", "function": {"name": "search", "arguments": "{}"}},
+                {"id": "call_2", "function": {"name": "read", "arguments": "{}"}},
+            ]},
+            {"role": "tool", "tool_call_id": "call_1", "content": "found it"},
+            {"role": "tool", "tool_call_id": "call_2", "content": "file content here"},
+        ]
+        result = _simulate_auto_continue(history, "continue")
+        assert "[System note:" in result
+
+    def test_original_message_preserved_after_note(self):
+        """The user's actual message must appear after the system note."""
+        history = [
+            {"role": "assistant", "content": None, "tool_calls": [
+                {"id": "c1", "function": {"name": "t", "arguments": "{}"}}
+            ]},
+            {"role": "tool", "tool_call_id": "c1", "content": "done"},
+        ]
+        result = _simulate_auto_continue(history, "now do X")
+        # System note comes first, then user's message
+        # ("]\n\n" is the closing bracket of the note plus its blank-line separator.)
+        note_end = result.index("]\n\n")
+        user_msg_start = result.index("now do X")
+        assert user_msg_start > note_end
@@ -0,0 +1,293 @@
+"""Tests for busy-session acknowledgment when user sends messages during active agent runs.
+
+Verifies that users get an immediate status response instead of total silence
+when the agent is working on a task. See PR fix for the @Lonely__MH report.
+"""
+import asyncio
+import time
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Minimal stubs so we can import gateway code without heavy deps
+# ---------------------------------------------------------------------------
+import sys, types
+
+_tg = types.ModuleType("telegram")
+_tg.constants = types.ModuleType("telegram.constants")
+_ct = MagicMock()
+_ct.SUPERGROUP = "supergroup"
+_ct.GROUP = "group"
+_ct.PRIVATE = "private"
+_tg.constants.ChatType = _ct
+sys.modules.setdefault("telegram", _tg)
+sys.modules.setdefault("telegram.constants", _tg.constants)
+sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
+
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SessionSource,
+    build_session_key,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_event(text="hello", chat_id="123", platform_val="telegram"):
+    """Build a private-chat TEXT MessageEvent from user1 (message_id "msg1") for the given text, chat and platform."""
+    source = SessionSource(
+        platform=MagicMock(value=platform_val),  # mock stand-in for the platform; only .value is set
+        chat_id=chat_id,
+        chat_type="private",
+        user_id="user1",
+    )
+    evt = MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id="msg1",
+    )
+    return evt
+
+
+def _make_runner():
+    """Build a GatewayRunner without running __init__; return (runner, _AGENT_PENDING_SENTINEL)."""
+    from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL  # imported inside the helper, not at module import
+
+    runner = object.__new__(GatewayRunner)  # bypasses __init__; the attributes below are set by hand
+    runner._running_agents = {}
+    runner._running_agents_ts = {}
+    runner._pending_messages = {}
+    runner._busy_ack_ts = {}
+    runner._draining = False
+    runner.adapters = {}
+    runner.config = MagicMock()
+    runner.session_store = None
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()  # AsyncMock so that emit(...) can be awaited
+    return runner, _AGENT_PENDING_SENTINEL
+
+
+def _make_adapter(platform_val="telegram"):
+    """Build a MagicMock adapter with an awaitable _send_with_retry and an empty _pending_messages dict."""
+    adapter = MagicMock()
+    adapter._pending_messages = {}  # real dict so tests can check membership by session key
+    adapter._send_with_retry = AsyncMock()  # tests inspect its call count and call_args
+    adapter.config = MagicMock()
+    adapter.config.extra = {}
+    adapter.platform = MagicMock(value=platform_val)
+    return adapter
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestBusySessionAck:
+    """User sends a message while agent is running — should get acknowledgment."""
+
+    @pytest.mark.asyncio
+    async def test_sends_ack_when_agent_running(self):
+        """A message sent while an agent runs is acked, queued on the adapter, and interrupts the agent."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="Are you working?")
+        sk = build_session_key(event.source)
+
+        # Simulate running agent
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 21,
+            "max_iterations": 60,
+            "current_tool": "terminal",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "terminal",
+            "seconds_since_activity": 1.0,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 600  # 10 min ago
+        runner.adapters[event.source.platform] = adapter
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+
+        assert result is True  # handled
+        # Verify ack was sent
+        adapter._send_with_retry.assert_called_once()
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")  # [1] is the kwargs half of call_args
+        if not content and call_kwargs.args:
+            # content was not passed by keyword: fall back to the repr of the whole call
+            content = str(call_kwargs)
+        assert "Interrupting" in content or "respond" in content
+        assert "/stop" not in content  # no need — we ARE interrupting
+
+        # Verify message was queued in adapter pending
+        assert sk in adapter._pending_messages
+
+        # Verify agent interrupt was called
+        agent.interrupt.assert_called_once_with("Are you working?")
+
+    @pytest.mark.asyncio
+    async def test_debounce_suppresses_rapid_acks(self):
+        """Second message within 30s should NOT send another ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event1 = _make_event(text="hello?")
+        # Reuse the same source so platform mock matches
+        event2 = MessageEvent(
+            text="still there?",
+            message_type=MessageType.TEXT,
+            source=event1.source,
+            message_id="msg2",
+        )
+        sk = build_session_key(event1.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 5,
+            "max_iterations": 60,
+            "current_tool": None,
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "api_call",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 60
+        runner.adapters[event1.source.platform] = adapter
+
+        # First message — should get ack
+        result1 = await runner._handle_active_session_busy_message(event1, sk)
+        assert result1 is True
+        assert adapter._send_with_retry.call_count == 1
+
+        # Second message within cooldown — should be queued but no ack
+        result2 = await runner._handle_active_session_busy_message(event2, sk)
+        assert result2 is True
+        assert adapter._send_with_retry.call_count == 1  # still 1, no new ack
+
+        # But interrupt should still be called for both
+        assert agent.interrupt.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_ack_after_cooldown_expires(self):
+        """After 30s cooldown, a new message should send a fresh ack."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="hello?")
+        sk = build_session_key(event.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 10,
+            "max_iterations": 60,
+            "current_tool": "web_search",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "tool",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 120
+        runner.adapters[event.source.platform] = adapter
+
+        # First ack
+        await runner._handle_active_session_busy_message(event, sk)
+        assert adapter._send_with_retry.call_count == 1
+
+        # Fake that cooldown expired
+        runner._busy_ack_ts[sk] = time.time() - 31
+
+        # Second ack should go through
+        await runner._handle_active_session_busy_message(event, sk)
+        assert adapter._send_with_retry.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_includes_status_detail(self):
+        """Ack message should include iteration and tool info when available."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="yo")
+        sk = build_session_key(event.source)
+
+        agent = MagicMock()
+        agent.get_activity_summary.return_value = {
+            "api_call_count": 21,
+            "max_iterations": 60,
+            "current_tool": "terminal",
+            "last_activity_ts": time.time(),
+            "last_activity_desc": "terminal",
+            "seconds_since_activity": 0.5,
+        }
+        runner._running_agents[sk] = agent
+        runner._running_agents_ts[sk] = time.time() - 600  # 10 min
+        runner.adapters[event.source.platform] = adapter
+
+        await runner._handle_active_session_busy_message(event, sk)
+
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+        assert "21/60" in content  # iteration
+        assert "terminal" in content  # current tool
+        assert "10 min" in content  # elapsed
+
+    @pytest.mark.asyncio
+    async def test_draining_still_works(self):
+        """Draining case should still produce the drain-specific message."""
+        runner, sentinel = _make_runner()
+        runner._draining = True
+        adapter = _make_adapter()
+
+        event = _make_event(text="hello")
+        sk = build_session_key(event.source)
+        runner.adapters[event.source.platform] = adapter
+
+        # Mock the drain-specific methods
+        runner._queue_during_drain_enabled = lambda: False
+        runner._status_action_gerund = lambda: "restarting"
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is True
+
+        call_kwargs = adapter._send_with_retry.call_args
+        content = call_kwargs.kwargs.get("content", "")
+        assert "restarting" in content
+
+    @pytest.mark.asyncio
+    async def test_pending_sentinel_no_interrupt(self):
+        """A pending-sentinel entry (no real agent yet) is still handled and acked without raising."""
+        runner, sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="hey")
+        sk = build_session_key(event.source)
+
+        runner._running_agents[sk] = sentinel  # sentinel in place of an agent object
+        runner._running_agents_ts[sk] = time.time()
+        runner.adapters[event.source.platform] = adapter
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is True
+        # Should still send ack (the absence of an interrupt call is not asserted here)
+        adapter._send_with_retry.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_no_adapter_falls_through(self):
+        """If adapter is missing, return False so default path handles it."""
+        runner, sentinel = _make_runner()
+
+        event = _make_event(text="hello")
+        sk = build_session_key(event.source)
+
+        # No adapter registered
+        runner._running_agents[sk] = MagicMock()
+
+        result = await runner._handle_active_session_busy_message(event, sk)
+        assert result is False  # not handled, let default path try
@@ -19,10 +19,34 @@ def _ensure_discord_mock():
    discord_mod.Thread = type("Thread", (), {})
    discord_mod.ForumChannel = type("ForumChannel", (), {})
    discord_mod.Interaction = object
+
+    # Lightweight mock for app_commands.Group and Command used by
+    # _register_skill_group.
+    class _FakeGroup:
+        def __init__(self, *, name, description, parent=None):
+            self.name = name
+            self.description = description
+            self.parent = parent
+            self._children: dict[str, object] = {}
+            if parent is not None:
+                parent.add_command(self)
+
+        def add_command(self, cmd):
+            self._children[cmd.name] = cmd
+
+    class _FakeCommand:
+        def __init__(self, *, name, description, callback, parent=None):
+            self.name = name
+            self.description = description
+            self.callback = callback
+            self.parent = parent
+
    discord_mod.app_commands = SimpleNamespace(
        describe=lambda **kwargs: (lambda fn: fn),
        choices=lambda **kwargs: (lambda fn: fn),
        Choice=lambda **kwargs: SimpleNamespace(**kwargs),
+        Group=_FakeGroup,
+        Command=_FakeCommand,
    )

    ext_mod = MagicMock()
@@ -51,6 +75,12 @@ class FakeTree:

        return decorator

+    def add_command(self, cmd):
+        self.commands[cmd.name] = cmd
+
+    def get_commands(self):
+        return [SimpleNamespace(name=n) for n in self.commands]
+

@pytest.fixture
 def adapter():
@@ -87,6 +117,23 @@ async def test_registers_native_thread_slash_command(adapter):
    adapter._handle_thread_create_slash.assert_awaited_once_with(interaction, "Planning", "", 1440)


+@pytest.mark.asyncio
+async def test_registers_native_restart_slash_command(adapter):
+    adapter._run_simple_slash = AsyncMock()
+    adapter._register_slash_commands()
+
+    assert "restart" in adapter._client.tree.commands
+
+    interaction = SimpleNamespace()
+    await adapter._client.tree.commands["restart"](interaction)
+
+    adapter._run_simple_slash.assert_awaited_once_with(
+        interaction,
+        "/restart",
+        "Restart requested~",
+    )
+
+
 # ------------------------------------------------------------------
 # _handle_thread_create_slash — success, session dispatch, failure
 # ------------------------------------------------------------------
@@ -498,3 +545,79 @@ def test_discord_auto_thread_config_bridge(monkeypatch, tmp_path):

    import os
    assert os.getenv("DISCORD_AUTO_THREAD") == "true"
+
+
+# ------------------------------------------------------------------
+# /skill group registration
+# ------------------------------------------------------------------
+
+
+def test_register_skill_group_creates_group(adapter):
+    """_register_skill_group should register a '/skill' Group on the tree."""
+    mock_categories = {
+        "creative": [
+            ("ascii-art", "Generate ASCII art", "/ascii-art"),
+            ("excalidraw", "Hand-drawn diagrams", "/excalidraw"),
+        ],
+        "media": [
+            ("gif-search", "Search for GIFs", "/gif-search"),
+        ],
+    }
+    mock_uncategorized = [
+        ("dogfood", "Exploratory QA testing", "/dogfood"),
+    ]
+
+    with patch(
+        "hermes_cli.commands.discord_skill_commands_by_category",
+        return_value=(mock_categories, mock_uncategorized, 0),
+    ):
+        adapter._register_slash_commands()
+
+    tree = adapter._client.tree
+    assert "skill" in tree.commands, "Expected /skill group to be registered"
+    skill_group = tree.commands["skill"]
+    assert skill_group.name == "skill"
+    # Should have 2 category subgroups + 1 uncategorized subcommand
+    children = skill_group._children
+    assert "creative" in children
+    assert "media" in children
+    assert "dogfood" in children
+    # Category groups should have their skills
+    assert "ascii-art" in children["creative"]._children
+    assert "excalidraw" in children["creative"]._children
+    assert "gif-search" in children["media"]._children
+
+
+def test_register_skill_group_empty_skills_no_group(adapter):
+    """No /skill group should be added when there are zero skills."""
+    with patch(
+        "hermes_cli.commands.discord_skill_commands_by_category",
+        return_value=({}, [], 0),
+    ):
+        adapter._register_slash_commands()
+
+    tree = adapter._client.tree
+    assert "skill" not in tree.commands
+
+
+def test_register_skill_group_handler_dispatches_command(adapter):
+    """Each skill subcommand is registered with a callback named after the skill (the callback is not invoked)."""
+    mock_categories = {
+        "media": [
+            ("gif-search", "Search for GIFs", "/gif-search"),
+        ],
+    }
+
+    with patch(
+        "hermes_cli.commands.discord_skill_commands_by_category",
+        return_value=(mock_categories, [], 0),
+    ):
+        adapter._register_slash_commands()
+
+    skill_group = adapter._client.tree.commands["skill"]
+    media_group = skill_group._children["media"]
+    gif_cmd = media_group._children["gif-search"]
+    assert gif_cmd.callback is not None
+    # The callback name should reflect the skill, with the hyphen as an underscore
+    assert "gif_search" in gif_cmd.callback.__name__
+
@@ -1,12 +1,11 @@
 """Tests for Feishu interactive card approval buttons."""

-import asyncio
+import importlib.util
 import json
-import os
 import sys
 from pathlib import Path
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock, Mock, patch
+from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

@@ -23,14 +22,14 @@ if _repo not in sys.path:
 # ---------------------------------------------------------------------------
 def _ensure_feishu_mocks():
    """Provide stubs for lark-oapi / aiohttp.web so the import succeeds."""
-    if "lark_oapi" not in sys.modules:
+    if importlib.util.find_spec("lark_oapi") is None and "lark_oapi" not in sys.modules:
        mod = MagicMock()
        for name in (
            "lark_oapi", "lark_oapi.api.im.v1",
            "lark_oapi.event", "lark_oapi.event.callback_type",
        ):
            sys.modules.setdefault(name, mod)
-    if "aiohttp" not in sys.modules:
+    if importlib.util.find_spec("aiohttp") is None and "aiohttp" not in sys.modules:
        aio = MagicMock()
        sys.modules.setdefault("aiohttp", aio)
        sys.modules.setdefault("aiohttp.web", aio.web)
@@ -39,6 +38,7 @@ def _ensure_feishu_mocks():
 _ensure_feishu_mocks()

 from gateway.config import PlatformConfig
+import gateway.platforms.feishu as feishu_module
 from gateway.platforms.feishu import FeishuAdapter


@@ -74,6 +74,12 @@ def _make_card_action_data(
    )


+def _close_submitted_coro(coro, _loop):
+    """Patch side effect for asyncio.run_coroutine_threadsafe: close the coroutine (no unawaited warning) and return a stub with a no-op add_done_callback."""
+    coro.close()
+    return SimpleNamespace(add_done_callback=lambda *_args, **_kwargs: None)
+
+
 # ===========================================================================
 # send_exec_approval — interactive card with buttons
 # ===========================================================================
@@ -203,14 +209,14 @@ class TestFeishuExecApproval:


 # ===========================================================================
-# _handle_card_action_event — approval button clicks
+# _resolve_approval — approval state pop + gateway resolution
 # ===========================================================================

-class TestFeishuApprovalCallback:
-    """Test the approval intercept in _handle_card_action_event."""
+class TestResolveApproval:
+    """Test _resolve_approval pops state and calls resolve_gateway_approval."""

    @pytest.mark.asyncio
-    async def test_resolves_approval_on_click(self):
+    async def test_resolves_once(self):
        adapter = _make_adapter()
        adapter._approval_state[1] = {
            "session_key": "agent:main:feishu:group:oc_12345",
@@ -218,28 +224,14 @@ class TestFeishuApprovalCallback:
            "chat_id": "oc_12345",
        }

-        data = _make_card_action_data(
-            action_value={"hermes_action": "approve_once", "approval_id": 1},
-        )
-
-        with (
-            patch.object(
-                adapter, "_resolve_sender_profile", new_callable=AsyncMock,
-                return_value={"user_id": "ou_user1", "user_name": "Norbert", "user_id_alt": None},
-            ),
-            patch.object(adapter, "_update_approval_card", new_callable=AsyncMock) as mock_update,
-            patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve,
-        ):
-            await adapter._handle_card_action_event(data)
+        with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve:
+            await adapter._resolve_approval(1, "once", "Norbert")

        mock_resolve.assert_called_once_with("agent:main:feishu:group:oc_12345", "once")
-        mock_update.assert_called_once_with("msg_001", "Approved once", "Norbert", "once")
-
-        # State should be cleaned up
        assert 1 not in adapter._approval_state

    @pytest.mark.asyncio
-    async def test_deny_button(self):
+    async def test_resolves_deny(self):
        adapter = _make_adapter()
        adapter._approval_state[2] = {
            "session_key": "some-session",
@@ -247,26 +239,13 @@ class TestFeishuApprovalCallback:
            "chat_id": "oc_12345",
        }

-        data = _make_card_action_data(
-            action_value={"hermes_action": "deny", "approval_id": 2},
-            token="tok_deny",
-        )
-
-        with (
-            patch.object(
-                adapter, "_resolve_sender_profile", new_callable=AsyncMock,
-                return_value={"user_id": "ou_alice", "user_name": "Alice", "user_id_alt": None},
-            ),
-            patch.object(adapter, "_update_approval_card", new_callable=AsyncMock) as mock_update,
-            patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve,
-        ):
-            await adapter._handle_card_action_event(data)
+        with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve:
+            await adapter._resolve_approval(2, "deny", "Alice")

        mock_resolve.assert_called_once_with("some-session", "deny")
-        mock_update.assert_called_once_with("msg_002", "Denied", "Alice", "deny")

    @pytest.mark.asyncio
-    async def test_session_approval(self):
+    async def test_resolves_session(self):
        adapter = _make_adapter()
        adapter._approval_state[3] = {
            "session_key": "sess-3",
@@ -274,26 +253,13 @@ class TestFeishuApprovalCallback:
            "chat_id": "oc_99",
        }

-        data = _make_card_action_data(
-            action_value={"hermes_action": "approve_session", "approval_id": 3},
-            token="tok_ses",
-        )
-
-        with (
-            patch.object(
-                adapter, "_resolve_sender_profile", new_callable=AsyncMock,
-                return_value={"user_id": "ou_u", "user_name": "Bob", "user_id_alt": None},
-            ),
-            patch.object(adapter, "_update_approval_card", new_callable=AsyncMock) as mock_update,
-            patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve,
-        ):
-            await adapter._handle_card_action_event(data)
+        with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve:
+            await adapter._resolve_approval(3, "session", "Bob")

        mock_resolve.assert_called_once_with("sess-3", "session")
-        mock_update.assert_called_once_with("msg_003", "Approved for session", "Bob", "session")

    @pytest.mark.asyncio
-    async def test_always_approval(self):
+    async def test_resolves_always(self):
        adapter = _make_adapter()
        adapter._approval_state[4] = {
            "session_key": "sess-4",
@@ -301,42 +267,29 @@ class TestFeishuApprovalCallback:
            "chat_id": "oc_55",
        }

-        data = _make_card_action_data(
-            action_value={"hermes_action": "approve_always", "approval_id": 4},
-            token="tok_alw",
-        )
-
-        with (
-            patch.object(
-                adapter, "_resolve_sender_profile", new_callable=AsyncMock,
-                return_value={"user_id": "ou_u", "user_name": "Carol", "user_id_alt": None},
-            ),
-            patch.object(adapter, "_update_approval_card", new_callable=AsyncMock),
-            patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve,
-        ):
-            await adapter._handle_card_action_event(data)
+        with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve:
+            await adapter._resolve_approval(4, "always", "Carol")

        mock_resolve.assert_called_once_with("sess-4", "always")

    @pytest.mark.asyncio
    async def test_already_resolved_drops_silently(self):
        adapter = _make_adapter()
-        # No state for approval_id 99 — already resolved
-
-        data = _make_card_action_data(
-            action_value={"hermes_action": "approve_once", "approval_id": 99},
-            token="tok_gone",
-        )

        with patch("tools.approval.resolve_gateway_approval") as mock_resolve:
-            await adapter._handle_card_action_event(data)
+            await adapter._resolve_approval(99, "once", "Nobody")

-        # Should NOT resolve — already handled
        mock_resolve.assert_not_called()

+# ===========================================================================
+# _handle_card_action_event — non-approval card actions
+# ===========================================================================
+
+class TestNonApprovalCardAction:
+    """Non-approval card actions should still route as synthetic commands."""
+
    @pytest.mark.asyncio
-    async def test_non_approval_actions_route_normally(self):
-        """Non-approval card actions should still become synthetic commands."""
+    async def test_routes_as_synthetic_command(self):
        adapter = _make_adapter()

        data = _make_card_action_data(
@@ -351,82 +304,141 @@ class TestFeishuApprovalCallback:
            ),
            patch.object(adapter, "get_chat_info", new_callable=AsyncMock, return_value={"name": "Test Chat"}),
            patch.object(adapter, "_handle_message_with_guards", new_callable=AsyncMock) as mock_handle,
-            patch("tools.approval.resolve_gateway_approval") as mock_resolve,
        ):
            await adapter._handle_card_action_event(data)

-        # Should NOT resolve any approval
-        mock_resolve.assert_not_called()
-        # Should have routed as synthetic command
        mock_handle.assert_called_once()
        event = mock_handle.call_args[0][0]
        assert "/card button" in event.text


 # ===========================================================================
-# _update_approval_card — card replacement after resolution
+# _on_card_action_trigger — inline card response for approval actions
 # ===========================================================================

-class TestFeishuUpdateApprovalCard:
-    """Test the card update after approval resolution."""
+class _FakeCallBackCard:
+    def __init__(self):
+        self.type = None
+        self.data = None

-    @pytest.mark.asyncio
-    async def test_updates_card_on_approve(self):
+
+class _FakeP2Response:
+    def __init__(self):
+        self.card = None
+
+
+@pytest.fixture(autouse=False)
+def _patch_callback_card_types(monkeypatch):
+    """Provide real-ish P2CardActionTriggerResponse / CallBackCard for tests."""
+    monkeypatch.setattr(feishu_module, "P2CardActionTriggerResponse", _FakeP2Response)
+    monkeypatch.setattr(feishu_module, "CallBackCard", _FakeCallBackCard)
+
+
+class TestCardActionCallbackResponse:
+    """Test that _on_card_action_trigger returns updated card inline."""
+
+    def test_drops_action_when_loop_not_ready(self, _patch_callback_card_types):
        adapter = _make_adapter()
+        adapter._loop = None
+        data = _make_card_action_data({"hermes_action": "approve_once", "approval_id": 1})

-        mock_update = AsyncMock()
-        adapter._client.im.v1.message.update = MagicMock()
+        with patch("asyncio.run_coroutine_threadsafe") as mock_submit:
+            response = adapter._on_card_action_trigger(data)

-        with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread:
-            await adapter._update_approval_card(
-                "msg_001", "Approved once", "Norbert", "once"
-            )
+        assert response is not None
+        assert response.card is None
+        mock_submit.assert_not_called()

-        mock_thread.assert_called_once()
-        # Verify the update request was built
-        call_args = mock_thread.call_args
-        assert call_args[0][0] == adapter._client.im.v1.message.update
-
-    @pytest.mark.asyncio
-    async def test_updates_card_on_deny(self):
+    def test_returns_card_for_approve_action(self, _patch_callback_card_types):
        adapter = _make_adapter()
+        adapter._loop = MagicMock()
+        adapter._loop.is_closed = MagicMock(return_value=False)
+        data = _make_card_action_data(
+            {"hermes_action": "approve_once", "approval_id": 1},
+            open_id="ou_bob",
+        )
+        adapter._sender_name_cache["ou_bob"] = ("Bob", 9999999999)

-        with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread:
-            await adapter._update_approval_card(
-                "msg_002", "Denied", "Alice", "deny"
-            )
+        with patch("asyncio.run_coroutine_threadsafe", side_effect=_close_submitted_coro):
+            response = adapter._on_card_action_trigger(data)

-        mock_thread.assert_called_once()
+        assert response is not None
+        assert response.card is not None
+        assert response.card.type == "raw"
+        card = response.card.data
+        assert card["header"]["template"] == "green"
+        assert "Approved once" in card["header"]["title"]["content"]
+        assert "Bob" in card["elements"][0]["content"]

-    @pytest.mark.asyncio
-    async def test_skips_update_when_not_connected(self):
+    def test_returns_card_for_deny_action(self, _patch_callback_card_types):
        adapter = _make_adapter()
-        adapter._client = None
+        adapter._loop = MagicMock()
+        adapter._loop.is_closed = MagicMock(return_value=False)
+        data = _make_card_action_data(
+            {"hermes_action": "deny", "approval_id": 2},
+        )

-        with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread:
-            await adapter._update_approval_card(
-                "msg_001", "Approved", "Bob", "once"
-            )
+        with patch("asyncio.run_coroutine_threadsafe", side_effect=_close_submitted_coro):
+            response = adapter._on_card_action_trigger(data)

-        mock_thread.assert_not_called()
+        assert response.card is not None
+        card = response.card.data
+        assert card["header"]["template"] == "red"
+        assert "Denied" in card["header"]["title"]["content"]

-    @pytest.mark.asyncio
-    async def test_skips_update_when_no_message_id(self):
+    def test_ignores_missing_approval_id(self, _patch_callback_card_types):
        adapter = _make_adapter()
+        adapter._loop = MagicMock()
+        adapter._loop.is_closed = MagicMock(return_value=False)
+        data = _make_card_action_data({"hermes_action": "approve_once"})

-        with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread:
-            await adapter._update_approval_card(
-                "", "Approved", "Bob", "once"
-            )
+        with patch("asyncio.run_coroutine_threadsafe") as mock_submit:
+            response = adapter._on_card_action_trigger(data)

-        mock_thread.assert_not_called()
+        assert response is not None
+        assert response.card is None
+        mock_submit.assert_not_called()

-    @pytest.mark.asyncio
-    async def test_swallows_update_errors(self):
+    def test_no_card_for_non_approval_action(self, _patch_callback_card_types):
        adapter = _make_adapter()
+        adapter._loop = MagicMock()
+        adapter._loop.is_closed = MagicMock(return_value=False)
+        data = _make_card_action_data({"some_other": "value"})

-        with patch("asyncio.to_thread", new_callable=AsyncMock, side_effect=Exception("API error")):
-            # Should not raise
-            await adapter._update_approval_card(
-                "msg_001", "Approved", "Bob", "once"
-            )
+        with patch("asyncio.run_coroutine_threadsafe", side_effect=_close_submitted_coro):
+            response = adapter._on_card_action_trigger(data)
+
+        assert response is not None
+        assert response.card is None
+
+    def test_falls_back_to_open_id_when_name_not_cached(self, _patch_callback_card_types):
+        adapter = _make_adapter()
+        adapter._loop = MagicMock()
+        adapter._loop.is_closed = MagicMock(return_value=False)
+        data = _make_card_action_data(
+            {"hermes_action": "approve_session", "approval_id": 3},
+            open_id="ou_unknown",
+        )
+
+        with patch("asyncio.run_coroutine_threadsafe", side_effect=_close_submitted_coro):
+            response = adapter._on_card_action_trigger(data)
+
+        card = response.card.data
+        assert "ou_unknown" in card["elements"][0]["content"]
+
+    def test_ignores_expired_cached_name(self, _patch_callback_card_types):
+        adapter = _make_adapter()
+        adapter._loop = MagicMock()
+        adapter._loop.is_closed = MagicMock(return_value=False)
+        data = _make_card_action_data(
+            {"hermes_action": "approve_once", "approval_id": 4},
+            open_id="ou_expired",
+        )
+        adapter._sender_name_cache["ou_expired"] = ("Old Name", 1)
+
+        with patch("asyncio.run_coroutine_threadsafe", side_effect=_close_submitted_coro):
+            response = adapter._on_card_action_trigger(data)
+
+        card = response.card.data
+        assert "Old Name" not in card["elements"][0]["content"]
+        assert "ou_expired" in card["elements"][0]["content"]
@@ -161,3 +161,84 @@ async def test_launch_detached_restart_command_uses_setsid(monkeypatch):
    assert kwargs["start_new_session"] is True
    assert kwargs["stdout"] is subprocess.DEVNULL
    assert kwargs["stderr"] is subprocess.DEVNULL
+
+
+# ── Shutdown notification tests ──────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_shutdown_notification_sent_to_active_sessions():
+    """A running agent's chat gets exactly one 'shutting down' / 'interrupted' notice."""
+    runner, adapter = make_restart_runner()
+    source = make_restart_source(chat_id="999", chat_type="dm")  # NOTE(review): unused below — confirm it can go
+    session_key = "agent:main:telegram:dm:999"  # plain literal: nothing to interpolate
+    runner._running_agents[session_key] = MagicMock()
+
+    await runner._notify_active_sessions_of_shutdown()
+
+    assert len(adapter.sent) == 1
+    assert "shutting down" in adapter.sent[0]
+    assert "interrupted" in adapter.sent[0]
+
+
+@pytest.mark.asyncio
+async def test_shutdown_notification_says_restarting_when_restart_requested():
+    """When _restart_requested is True, the single message sent contains 'restarting' and 'resume'."""
+    runner, adapter = make_restart_runner()
+    runner._restart_requested = True
+    session_key = "agent:main:telegram:dm:999"
+    runner._running_agents[session_key] = MagicMock()
+
+    await runner._notify_active_sessions_of_shutdown()
+
+    assert len(adapter.sent) == 1
+    assert "restarting" in adapter.sent[0]
+    assert "resume" in adapter.sent[0]
+
+
+@pytest.mark.asyncio
+async def test_shutdown_notification_deduplicates_per_chat():
+    """Multiple sessions in the same chat only get one notification."""
+    runner, adapter = make_restart_runner()
+    # Two sessions (different users) in the same chat
+    runner._running_agents["agent:main:telegram:group:chat1:u1"] = MagicMock()
+    runner._running_agents["agent:main:telegram:group:chat1:u2"] = MagicMock()
+
+    await runner._notify_active_sessions_of_shutdown()
+
+    assert len(adapter.sent) == 1
+
+
+@pytest.mark.asyncio
+async def test_shutdown_notification_skipped_when_no_active_agents():
+    """No notification is sent when there are no active agents."""
+    runner, adapter = make_restart_runner()
+
+    await runner._notify_active_sessions_of_shutdown()
+
+    assert len(adapter.sent) == 0
+
+
+@pytest.mark.asyncio
+async def test_shutdown_notification_ignores_pending_sentinels():
+    """Pending sentinels (not-yet-started agents) don't trigger notifications."""
+    from gateway.run import _AGENT_PENDING_SENTINEL
+
+    runner, adapter = make_restart_runner()
+    runner._running_agents["agent:main:telegram:dm:999"] = _AGENT_PENDING_SENTINEL
+
+    await runner._notify_active_sessions_of_shutdown()
+
+    assert len(adapter.sent) == 0
+
+
+@pytest.mark.asyncio
+async def test_shutdown_notification_send_failure_does_not_block():
+    """If sending a notification fails, the method still completes."""
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock(side_effect=Exception("network error"))
+    session_key = "agent:main:telegram:dm:999"
+    runner._running_agents[session_key] = MagicMock()
+
+    # Should not raise
+    await runner._notify_active_sessions_of_shutdown()
@@ -0,0 +1,116 @@
+"""Tests for stuck-session loop detection (#7536).
+
+When a session is active across 3+ consecutive gateway restarts (the agent
+gets stuck, gateway restarts, same session gets stuck again), the session
+is auto-suspended on startup so the user gets a clean slate.
+"""
+
+import json
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import pytest
+
+from tests.gateway.restart_test_helpers import make_restart_runner
+
+
+@pytest.fixture
+def runner_with_home(tmp_path, monkeypatch):
+    """Patch ``gateway.run._hermes_home`` to ``tmp_path`` and return ``(runner, tmp_path)``."""
+    monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+    runner, _adapter = make_restart_runner()  # adapter is not used by the tests in this module
+    return runner, tmp_path
+
+
+class TestStuckLoopDetection:
+
+    def test_increment_creates_file(self, runner_with_home):
+        runner, home = runner_with_home
+        runner._increment_restart_failure_counts({"session:a", "session:b"})
+        path = home / runner._STUCK_LOOP_FILE
+        assert path.exists()
+        counts = json.loads(path.read_text())
+        assert counts["session:a"] == 1
+        assert counts["session:b"] == 1
+
+    def test_increment_accumulates(self, runner_with_home):
+        runner, home = runner_with_home
+        runner._increment_restart_failure_counts({"session:a"})
+        runner._increment_restart_failure_counts({"session:a"})
+        runner._increment_restart_failure_counts({"session:a"})
+        counts = json.loads((home / runner._STUCK_LOOP_FILE).read_text())
+        assert counts["session:a"] == 3
+
+    def test_increment_drops_inactive_sessions(self, runner_with_home):
+        runner, home = runner_with_home
+        runner._increment_restart_failure_counts({"session:a", "session:b"})
+        runner._increment_restart_failure_counts({"session:a"})  # b not active
+        counts = json.loads((home / runner._STUCK_LOOP_FILE).read_text())
+        assert "session:a" in counts
+        assert "session:b" not in counts
+
+    def test_suspend_at_threshold(self, runner_with_home):
+        runner, home = runner_with_home
+        # Simulate 3 restarts with session:a active each time
+        for _ in range(3):
+            runner._increment_restart_failure_counts({"session:a"})
+
+        # Create a mock session entry
+        mock_entry = MagicMock()
+        mock_entry.suspended = False
+        runner.session_store._entries = {"session:a": mock_entry}
+        runner.session_store._save = MagicMock()
+
+        suspended = runner._suspend_stuck_loop_sessions()
+        assert suspended == 1
+        assert mock_entry.suspended is True
+
+    def test_no_suspend_below_threshold(self, runner_with_home):
+        runner, home = runner_with_home
+        runner._increment_restart_failure_counts({"session:a"})
+        runner._increment_restart_failure_counts({"session:a"})
+        # Only 2 restarts — below threshold of 3
+
+        mock_entry = MagicMock()
+        mock_entry.suspended = False
+        runner.session_store._entries = {"session:a": mock_entry}
+
+        suspended = runner._suspend_stuck_loop_sessions()
+        assert suspended == 0
+        assert mock_entry.suspended is False
+
+    def test_clear_on_success(self, runner_with_home):
+        runner, home = runner_with_home
+        runner._increment_restart_failure_counts({"session:a", "session:b"})
+        runner._clear_restart_failure_count("session:a")
+
+        path = home / runner._STUCK_LOOP_FILE
+        counts = json.loads(path.read_text())
+        assert "session:a" not in counts
+        assert "session:b" in counts
+
+    def test_clear_removes_file_when_empty(self, runner_with_home):
+        runner, home = runner_with_home
+        runner._increment_restart_failure_counts({"session:a"})
+        runner._clear_restart_failure_count("session:a")
+        assert not (home / runner._STUCK_LOOP_FILE).exists()
+
+    def test_suspend_clears_file(self, runner_with_home):
+        runner, home = runner_with_home
+        for _ in range(3):
+            runner._increment_restart_failure_counts({"session:a"})
+
+        mock_entry = MagicMock()
+        mock_entry.suspended = False
+        runner.session_store._entries = {"session:a": mock_entry}
+        runner.session_store._save = MagicMock()
+
+        runner._suspend_stuck_loop_sessions()
+        assert not (home / runner._STUCK_LOOP_FILE).exists()
+
+    def test_no_file_no_crash(self, runner_with_home):
+        runner, home = runner_with_home
+        # No file exists — should return 0 and not crash
+        assert runner._suspend_stuck_loop_sessions() == 0
+        # Clear on nonexistent file — should not crash
+        runner._clear_restart_failure_count("nonexistent")
@@ -1028,3 +1028,154 @@ class TestDiscordSkillCommands:
            assert len(name) <= _CMD_NAME_LIMIT, (
                f"Name '{name}' is {len(name)} chars (limit {_CMD_NAME_LIMIT})"
            )
+
+
+# ---------------------------------------------------------------------------
+# Discord skill commands grouped by category
+# ---------------------------------------------------------------------------
+
+from hermes_cli.commands import discord_skill_commands_by_category  # noqa: E402
+
+
+class TestDiscordSkillCommandsByCategory:
+    """Tests for discord_skill_commands_by_category() — /skill group registration."""
+
+    def test_groups_skills_by_category(self, tmp_path, monkeypatch):
+        """Skills nested 2+ levels deep should be grouped by top-level category."""
+        from unittest.mock import patch
+
+        fake_skills_dir = str(tmp_path / "skills")
+        # Create the directory structure so resolve() works
+        for p in [
+            "skills/creative/ascii-art",
+            "skills/creative/excalidraw",
+            "skills/media/gif-search",
+        ]:
+            (tmp_path / p).mkdir(parents=True, exist_ok=True)
+            (tmp_path / p / "SKILL.md").write_text("---\nname: test\n---\n")
+
+        fake_cmds = {
+            "/ascii-art": {
+                "name": "ascii-art",
+                "description": "Generate ASCII art",
+                "skill_md_path": f"{fake_skills_dir}/creative/ascii-art/SKILL.md",
+            },
+            "/excalidraw": {
+                "name": "excalidraw",
+                "description": "Hand-drawn diagrams",
+                "skill_md_path": f"{fake_skills_dir}/creative/excalidraw/SKILL.md",
+            },
+            "/gif-search": {
+                "name": "gif-search",
+                "description": "Search for GIFs",
+                "skill_md_path": f"{fake_skills_dir}/media/gif-search/SKILL.md",
+            },
+        }
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path / "skills"),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        assert "creative" in categories
+        assert "media" in categories
+        assert len(categories["creative"]) == 2
+        assert len(categories["media"]) == 1
+        assert uncategorized == []
+        assert hidden == 0
+
+    def test_root_level_skills_are_uncategorized(self, tmp_path, monkeypatch):
+        """Skills directly under SKILLS_DIR (only 1 path component) → uncategorized."""
+        from unittest.mock import patch
+
+        fake_skills_dir = str(tmp_path / "skills")
+        (tmp_path / "skills" / "dogfood").mkdir(parents=True, exist_ok=True)
+        (tmp_path / "skills" / "dogfood" / "SKILL.md").write_text("")
+
+        fake_cmds = {
+            "/dogfood": {
+                "name": "dogfood",
+                "description": "QA testing",
+                "skill_md_path": f"{fake_skills_dir}/dogfood/SKILL.md",
+            },
+        }
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path / "skills"),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        assert categories == {}
+        assert len(uncategorized) == 1
+        assert uncategorized[0][0] == "dogfood"
+
+    def test_hub_skills_excluded(self, tmp_path, monkeypatch):
+        """Skills under .hub should be excluded."""
+        from unittest.mock import patch
+
+        fake_skills_dir = str(tmp_path / "skills")
+        (tmp_path / "skills" / ".hub" / "some-skill").mkdir(parents=True, exist_ok=True)
+        (tmp_path / "skills" / ".hub" / "some-skill" / "SKILL.md").write_text("")
+
+        fake_cmds = {
+            "/some-skill": {
+                "name": "some-skill",
+                "description": "Hub skill",
+                "skill_md_path": f"{fake_skills_dir}/.hub/some-skill/SKILL.md",
+            },
+        }
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path / "skills"),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        assert categories == {}
+        assert uncategorized == []
+
+    def test_deep_nested_skills_use_top_category(self, tmp_path, monkeypatch):
+        """Skills like mlops/training/axolotl should group under 'mlops'."""
+        from unittest.mock import patch
+
+        fake_skills_dir = str(tmp_path / "skills")
+        (tmp_path / "skills" / "mlops" / "training" / "axolotl").mkdir(parents=True, exist_ok=True)
+        (tmp_path / "skills" / "mlops" / "training" / "axolotl" / "SKILL.md").write_text("")
+        (tmp_path / "skills" / "mlops" / "inference" / "vllm").mkdir(parents=True, exist_ok=True)
+        (tmp_path / "skills" / "mlops" / "inference" / "vllm" / "SKILL.md").write_text("")
+
+        fake_cmds = {
+            "/axolotl": {
+                "name": "axolotl",
+                "description": "Fine-tuning with Axolotl",
+                "skill_md_path": f"{fake_skills_dir}/mlops/training/axolotl/SKILL.md",
+            },
+            "/vllm": {
+                "name": "vllm",
+                "description": "vLLM inference",
+                "skill_md_path": f"{fake_skills_dir}/mlops/inference/vllm/SKILL.md",
+            },
+        }
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path / "skills"),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        # Both should be under 'mlops' regardless of sub-category
+        assert "mlops" in categories
+        names = {n for n, _d, _k in categories["mlops"]}
+        assert "axolotl" in names
+        assert "vllm" in names
+        assert len(uncategorized) == 0
@@ -0,0 +1,275 @@
+"""Tests for the Command Installation check in hermes doctor."""
+
+import os
+import sys
+import types
+from argparse import Namespace
+from pathlib import Path
+
+import pytest
+
+import hermes_cli.doctor as doctor_mod
+
+
+def _setup_doctor_env(monkeypatch, tmp_path, venv_name="venv"):
+    """Build a throwaway doctor environment under tmp_path; return (home, project, hermes_bin)."""
+    home = tmp_path / ".hermes"
+    home.mkdir(parents=True, exist_ok=True)
+    (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+    project = tmp_path / "project"
+    project.mkdir(exist_ok=True)
+
+    # Create a fake, executable venv entry point at <project>/<venv_name>/bin/hermes
+    venv_bin_dir = project / venv_name / "bin"
+    venv_bin_dir.mkdir(parents=True, exist_ok=True)
+    hermes_bin = venv_bin_dir / "hermes"
+    hermes_bin.write_text("#!/usr/bin/env python\n# entry point\n")
+    hermes_bin.chmod(0o755)
+
+    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+
+    # Stub model_tools so doctor doesn't fail on import
+    fake_model_tools = types.SimpleNamespace(
+        check_tool_availability=lambda *a, **kw: ([], []),
+        TOOLSET_REQUIREMENTS={},
+    )
+    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+    # Stub auth checks (best-effort: any failure to import or patch hermes_cli.auth is ignored)
+    try:
+        from hermes_cli import auth as _auth_mod
+        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+    except Exception:
+        pass
+
+    # Stub httpx.get to avoid network calls (best-effort: ignored if httpx cannot be imported or patched)
+    try:
+        import httpx
+        monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+    except Exception:
+        pass
+
+    return home, project, hermes_bin
+
+
+def _run_doctor(fix=False):
+    """Call ``run_doctor`` with ``Namespace(fix=fix)`` and return everything it wrote to stdout."""
+    from contextlib import redirect_stdout
+    from io import StringIO
+
+    captured = StringIO()
+    with redirect_stdout(captured):
+        doctor_mod.run_doctor(Namespace(fix=fix))
+    return captured.getvalue()
+
+
+class TestDoctorCommandInstallation:
+    """Tests for the ◆ Command Installation section."""
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_correct_symlink_shows_ok(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create the command link dir with correct symlink
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.symlink_to(hermes_bin)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point exists" in out
+        assert "correct target" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_missing_symlink_shows_fail(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+        # Don't create the symlink — it should be missing
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point exists" in out
+        assert "not found" in out
+        assert "hermes doctor --fix" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_fix_creates_missing_symlink(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=True)
+        assert "Command Installation" in out
+        assert "Created symlink" in out
+
+        # Verify the symlink was actually created
+        cmd_link = tmp_path / ".local" / "bin" / "hermes"
+        assert cmd_link.is_symlink()
+        assert cmd_link.resolve() == hermes_bin.resolve()
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_wrong_target_symlink_shows_warn(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create a symlink pointing to the wrong target
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        wrong_target = tmp_path / "wrong_hermes"
+        wrong_target.write_text("#!/usr/bin/env python\n")
+        cmd_link.symlink_to(wrong_target)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "wrong target" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_fix_repairs_wrong_symlink(self, monkeypatch, tmp_path):
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        # Create a symlink pointing to wrong target
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        wrong_target = tmp_path / "wrong_hermes"
+        wrong_target.write_text("#!/usr/bin/env python\n")
+        cmd_link.symlink_to(wrong_target)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=True)
+        assert "Fixed symlink" in out
+
+        # Verify the symlink now points to the correct target
+        assert cmd_link.is_symlink()
+        assert cmd_link.resolve() == hermes_bin.resolve()
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_missing_venv_entry_point_shows_warn(self, monkeypatch, tmp_path):
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+        project = tmp_path / "project"
+        project.mkdir(exist_ok=True)
+        # Do NOT create any venv entry point
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+        monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: ([], []),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+        try:
+            from hermes_cli import auth as _auth_mod
+            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+        except Exception:
+            pass
+        try:
+            import httpx
+            monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+        except Exception:
+            pass
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "Venv entry point not found" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_dot_venv_dir_is_found(self, monkeypatch, tmp_path):
+        """The check finds entry points in .venv/ as well as venv/."""
+        home, project, _ = _setup_doctor_env(monkeypatch, tmp_path, venv_name=".venv")
+
+        # Create the command link with correct symlink
+        hermes_bin = project / ".venv" / "bin" / "hermes"
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.symlink_to(hermes_bin)
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Venv entry point exists" in out
+        assert ".venv/bin/hermes" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_non_symlink_regular_file_shows_ok(self, monkeypatch, tmp_path):
+        """If ~/.local/bin/hermes is a regular file (not symlink), accept it."""
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        cmd_link_dir = tmp_path / ".local" / "bin"
+        cmd_link_dir.mkdir(parents=True)
+        cmd_link = cmd_link_dir / "hermes"
+        cmd_link.write_text("#!/bin/sh\nexec python -m hermes_cli.main \"$@\"\n")
+
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "non-symlink" in out
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="Symlink check is Unix-only")
+    def test_termux_uses_prefix_bin(self, monkeypatch, tmp_path):
+        """On Termux, the command link dir is $PREFIX/bin."""
+        prefix_dir = tmp_path / "termux_prefix"
+        prefix_bin = prefix_dir / "bin"
+        prefix_bin.mkdir(parents=True)
+
+        home, project, hermes_bin = _setup_doctor_env(monkeypatch, tmp_path)
+
+        monkeypatch.setenv("TERMUX_VERSION", "0.118.3")
+        monkeypatch.setenv("PREFIX", str(prefix_dir))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" in out
+        assert "$PREFIX/bin" in out
+
+    def test_windows_skips_check(self, monkeypatch, tmp_path):
+        """On Windows, the Command Installation section is skipped."""
+        home = tmp_path / ".hermes"
+        home.mkdir(parents=True, exist_ok=True)
+        (home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
+
+        project = tmp_path / "project"
+        project.mkdir(exist_ok=True)
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
+        monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project)
+        monkeypatch.setattr(doctor_mod, "_DHH", str(home))
+        monkeypatch.setattr(sys, "platform", "win32")
+
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: ([], []),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+        try:
+            from hermes_cli import auth as _auth_mod
+            monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
+            monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
+        except Exception:
+            pass
+        try:
+            import httpx
+            monkeypatch.setattr(httpx, "get", lambda *a, **kw: types.SimpleNamespace(status_code=200))
+        except Exception:
+            pass
+
+        out = _run_doctor(fix=False)
+        assert "Command Installation" not in out
@@ -452,7 +452,7 @@ class TestGatewayServiceDetection:


 class TestGatewaySystemServiceRouting:
-    def test_systemd_restart_self_requests_graceful_restart_without_reload_or_restart(self, monkeypatch, capsys):
+    def test_systemd_restart_self_requests_graceful_restart_and_waits(self, monkeypatch, capsys):
        calls = []

        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
@@ -466,16 +466,37 @@ class TestGatewaySystemServiceRouting:
            "_request_gateway_self_restart",
            lambda pid: calls.append(("self", pid)) or True,
        )
-        monkeypatch.setattr(
-            gateway_cli.subprocess,
-            "run",
-            lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("systemctl should not run")),
-        )
+
+        # Simulate: old process dies immediately, new process becomes active
+        kill_call_count = [0]
+        def fake_kill(pid, sig):
+            kill_call_count[0] += 1
+            if kill_call_count[0] >= 2:  # first call checks, second = dead
+                raise ProcessLookupError()
+        monkeypatch.setattr(os, "kill", fake_kill)
+
+        # Simulate systemctl is-active returning "active" with a new PID
+        new_pid = [None]
+        def fake_subprocess_run(cmd, **kwargs):
+            if "is-active" in cmd:
+                result = SimpleNamespace(stdout="active\n", returncode=0)
+                new_pid[0] = 999  # new PID
+                return result
+            raise AssertionError(f"Unexpected systemctl call: {cmd}")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)
+        # get_running_pid returns new PID after restart
+        pid_calls = [0]
+        def fake_get_pid():
+            pid_calls[0] += 1
+            return 999 if pid_calls[0] > 1 else 654
+        monkeypatch.setattr("gateway.status.get_running_pid", fake_get_pid)

        gateway_cli.systemd_restart()

-        assert calls == [("refresh", False), ("self", 654)]
-        assert "restart requested" in capsys.readouterr().out.lower()
+        assert ("self", 654) in calls
+        out = capsys.readouterr().out.lower()
+        assert "restarted" in out

    def test_gateway_install_passes_system_flags(self, monkeypatch):
        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
@@ -0,0 +1,83 @@
+"""Tests for non-ASCII credential detection and sanitization.
+
+Covers the fix for issue #6843 — API keys containing Unicode lookalike
+characters (e.g. ʋ U+028B instead of v) cause UnicodeEncodeError when
+httpx tries to encode the Authorization header as ASCII.
+"""
+
+import os
+import sys
+import tempfile
+
+import pytest
+
+from hermes_cli.config import _check_non_ascii_credential
+
+
+class TestCheckNonAsciiCredential:
+    """Tests for _check_non_ascii_credential()."""
+
+    def test_ascii_key_unchanged(self):
+        key = "sk-proj-" + "a" * 100
+        result = _check_non_ascii_credential("TEST_API_KEY", key)
+        assert result == key
+
+    def test_strips_unicode_v_lookalike(self, capsys):
+        """The exact scenario from issue #6843: ʋ instead of v."""
+        key = "sk-proj-abc" + "ʋ" + "def"  # \u028b
+        result = _check_non_ascii_credential("OPENROUTER_API_KEY", key)
+        assert result == "sk-proj-abcdef"
+        assert "ʋ" not in result
+        # Should print a warning
+        captured = capsys.readouterr()
+        assert "non-ASCII" in captured.err
+
+    def test_strips_multiple_non_ascii(self, capsys):
+        key = "sk-proj-aʋbécd"
+        result = _check_non_ascii_credential("OPENAI_API_KEY", key)
+        assert result == "sk-proj-abcd"
+        captured = capsys.readouterr()
+        assert "U+028B" in captured.err  # reports the char
+
+    def test_empty_key(self):
+        result = _check_non_ascii_credential("TEST_KEY", "")
+        assert result == ""
+
+    def test_all_ascii_no_warning(self, capsys):
+        result = _check_non_ascii_credential("KEY", "all-ascii-value-123")
+        assert result == "all-ascii-value-123"
+        captured = capsys.readouterr()
+        assert captured.err == ""
+
+
+class TestEnvLoaderSanitization:
+    """Tests for _sanitize_loaded_credentials in env_loader."""
+
+    def test_strips_non_ascii_from_api_key(self, monkeypatch):
+        from hermes_cli.env_loader import _sanitize_loaded_credentials
+
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-proj-abcʋdef")
+        _sanitize_loaded_credentials()
+        assert os.environ["OPENROUTER_API_KEY"] == "sk-proj-abcdef"
+
+    def test_strips_non_ascii_from_token(self, monkeypatch):
+        from hermes_cli.env_loader import _sanitize_loaded_credentials
+
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tokénvalue")
+        _sanitize_loaded_credentials()
+        assert os.environ["DISCORD_BOT_TOKEN"] == "toknvalue"
+
+    def test_ignores_non_credential_vars(self, monkeypatch):
+        from hermes_cli.env_loader import _sanitize_loaded_credentials
+
+        monkeypatch.setenv("MY_UNICODE_VAR", "héllo wörld")
+        _sanitize_loaded_credentials()
+        # Not a credential suffix — should be left alone
+        assert os.environ["MY_UNICODE_VAR"] == "héllo wörld"
+
+    def test_ascii_credentials_untouched(self, monkeypatch):
+        from hermes_cli.env_loader import _sanitize_loaded_credentials
+
+        monkeypatch.setenv("OPENAI_API_KEY", "sk-proj-allascii123")
+        _sanitize_loaded_credentials()
+        assert os.environ["OPENAI_API_KEY"] == "sk-proj-allascii123"
@@ -0,0 +1,148 @@
+"""Tests for the defensive subparser routing workaround (bpo-9338).
+
+The main() function in hermes_cli/main.py sets subparsers.required=True
+when argv contains a known subcommand name.  This forces deterministic
+routing on Python versions where argparse fails to match subcommand tokens
+when the parent parser has nargs='?' optional arguments (--continue).
+
+If the subcommand token is consumed as a flag value (e.g. `hermes -c model`
+to resume a session named 'model'), the required=True parse raises
+SystemExit and the code falls back to the default required=False behaviour.
+"""
+import argparse
+import io
+import sys
+
+import pytest
+
+
+def _build_parser():
+    """Build a minimal replica of the hermes top-level parser."""
+    parser = argparse.ArgumentParser(prog="hermes")
+    parser.add_argument("--version", "-V", action="store_true")
+    parser.add_argument("--resume", "-r", metavar="SESSION", default=None)
+    parser.add_argument(
+        "--continue", "-c",
+        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="SESSION_NAME",
+    )
+    parser.add_argument("--worktree", "-w", action="store_true", default=False)
+    parser.add_argument("--skills", "-s", action="append", default=None)
+    parser.add_argument("--yolo", action="store_true", default=False)
+    parser.add_argument("--pass-session-id", action="store_true", default=False)
+
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+    chat_p = subparsers.add_parser("chat")
+    chat_p.add_argument("-q", "--query", default=None)
+    subparsers.add_parser("model")
+    subparsers.add_parser("gateway")
+    subparsers.add_parser("setup")
+    return parser, subparsers
+
+
+def _safe_parse(parser, subparsers, argv):
+    """Replica of main()'s defensive parse: strict first, lenient on SystemExit."""
+    known_cmds = set(subparsers.choices.keys()) if hasattr(subparsers, "choices") else set()
+    has_cmd_token = any(t in known_cmds for t in argv if not t.startswith("-"))
+
+    if has_cmd_token:
+        # Strict attempt: require a subcommand so routing is deterministic.
+        subparsers.required = True
+        saved_stderr = sys.stderr
+        sys.stderr = io.StringIO()  # mute argparse's usage/error output
+        try:
+            return parser.parse_args(argv)
+        except SystemExit:
+            pass  # token was consumed as a flag value; retry leniently below
+        finally:
+            # Restore stderr on every exit path, including unexpected errors.
+            sys.stderr = saved_stderr
+
+    subparsers.required = False
+    return parser.parse_args(argv)
+
+
+class TestSubparserRoutingFallback:
+    """Verify the bpo-9338 defensive routing works for all key cases."""
+
+    def test_direct_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["model"])
+        assert args.command == "model"
+
+    def test_subcommand_with_flags(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo", "model"])
+        assert args.command == "model"
+        assert args.yolo is True
+
+    def test_bare_hermes_defaults_to_none(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, [])
+        assert args.command is None
+
+    def test_flags_only_defaults_to_none(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo"])
+        assert args.command is None
+        assert args.yolo is True
+
+    def test_continue_flag_alone(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c"])
+        assert args.command is None
+        assert args.continue_last is True
+
+    def test_continue_with_session_name(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "myproject"])
+        assert args.command is None
+        assert args.continue_last == "myproject"
+
+    def test_continue_with_subcommand_name_as_session(self):
+        """Edge case: session named 'model' — should be treated as session name, not subcommand."""
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "model"])
+        assert args.command is None
+        assert args.continue_last == "model"
+
+    def test_continue_with_session_then_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-c", "myproject", "model"])
+        assert args.command == "model"
+        assert args.continue_last == "myproject"
+
+    def test_chat_with_query(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["chat", "-q", "hello"])
+        assert args.command == "chat"
+        assert args.query == "hello"
+
+    def test_resume_flag(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-r", "abc123"])
+        assert args.command is None
+        assert args.resume == "abc123"
+
+    def test_resume_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-r", "abc123", "chat"])
+        assert args.command == "chat"
+        assert args.resume == "abc123"
+
+    def test_skills_flag_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["-s", "myskill", "chat"])
+        assert args.command == "chat"
+        assert args.skills == ["myskill"]
+
+    def test_all_flags_with_subcommand(self):
+        parser, sub = _build_parser()
+        args = _safe_parse(parser, sub, ["--yolo", "-w", "-s", "myskill", "model"])
+        assert args.command == "model"
+        assert args.yolo is True
+        assert args.worktree is True
+        assert args.skills == ["myskill"]
@@ -8,6 +8,7 @@ from hermes_cli.tools_config import (
    _platform_toolset_summary,
    _save_platform_tools,
    _toolset_has_keys,
+    CONFIGURABLE_TOOLSETS,
    TOOL_CATEGORIES,
    _visible_providers,
    tools_command,
@@ -22,6 +23,15 @@ def test_get_platform_tools_uses_default_when_platform_not_configured():
    assert enabled


+def test_configurable_toolsets_include_messaging():
+    assert any(ts_key == "messaging" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
+
+def test_get_platform_tools_default_telegram_includes_messaging():
+    enabled = _get_platform_tools({}, "telegram")
+
+    assert "messaging" in enabled
+
+
 def test_get_platform_tools_preserves_explicit_empty_selection():
    config = {"platform_toolsets": {"cli": []}}

@@ -694,6 +694,8 @@ class TestNewEndpoints:
        assert "totals" in data
        assert isinstance(data["daily"], list)
        assert "total_sessions" in data["totals"]
+        assert "total_cache_write" in data["totals"]
+        assert "total_api_calls" in data["totals"]

    def test_session_token_endpoint_removed(self):
        """GET /api/auth/session-token no longer exists."""
@@ -984,3 +986,195 @@ class TestModelInfoEndpoint:
        assert resp.status_code == 200
        data = resp.json()
        assert data["auto_context_length"] == 0
+
+
+# ---------------------------------------------------------------------------
+# Gateway health probe tests
+# ---------------------------------------------------------------------------
+
+
+class TestProbeGatewayHealth:
+    """Tests for _probe_gateway_health() — cross-container gateway detection."""
+
+    def test_returns_false_when_no_url_configured(self, monkeypatch):
+        """When GATEWAY_HEALTH_URL is unset, the probe returns (False, None)."""
+        import hermes_cli.web_server as ws
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", None)
+        alive, body = ws._probe_gateway_health()
+        assert alive is False
+        assert body is None
+
+    def test_normalizes_url_with_health_suffix(self, monkeypatch):
+        """If the user sets the URL to include /health, it's stripped to base."""
+        import hermes_cli.web_server as ws
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", "http://gw:8642/health")
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_TIMEOUT", 1)
+        # urlopen is stubbed to record each requested URL and then raise, so
+        # no real network I/O happens; the probe reports "not alive" and we
+        # inspect the URLs it attempted.
+        calls = []
+
+        def mock_urlopen(req, **kwargs):
+            calls.append(req.full_url)
+            raise ConnectionError("mock")
+
+        monkeypatch.setattr(ws.urllib.request, "urlopen", mock_urlopen)
+        alive, body = ws._probe_gateway_health()
+        assert alive is False
+        assert "http://gw:8642/health/detailed" in calls
+        assert "http://gw:8642/health" in calls
+
+    def test_normalizes_url_with_health_detailed_suffix(self, monkeypatch):
+        """If the user sets the URL to include /health/detailed, it's stripped to base."""
+        import hermes_cli.web_server as ws
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", "http://gw:8642/health/detailed")
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_TIMEOUT", 1)
+        calls = []
+
+        def mock_urlopen(req, **kwargs):
+            calls.append(req.full_url)
+            raise ConnectionError("mock")
+
+        monkeypatch.setattr(ws.urllib.request, "urlopen", mock_urlopen)
+        ws._probe_gateway_health()
+        assert "http://gw:8642/health/detailed" in calls
+        assert "http://gw:8642/health" in calls
+
+    def test_successful_detailed_probe(self, monkeypatch):
+        """Successful /health/detailed probe returns (True, body_dict)."""
+        import hermes_cli.web_server as ws
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", "http://gw:8642")
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_TIMEOUT", 1)
+
+        response_body = json.dumps({
+            "status": "ok",
+            "gateway_state": "running",
+            "pid": 42,
+        })
+
+        mock_resp = MagicMock()
+        mock_resp.status = 200
+        mock_resp.read.return_value = response_body.encode()
+        mock_resp.__enter__ = MagicMock(return_value=mock_resp)
+        mock_resp.__exit__ = MagicMock(return_value=False)
+
+        monkeypatch.setattr(ws.urllib.request, "urlopen", lambda req, **kw: mock_resp)
+        alive, body = ws._probe_gateway_health()
+        assert alive is True
+        assert body["status"] == "ok"
+        assert body["pid"] == 42
+
+    def test_detailed_fails_falls_back_to_simple_health(self, monkeypatch):
+        """If /health/detailed fails, falls back to /health."""
+        import hermes_cli.web_server as ws
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", "http://gw:8642")
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_TIMEOUT", 1)
+
+        call_count = [0]
+
+        def mock_urlopen(req, **kwargs):
+            call_count[0] += 1
+            if call_count[0] == 1:
+                raise ConnectionError("detailed failed")
+            mock_resp = MagicMock()
+            mock_resp.status = 200
+            mock_resp.read.return_value = json.dumps({"status": "ok"}).encode()
+            mock_resp.__enter__ = MagicMock(return_value=mock_resp)
+            mock_resp.__exit__ = MagicMock(return_value=False)
+            return mock_resp
+
+        monkeypatch.setattr(ws.urllib.request, "urlopen", mock_urlopen)
+        alive, body = ws._probe_gateway_health()
+        assert alive is True
+        assert body["status"] == "ok"
+        assert call_count[0] == 2
+
+
+class TestStatusRemoteGateway:
+    """Tests for /api/status with remote gateway health fallback."""
+
+    @pytest.fixture(autouse=True)
+    def _setup_test_client(self):
+        try:
+            from starlette.testclient import TestClient
+        except ImportError:
+            pytest.skip("fastapi/starlette not installed")
+
+        from hermes_cli.web_server import app, _SESSION_TOKEN
+        self.client = TestClient(app)
+        self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
+
+    def test_status_falls_back_to_remote_probe(self, monkeypatch):
+        """When local PID check fails and remote probe succeeds, gateway shows running."""
+        import hermes_cli.web_server as ws
+
+        monkeypatch.setattr(ws, "get_running_pid", lambda: None)
+        monkeypatch.setattr(ws, "read_runtime_status", lambda: None)
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", "http://gw:8642")
+        monkeypatch.setattr(ws, "_probe_gateway_health", lambda: (True, {
+            "status": "ok",
+            "gateway_state": "running",
+            "platforms": {"telegram": {"state": "connected"}},
+            "pid": 999,
+        }))
+
+        resp = self.client.get("/api/status")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["gateway_running"] is True
+        assert data["gateway_pid"] == 999
+        assert data["gateway_state"] == "running"
+
+    def test_status_remote_probe_not_attempted_when_local_pid_found(self, monkeypatch):
+        """When local PID check succeeds, the remote probe is never called."""
+        import hermes_cli.web_server as ws
+
+        monkeypatch.setattr(ws, "get_running_pid", lambda: 1234)
+        monkeypatch.setattr(ws, "read_runtime_status", lambda: {
+            "gateway_state": "running",
+            "platforms": {},
+        })
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", "http://gw:8642")
+        probe_called = [False]
+
+        def track_probe():
+            probe_called[0] = True
+            # Stay hermetic: never reach the real probe/network on regression.
+            return (False, None)
+
+        monkeypatch.setattr(ws, "_probe_gateway_health", track_probe)
+
+        resp = self.client.get("/api/status")
+        assert resp.status_code == 200
+        assert not probe_called[0]
+
+    def test_status_remote_probe_not_attempted_when_no_url(self, monkeypatch):
+        """When GATEWAY_HEALTH_URL is unset, no probe is attempted."""
+        import hermes_cli.web_server as ws
+
+        monkeypatch.setattr(ws, "get_running_pid", lambda: None)
+        monkeypatch.setattr(ws, "read_runtime_status", lambda: None)
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", None)
+
+        resp = self.client.get("/api/status")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["gateway_running"] is False
+
+    def test_status_remote_running_null_pid(self, monkeypatch):
+        """Remote gateway running but PID not in response — pid should be None."""
+        import hermes_cli.web_server as ws
+
+        monkeypatch.setattr(ws, "get_running_pid", lambda: None)
+        monkeypatch.setattr(ws, "read_runtime_status", lambda: None)
+        monkeypatch.setattr(ws, "_GATEWAY_HEALTH_URL", "http://gw:8642")
+        monkeypatch.setattr(ws, "_probe_gateway_health", lambda: (True, {
+            "status": "ok",
+        }))
+
+        resp = self.client.get("/api/status")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["gateway_running"] is True
+        assert data["gateway_pid"] is None
+        assert data["gateway_state"] == "running"
@@ -136,33 +136,29 @@ class TestGatewaySkipsPersistenceOnFailure:
    the gateway should NOT persist messages to the transcript."""

    def test_agent_failed_early_detected(self):
-        """The agent_failed_early flag is True when failed=True and
-        no final_response."""
+        """The agent_failed_early flag is True when failed=True,
+        regardless of final_response."""
        agent_result = {
            "failed": True,
            "final_response": None,
            "messages": [],
            "error": "Non-retryable client error",
        }
-        agent_failed_early = (
-            agent_result.get("failed")
-            and not agent_result.get("final_response")
-        )
+        agent_failed_early = bool(agent_result.get("failed"))
        assert agent_failed_early

-    def test_agent_with_response_not_failed_early(self):
-        """When the agent has a final_response, it's not a failed-early
-        scenario even if failed=True."""
+    def test_agent_failed_with_error_response_still_detected(self):
+        """When _run_agent_blocking converts an error to final_response,
+        the failed flag should still trigger agent_failed_early.  This
+        was the core bug in #9893 — the old guard checked
+        ``not final_response`` which was always truthy after conversion."""
        agent_result = {
            "failed": True,
-            "final_response": "Here is a partial response",
+            "final_response": "⚠️ Request payload too large: max compression attempts reached.",
            "messages": [],
        }
-        agent_failed_early = (
-            agent_result.get("failed")
-            and not agent_result.get("final_response")
-        )
-        assert not agent_failed_early
+        agent_failed_early = bool(agent_result.get("failed"))
+        assert agent_failed_early

    def test_successful_agent_not_failed_early(self):
        """A successful agent result should not trigger skip."""
@@ -170,13 +166,41 @@ class TestGatewaySkipsPersistenceOnFailure:
            "final_response": "Hello!",
            "messages": [{"role": "assistant", "content": "Hello!"}],
        }
-        agent_failed_early = (
-            agent_result.get("failed")
-            and not agent_result.get("final_response")
-        )
+        agent_failed_early = bool(agent_result.get("failed"))
        assert not agent_failed_early


+class TestCompressionExhaustedFlag:
+    """When compression is exhausted, the agent should set both
+    failed=True and compression_exhausted=True so the gateway can
+    auto-reset the session.  (#9893)"""
+
+    def test_compression_exhausted_returns_carry_flag(self):
+        """Simulate the return dict from a compression-exhausted agent."""
+        agent_result = {
+            "messages": [],
+            "completed": False,
+            "api_calls": 3,
+            "error": "Request payload too large: max compression attempts (3) reached.",
+            "partial": True,
+            "failed": True,
+            "compression_exhausted": True,
+        }
+        assert agent_result.get("failed")
+        assert agent_result.get("compression_exhausted")
+
+    def test_normal_failure_not_compression_exhausted(self):
+        """Non-compression failures should not have compression_exhausted."""
+        agent_result = {
+            "messages": [],
+            "completed": False,
+            "failed": True,
+            "error": "Invalid API response after 3 retries",
+        }
+        assert agent_result.get("failed")
+        assert not agent_result.get("compression_exhausted")
+
+
 # ---------------------------------------------------------------------------
 # Test 3: Context-overflow error messages
 # ---------------------------------------------------------------------------
@@ -0,0 +1,111 @@
+"""Tests that invalid context_length values in config produce visible warnings."""
+
+from unittest.mock import patch, MagicMock, call
+
+
+def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-4.6"):
+    """Build an AIAgent with the given model config."""
+    cfg = {"model": model_cfg}
+    if custom_providers is not None:
+        cfg["custom_providers"] = custom_providers
+
+    with (
+        patch("hermes_cli.config.load_config", return_value=cfg),
+        patch("agent.model_metadata.get_model_context_length", return_value=128_000),
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        from run_agent import AIAgent
+
+        agent = AIAgent(
+            model=model,
+            api_key="test-key-1234567890",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+    return agent
+
+
+def test_valid_integer_context_length_no_warning():
+    """Plain integer context_length should work silently."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": 256000})
+    assert agent._config_context_length == 256000
+    # No warning about invalid context_length
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
+
+
+def test_string_k_suffix_context_length_warns():
+    """context_length: '256K' should warn the user clearly."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": "256K"})
+    assert agent._config_context_length is None
+    # Should have warned
+    warning_calls = [c for c in mock_logger.warning.call_args_list
+                     if "Invalid" in str(c) and "256K" in str(c)]
+    assert len(warning_calls) == 1
+    assert "plain integer" in str(warning_calls[0])
+
+
+def test_string_numeric_context_length_works():
+    """context_length: '256000' (string) should parse fine via int()."""
+    with patch("run_agent.logger") as mock_logger:
+        agent = _build_agent({"default": "gpt5.4", "provider": "custom",
+                              "base_url": "http://localhost:4000/v1",
+                              "context_length": "256000"})
+    assert agent._config_context_length == 256000
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
+
+
+def test_custom_providers_invalid_context_length_warns():
+    """A '256K' context_length under custom_providers logs exactly one warning."""
+    custom_providers = [
+        {
+            "name": "LiteLLM",
+            "base_url": "http://localhost:4000/v1",
+            "models": {
+                "gpt5.4": {"context_length": "256K"}
+            },
+        }
+    ]
+    with patch("run_agent.logger") as mock_logger:
+        _build_agent(
+            {"default": "gpt5.4", "provider": "custom",
+             "base_url": "http://localhost:4000/v1"},
+            custom_providers=custom_providers,
+            model="gpt5.4",
+        )
+    warning_calls = [c for c in mock_logger.warning.call_args_list
+                     if "Invalid" in str(c) and "256K" in str(c)]
+    assert len(warning_calls) == 1
+    assert "custom_providers" in str(warning_calls[0])
+
+
+def test_custom_providers_valid_context_length():
+    """An integer context_length under custom_providers logs no 'Invalid' warning."""
+    custom_providers = [
+        {
+            "name": "LiteLLM",
+            "base_url": "http://localhost:4000/v1",
+            "models": {
+                "gpt5.4": {"context_length": 256000}
+            },
+        }
+    ]
+    with patch("run_agent.logger") as mock_logger:
+        _build_agent(
+            {"default": "gpt5.4", "provider": "custom",
+             "base_url": "http://localhost:4000/v1"},
+            custom_providers=custom_providers,
+            model="gpt5.4",
+        )
+    for c in mock_logger.warning.call_args_list:
+        assert "Invalid" not in str(c)
@@ -142,6 +142,33 @@ class TestSurrogateVsAsciiSanitization:
        assert _sanitize_messages_surrogates(messages) is False


+class TestApiKeyNonAsciiSanitization:
+    """Tests for API key sanitization in the UnicodeEncodeError recovery.
+
+    Covers the root cause of issue #6843: a non-ASCII character (ʋ U+028B)
+    in the API key causes httpx to fail when encoding the Authorization
+    header as ASCII.  The recovery block must strip non-ASCII from the key.
+    """
+
+    def test_strip_non_ascii_from_api_key(self):
+        """_strip_non_ascii removes ʋ from an API key string."""
+        key = "sk-proj-abc" + "ʋ" + "def"
+        assert _strip_non_ascii(key) == "sk-proj-abcdef"
+
+    def test_api_key_at_position_153(self):
+        """Reproduce the exact error: ʋ at position 153 in 'Bearer <key>'."""
+        key = "sk-proj-" + "a" * 138 + "ʋ" + "bcd"
+        auth_value = f"Bearer {key}"
+        # This is what httpx does — and it fails:
+        with pytest.raises(UnicodeEncodeError) as exc_info:
+            auth_value.encode("ascii")
+        assert exc_info.value.start == 153
+        # After sanitization, it should work:
+        sanitized_key = _strip_non_ascii(key)
+        sanitized_auth = f"Bearer {sanitized_key}"
+        sanitized_auth.encode("ascii")  # should not raise
+
+
 class TestSanitizeToolsNonAscii:
    """Tests for _sanitize_tools_non_ascii."""

@@ -203,3 +230,67 @@ class TestSanitizeStructureNonAscii:
        assert _sanitize_structure_non_ascii(payload) is True
        assert payload["default_headers"]["X-Title"] == "Hermes  Agent"
        assert payload["default_headers"]["User-Agent"] == "Hermes/1.0 "
+
+
+class TestApiKeyClientSync:
+    """Verify that ASCII recovery updates the live OpenAI client's api_key.
+
+    The OpenAI SDK stores its own copy of api_key which auth_headers reads
+    dynamically.  If only self.api_key is updated but self.client.api_key
+    is not, the next request still sends the corrupted key in the
+    Authorization header.
+    """
+
+    def test_client_api_key_updated_on_sanitize(self):
+        """Simulate the recovery path and verify client.api_key is synced."""
+        from unittest.mock import MagicMock
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        bad_key = "sk-proj-abc\u028bdef"  # ʋ lookalike at position 11
+        agent.api_key = bad_key
+        agent._client_kwargs = {"api_key": bad_key}
+        agent.quiet_mode = True
+
+        # Mock client with its own api_key attribute (like the real OpenAI client)
+        mock_client = MagicMock()
+        mock_client.api_key = bad_key
+        agent.client = mock_client
+
+        # --- replicate the recovery logic from run_agent.py ---
+        _raw_key = agent.api_key
+        _clean_key = _strip_non_ascii(_raw_key)
+        assert _clean_key != _raw_key, "test precondition: key should have non-ASCII"
+
+        agent.api_key = _clean_key
+        agent._client_kwargs["api_key"] = _clean_key
+        if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
+            agent.client.api_key = _clean_key
+
+        # All three locations should now hold the clean key
+        assert agent.api_key == "sk-proj-abcdef"
+        assert agent._client_kwargs["api_key"] == "sk-proj-abcdef"
+        assert agent.client.api_key == "sk-proj-abcdef"
+        # The bad char should be gone from all of them
+        assert "\u028b" not in agent.api_key
+        assert "\u028b" not in agent._client_kwargs["api_key"]
+        assert "\u028b" not in agent.client.api_key
+
+    def test_client_none_does_not_crash(self):
+        """Recovery should not crash when client is None (pre-init)."""
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        bad_key = "sk-proj-\u028b"
+        agent.api_key = bad_key
+        agent._client_kwargs = {"api_key": bad_key}
+        agent.client = None
+
+        _clean_key = _strip_non_ascii(bad_key)
+        agent.api_key = _clean_key
+        agent._client_kwargs["api_key"] = _clean_key
+        if getattr(agent, "client", None) is not None and hasattr(agent.client, "api_key"):
+            agent.client.api_key = _clean_key
+
+        assert agent.api_key == "sk-proj-"
+        assert agent.client is None  # should not have been touched
@@ -62,6 +62,27 @@ class TestSessionLifecycle:
        assert session["input_tokens"] == 300
        assert session["output_tokens"] == 150

+    def test_update_token_counts_tracks_api_call_count(self, db):
+        """api_call_count increments with each update_token_counts call."""
+        db.create_session(session_id="s1", source="cli")
+        db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)
+        db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)
+        db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)
+
+        session = db.get_session("s1")
+        assert session["api_call_count"] == 3
+
+    def test_update_token_counts_api_call_count_absolute(self, db):
+        """absolute mode sets api_call_count directly."""
+        db.create_session(session_id="s1", source="cli")
+        db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)
+        db.update_token_counts("s1", input_tokens=300, output_tokens=150,
+                               api_call_count=5, absolute=True)
+
+        session = db.get_session("s1")
+        assert session["api_call_count"] == 5
+        assert session["input_tokens"] == 300
+
    def test_update_token_counts_backfills_model_when_null(self, db):
        db.create_session(session_id="s1", source="telegram")
        db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4")
@@ -935,7 +956,7 @@ class TestSchemaInit:
    def test_schema_version(self, db):
        cursor = db._conn.execute("SELECT version FROM schema_version")
        version = cursor.fetchone()[0]
-        assert version == 6
+        assert version == 7

    def test_title_column_exists(self, db):
        """Verify the title column was created in the sessions table."""
@@ -996,13 +1017,19 @@ class TestSchemaInit:

        # Verify migration
        cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
-        assert cursor.fetchone()[0] == 6
+        assert cursor.fetchone()[0] == 7

        # Verify title column exists and is NULL for existing sessions
        session = migrated_db.get_session("existing")
        assert session is not None
        assert session["title"] is None

+        # Verify api_call_count column was added with default 0
+        cursor = migrated_db._conn.execute(
+            "SELECT api_call_count FROM sessions WHERE id = 'existing'"
+        )
+        assert cursor.fetchone()[0] == 0
+
        # Verify we can set title on migrated session
        assert migrated_db.set_session_title("existing", "Migrated Title") is True
        session = migrated_db.get_session("existing")
@@ -116,6 +116,22 @@ class TestValidateToolset:
    def test_invalid(self):
        assert validate_toolset("nonexistent") is False

+    def test_mcp_alias_uses_live_registry(self, monkeypatch):
+        reg = ToolRegistry()
+        reg.register(
+            name="mcp_dynserver_ping",
+            toolset="mcp-dynserver",
+            schema=_make_schema("mcp_dynserver_ping", "Ping"),
+            handler=_dummy_handler,
+        )
+        reg.register_toolset_alias("dynserver", "mcp-dynserver")
+
+        monkeypatch.setattr("tools.registry.registry", reg)
+
+        assert validate_toolset("dynserver") is True
+        assert validate_toolset("mcp-dynserver") is True
+        assert "mcp_dynserver_ping" in resolve_toolset("dynserver")
+

 class TestGetToolsetInfo:
    def test_leaf(self):
@@ -150,6 +166,23 @@ class TestCreateCustomToolset:
            del TOOLSETS["_test_custom"]


+class TestRegistryOwnedToolsets:
+    def test_registry_membership_is_live(self, monkeypatch):
+        reg = ToolRegistry()
+        reg.register(
+            name="test_live_toolset_tool",
+            toolset="test-live-toolset",
+            schema=_make_schema("test_live_toolset_tool", "Live"),
+            handler=_dummy_handler,
+        )
+
+        monkeypatch.setattr("tools.registry.registry", reg)
+
+        assert validate_toolset("test-live-toolset") is True
+        assert get_toolset("test-live-toolset")["tools"] == ["test_live_toolset_tool"]
+        assert resolve_toolset("test-live-toolset") == ["test_live_toolset_tool"]
+
+
 class TestToolsetConsistency:
    """Verify structural integrity of the built-in TOOLSETS dict."""

@@ -550,11 +550,12 @@ class TestGatewayProtection:
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is False

-    def test_systemctl_restart_not_flagged(self):
-        """Using systemctl to manage the gateway is the correct approach."""
+    def test_systemctl_restart_flagged(self):
+        """systemctl restart kills running agents and should require approval."""
        cmd = "systemctl --user restart hermes-gateway"
        dangerous, key, desc = detect_dangerous_command(cmd)
-        assert dangerous is False
+        assert dangerous is True
+        assert "stop/restart" in desc

    def test_pkill_hermes_detected(self):
        """pkill targeting hermes/gateway processes must be caught."""
@@ -31,18 +31,25 @@ def _clear_browser_caches():


 class TestSanePath:
-    """Verify _SANE_PATH includes Homebrew directories."""
+    """Verify _SANE_PATH includes fallback directories used by browser_tool."""
+
+    def test_includes_termux_bin(self):
+        """The Termux bin directory is a whole entry of _SANE_PATH, not just a substring."""
+        assert "/data/data/com.termux/files/usr/bin" in _SANE_PATH.split(os.pathsep)
+
+    def test_includes_termux_sbin(self):
+        """The Termux sbin directory is a whole entry of _SANE_PATH, not just a substring."""
+        assert "/data/data/com.termux/files/usr/sbin" in _SANE_PATH.split(os.pathsep)

    def test_includes_homebrew_bin(self):
-        assert "/opt/homebrew/bin" in _SANE_PATH
+        assert "/opt/homebrew/bin" in _SANE_PATH.split(os.pathsep)

    def test_includes_homebrew_sbin(self):
-        assert "/opt/homebrew/sbin" in _SANE_PATH
+        assert "/opt/homebrew/sbin" in _SANE_PATH.split(os.pathsep)

    def test_includes_standard_dirs(self):
-        assert "/usr/local/bin" in _SANE_PATH
-        assert "/usr/bin" in _SANE_PATH
-        assert "/bin" in _SANE_PATH
+        path_parts = _SANE_PATH.split(os.pathsep)
+        assert "/usr/local/bin" in path_parts
+        assert "/usr/bin" in path_parts
+        assert "/bin" in path_parts


 class TestDiscoverHomebrewNodeDirs:
@@ -143,6 +150,44 @@ class TestFindAgentBrowser:
            result = _find_agent_browser()
            assert result == "npx agent-browser"

+    def test_finds_npx_in_termux_fallback_path(self):
+        """Should find npx when only Termux fallback dirs are available."""
+        # Fake shutil.which: agent-browser is never found; npx is found only
+        # when the search path passed in contains the Termux bin directory.
+        def mock_which(cmd, path=None):
+            if cmd == "agent-browser":
+                return None
+            if cmd == "npx":
+                if path and "/data/data/com.termux/files/usr/bin" in path:
+                    return "/data/data/com.termux/files/usr/bin/npx"
+                return None
+            return None
+
+        original_path_exists = Path.exists
+
+        # Report any node_modules agent-browser path as missing; every other
+        # path is delegated to the real Path.exists.
+        def mock_path_exists(self):
+            if "node_modules" in str(self) and "agent-browser" in str(self):
+                return False
+            return original_path_exists(self)
+
+        real_isdir = os.path.isdir
+
+        # Pretend the two Termux directories exist; every other path is
+        # delegated to the real os.path.isdir.
+        def selective_isdir(path):
+            if path in (
+                "/data/data/com.termux/files/usr/bin",
+                "/data/data/com.termux/files/usr/sbin",
+            ):
+                return True
+            return real_isdir(path)
+
+        # Homebrew discovery is stubbed to return nothing so it cannot supply npx.
+        with patch("shutil.which", side_effect=mock_which), \
+             patch("os.path.isdir", side_effect=selective_isdir), \
+             patch.object(Path, "exists", mock_path_exists), \
+             patch(
+                 "tools.browser_tool._discover_homebrew_node_dirs",
+                 return_value=[],
+             ):
+            result = _find_agent_browser()
+            assert result == "npx agent-browser"
+
    def test_raises_when_not_found(self):
        """Should raise FileNotFoundError when nothing works."""
        original_path_exists = Path.exists
@@ -399,3 +444,51 @@ class TestRunBrowserCommandPathConstruction:
        result_path = captured_env.get("PATH", "")
        assert "/opt/homebrew/bin" in result_path
        assert "/opt/homebrew/sbin" in result_path
+
+    def test_subprocess_path_includes_termux_fallback_dirs(self, tmp_path):
+        """Termux fallback dirs should survive browser PATH rebuilding."""
+        # Filled by capture_popen with the env mapping handed to subprocess.Popen.
+        captured_env = {}
+
+        # Stand-in process object that reports a successful exit.
+        mock_proc = MagicMock()
+        mock_proc.returncode = 0
+        mock_proc.wait.return_value = 0
+
+        # Replacement for subprocess.Popen: record the env, start nothing.
+        def capture_popen(cmd, **kwargs):
+            captured_env.update(kwargs.get("env", {}))
+            return mock_proc
+
+        fake_session = {
+            "session_name": "test-session",
+            "session_id": "test-id",
+            "cdp_url": None,
+        }
+
+        # Content returned by the mocked builtins.open below.
+        fake_json = json.dumps({"success": True})
+        real_isdir = os.path.isdir
+
+        # Pretend the Termux directories and anything under tmp_path exist;
+        # every other path is delegated to the real os.path.isdir.
+        def selective_isdir(path):
+            if path in (
+                "/data/data/com.termux/files/usr/bin",
+                "/data/data/com.termux/files/usr/sbin",
+            ):
+                return True
+            if path.startswith(str(tmp_path)):
+                return True
+            return real_isdir(path)
+
+        # The environment is cleared and given a minimal PATH, so the Termux
+        # entries asserted below can only come from the code under test.
+        with patch("tools.browser_tool._find_agent_browser", return_value="/usr/local/bin/agent-browser"), \
+             patch("tools.browser_tool._get_session_info", return_value=fake_session), \
+             patch("tools.browser_tool._socket_safe_tmpdir", return_value=str(tmp_path)), \
+             patch("tools.browser_tool._discover_homebrew_node_dirs", return_value=[]), \
+             patch("os.path.isdir", side_effect=selective_isdir), \
+             patch("subprocess.Popen", side_effect=capture_popen), \
+             patch("os.open", return_value=99), \
+             patch("os.close"), \
+             patch("tools.interrupt.is_interrupted", return_value=False), \
+             patch.dict(os.environ, {"PATH": "/usr/bin:/bin", "HOME": "/home/test"}, clear=True):
+            with patch("builtins.open", mock_open(read_data=fake_json)):
+                _run_browser_command("test-task", "navigate", ["https://example.com"])
+
+        # Both Termux directories must appear in the PATH given to the subprocess.
+        result_path = captured_env.get("PATH", "")
+        assert "/data/data/com.termux/files/usr/bin" in result_path
+        assert "/data/data/com.termux/files/usr/sbin" in result_path
@@ -46,3 +46,59 @@ class TestFindDocker:
        with patch("tools.environments.docker.shutil.which", return_value=None):
            second = docker_mod.find_docker()
        assert first == second == "/usr/local/bin/docker"
+
+    def test_env_var_override_takes_precedence(self, tmp_path):
+        """HERMES_DOCKER_BINARY overrides PATH and known-location discovery."""
+        # Executable stand-in file (mode 0o755) for the override to point at.
+        fake_binary = tmp_path / "podman"
+        fake_binary.write_text("#!/bin/sh\n")
+        fake_binary.chmod(0o755)
+
+        # which() would return a docker path, but the override must win.
+        with patch.dict(os.environ, {"HERMES_DOCKER_BINARY": str(fake_binary)}), \
+             patch("tools.environments.docker.shutil.which", return_value="/usr/bin/docker"):
+            result = docker_mod.find_docker()
+        assert result == str(fake_binary)
+
+    def test_env_var_override_ignored_if_not_executable(self, tmp_path):
+        """Non-executable HERMES_DOCKER_BINARY falls through to normal discovery."""
+        # The file exists but has no execute bit set.
+        fake_binary = tmp_path / "podman"
+        fake_binary.write_text("#!/bin/sh\n")
+        fake_binary.chmod(0o644)  # not executable
+
+        # The override is expected to be skipped in favour of the which() result.
+        with patch.dict(os.environ, {"HERMES_DOCKER_BINARY": str(fake_binary)}), \
+             patch("tools.environments.docker.shutil.which", return_value="/usr/bin/docker"):
+            result = docker_mod.find_docker()
+        assert result == "/usr/bin/docker"
+
+    def test_env_var_override_ignored_if_nonexistent(self):
+        """Non-existent HERMES_DOCKER_BINARY path falls through."""
+        # The override names a path that is not created anywhere in this test,
+        # so the which() result is expected instead.
+        with patch.dict(os.environ, {"HERMES_DOCKER_BINARY": "/nonexistent/podman"}), \
+             patch("tools.environments.docker.shutil.which", return_value="/usr/bin/docker"):
+            result = docker_mod.find_docker()
+        assert result == "/usr/bin/docker"
+
+    def test_podman_on_path_used_when_docker_missing(self):
+        """When docker is not on PATH, podman is tried next."""
+        # Fake which(): docker is absent, podman is present, anything else is absent.
+        def which_side_effect(name):
+            if name == "docker":
+                return None
+            if name == "podman":
+                return "/usr/bin/podman"
+            return None
+
+        # _DOCKER_SEARCH_PATHS is emptied so only which() can supply a binary
+        # (presumably this list drives the known-location fallback; confirm in find_docker).
+        with patch("tools.environments.docker.shutil.which", side_effect=which_side_effect), \
+             patch("tools.environments.docker._DOCKER_SEARCH_PATHS", []):
+            result = docker_mod.find_docker()
+        assert result == "/usr/bin/podman"
+
+    def test_docker_preferred_over_podman(self):
+        """When both docker and podman are on PATH, docker wins."""
+        # Fake which(): both runtimes are present; anything else is absent.
+        def which_side_effect(name):
+            if name == "docker":
+                return "/usr/bin/docker"
+            if name == "podman":
+                return "/usr/bin/podman"
+            return None
+
+        with patch("tools.environments.docker.shutil.which", side_effect=which_side_effect):
+            result = docker_mod.find_docker()
+        assert result == "/usr/bin/docker"
@@ -21,34 +21,19 @@ class TestRegisterServerTools:
    def mock_registry(self):
        return ToolRegistry()

-    @pytest.fixture
-    def mock_toolsets(self):
-        return {
-            "hermes-cli": {"tools": ["terminal"], "description": "CLI", "includes": []},
-            "hermes-telegram": {"tools": ["terminal"], "description": "TG", "includes": []},
-            "custom-toolset": {"tools": [], "description": "Other", "includes": []},
-        }
-
-    def test_injects_hermes_toolsets(self, mock_registry, mock_toolsets):
-        """Tools are injected into hermes-* toolsets but not custom ones."""
+    def test_exposes_live_server_aliases(self, mock_registry):
+        """Registered MCP tools are reachable via live raw-server aliases."""
        server = MCPServerTask("my_srv")
        server._tools = [_make_mcp_tool("my_tool", "desc")]
        server.session = MagicMock()
+        from toolsets import resolve_toolset, validate_toolset

-        with patch("tools.registry.registry", mock_registry), \
-            patch("toolsets.create_custom_toolset"), \
-            patch.dict("toolsets.TOOLSETS", mock_toolsets, clear=True):
-
+        with patch("tools.registry.registry", mock_registry):
            registered = _register_server_tools("my_srv", server, {})
-
-        assert "mcp_my_srv_my_tool" in registered
-        assert "mcp_my_srv_my_tool" in mock_registry.get_all_tool_names()
-
-        # Injected into hermes-* toolsets
-        assert "mcp_my_srv_my_tool" in mock_toolsets["hermes-cli"]["tools"]
-        assert "mcp_my_srv_my_tool" in mock_toolsets["hermes-telegram"]["tools"]
-        # NOT into non-hermes toolsets
-        assert "mcp_my_srv_my_tool" not in mock_toolsets["custom-toolset"]["tools"]
+            assert "mcp_my_srv_my_tool" in registered
+            assert "mcp_my_srv_my_tool" in mock_registry.get_all_tool_names()
+            assert validate_toolset("my_srv") is True
+            assert "mcp_my_srv_my_tool" in resolve_toolset("my_srv")


 class TestRefreshTools:
@@ -58,19 +43,13 @@ class TestRefreshTools:
    def mock_registry(self):
        return ToolRegistry()

-    @pytest.fixture
-    def mock_toolsets(self):
-        return {
-            "hermes-cli": {"tools": ["terminal"], "description": "CLI", "includes": []},
-            "hermes-telegram": {"tools": ["terminal"], "description": "TG", "includes": []},
-        }
-
    @pytest.mark.asyncio
-    async def test_nuke_and_repave(self, mock_registry, mock_toolsets):
+    async def test_nuke_and_repave(self, mock_registry):
        """Old tools are removed and new tools registered on refresh."""
        server = MCPServerTask("live_srv")
        server._refresh_lock = asyncio.Lock()
        server._config = {}
+        from toolsets import resolve_toolset

        # Seed initial state: one old tool registered
        mock_registry.register(
@@ -79,7 +58,6 @@ class TestRefreshTools:
            description="", emoji="",
        )
        server._registered_tool_names = ["mcp_live_srv_old_tool"]
-        mock_toolsets["hermes-cli"]["tools"].append("mcp_live_srv_old_tool")

        # New tool list from server
        new_tool = _make_mcp_tool("new_tool", "new behavior")
@@ -89,20 +67,13 @@ class TestRefreshTools:
            )
        )

-        with patch("tools.registry.registry", mock_registry), \
-            patch("toolsets.create_custom_toolset"), \
-            patch.dict("toolsets.TOOLSETS", mock_toolsets, clear=True):
-
+        with patch("tools.registry.registry", mock_registry):
            await server._refresh_tools()
-
-        # Old tool completely gone
-        assert "mcp_live_srv_old_tool" not in mock_registry.get_all_tool_names()
-        assert "mcp_live_srv_old_tool" not in mock_toolsets["hermes-cli"]["tools"]
-
-        # New tool registered
-        assert "mcp_live_srv_new_tool" in mock_registry.get_all_tool_names()
-        assert "mcp_live_srv_new_tool" in mock_toolsets["hermes-cli"]["tools"]
-        assert server._registered_tool_names == ["mcp_live_srv_new_tool"]
+            assert "mcp_live_srv_old_tool" not in mock_registry.get_all_tool_names()
+            assert "mcp_live_srv_old_tool" not in resolve_toolset("live_srv")
+            assert "mcp_live_srv_new_tool" in mock_registry.get_all_tool_names()
+            assert "mcp_live_srv_new_tool" in resolve_toolset("live_srv")
+            assert server._registered_tool_names == ["mcp_live_srv_new_tool"]


 class TestMessageHandler:
@@ -165,6 +136,25 @@ class TestDeregister:
        # bar still in ts1, so check should remain
        assert "ts1" in reg._toolset_checks

+    def test_removes_toolset_alias_when_last_tool_is_removed(self):
+        """Deregistering the only tool in a toolset also drops the alias pointing at it."""
+        reg = ToolRegistry()
+        reg.register(name="foo", toolset="mcp-srv", schema={}, handler=lambda x: x)
+        reg.register_toolset_alias("srv", "mcp-srv")
+
+        # "foo" is the sole member of "mcp-srv", so this leaves the toolset with no tools.
+        reg.deregister("foo")
+
+        assert reg.get_toolset_alias_target("srv") is None
+
+    def test_preserves_toolset_alias_while_toolset_still_exists(self):
+        """The alias survives deregistration while another tool remains in the toolset."""
+        reg = ToolRegistry()
+        reg.register(name="foo", toolset="mcp-srv", schema={}, handler=lambda x: x)
+        reg.register(name="bar", toolset="mcp-srv", schema={}, handler=lambda x: x)
+        reg.register_toolset_alias("srv", "mcp-srv")
+
+        # "bar" is still registered in "mcp-srv" after "foo" is removed.
+        reg.deregister("foo")
+
+        assert reg.get_toolset_alias_target("srv") == "mcp-srv"
+
    def test_noop_for_unknown_tool(self):
        reg = ToolRegistry()
        reg.deregister("nonexistent")  # Should not raise
@@ -184,11 +184,7 @@ class TestToolHandler:
    def _patch_mcp_loop(self, coro_side_effect=None):
        """Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
        def fake_run(coro, timeout=30):
-            loop = asyncio.new_event_loop()
-            try:
-                return loop.run_until_complete(coro)
-            finally:
-                loop.close()
+            return asyncio.run(coro)
        if coro_side_effect:
            return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=coro_side_effect)
        return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=fake_run)
@@ -365,10 +361,13 @@ class TestDiscoverAndRegister:

        _servers.pop("fs", None)

-    def test_toolset_created(self):
-        """A custom toolset is created for the MCP server."""
+    def test_toolset_resolves_live_from_registry(self):
+        """MCP toolsets resolve through the live registry without TOOLSETS mutation."""
+        from tools.registry import ToolRegistry
        from tools.mcp_tool import _discover_and_register_server, _servers, MCPServerTask
+        from toolsets import resolve_toolset, validate_toolset

+        mock_registry = ToolRegistry()
        mock_tools = [_make_mcp_tool("ping", "Ping")]
        mock_session = MagicMock()

@@ -378,16 +377,16 @@ class TestDiscoverAndRegister:
            server._tools = mock_tools
            return server

-        mock_create = MagicMock()
        with patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
-             patch("toolsets.create_custom_toolset", mock_create):
+             patch("tools.registry.registry", mock_registry):
            asyncio.run(
                _discover_and_register_server("myserver", {"command": "test"})
            )

-        mock_create.assert_called_once()
-        call_kwargs = mock_create.call_args
-        assert call_kwargs[1]["name"] == "mcp-myserver" or call_kwargs[0][0] == "mcp-myserver"
+            assert validate_toolset("myserver") is True
+            assert validate_toolset("mcp-myserver") is True
+            assert "mcp_myserver_ping" in resolve_toolset("myserver")
+            assert "mcp_myserver_ping" in resolve_toolset("mcp-myserver")

        _servers.pop("myserver", None)

@@ -550,12 +549,15 @@ class TestMCPServerTask:
 # ---------------------------------------------------------------------------

 class TestToolsetInjection:
-    def test_mcp_tools_added_to_all_hermes_toolsets(self):
-        """Discovered MCP tools are dynamically injected into all hermes-* toolsets."""
+    def test_mcp_tools_resolve_through_server_aliases(self):
+        """Discovered MCP tools resolve through raw server-name aliases."""
        from tools.mcp_tool import MCPServerTask
+        from tools.registry import ToolRegistry
+        from toolsets import resolve_toolset, validate_toolset

        mock_tools = [_make_mcp_tool("list_files", "List files")]
        mock_session = MagicMock()
+        mock_registry = ToolRegistry()

        fresh_servers = {}

@@ -565,43 +567,32 @@ class TestToolsetInjection:
            server._tools = mock_tools
            return server

-        fake_toolsets = {
-            "hermes-cli": {"tools": ["terminal"], "description": "CLI", "includes": []},
-            "hermes-telegram": {"tools": ["terminal"], "description": "TG", "includes": []},
-            "hermes-gateway": {"tools": [], "description": "GW", "includes": []},
-            "non-hermes": {"tools": [], "description": "other", "includes": []},
-        }
        fake_config = {"fs": {"command": "npx", "args": []}}

        with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
             patch("tools.mcp_tool._servers", fresh_servers), \
             patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \
             patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
-             patch("toolsets.TOOLSETS", fake_toolsets):
+             patch("tools.registry.registry", mock_registry):
            from tools.mcp_tool import discover_mcp_tools
            result = discover_mcp_tools()

-        assert "mcp_fs_list_files" in result
-        # All hermes-* toolsets get injection
-        assert "mcp_fs_list_files" in fake_toolsets["hermes-cli"]["tools"]
-        assert "mcp_fs_list_files" in fake_toolsets["hermes-telegram"]["tools"]
-        assert "mcp_fs_list_files" in fake_toolsets["hermes-gateway"]["tools"]
-        # Non-hermes toolset should NOT get injection
-        assert "mcp_fs_list_files" not in fake_toolsets["non-hermes"]["tools"]
-        # Original tools preserved
-        assert "terminal" in fake_toolsets["hermes-cli"]["tools"]
-        # Server name becomes a standalone toolset
-        assert "fs" in fake_toolsets
-        assert "mcp_fs_list_files" in fake_toolsets["fs"]["tools"]
-        assert fake_toolsets["fs"]["description"].startswith("MCP server '")
+            assert "mcp_fs_list_files" in result
+            assert validate_toolset("fs") is True
+            assert validate_toolset("mcp-fs") is True
+            assert "mcp_fs_list_files" in resolve_toolset("fs")
+            assert "mcp_fs_list_files" in resolve_toolset("mcp-fs")

    def test_server_toolset_skips_builtin_collision(self):
-        """MCP server named after a built-in toolset shouldn't overwrite it."""
+        """MCP raw aliases never overwrite a built-in toolset name."""
        from tools.mcp_tool import MCPServerTask
+        from tools.registry import ToolRegistry
+        from toolsets import resolve_toolset, validate_toolset

        mock_tools = [_make_mcp_tool("run", "Run command")]
        mock_session = MagicMock()
        fresh_servers = {}
+        mock_registry = ToolRegistry()

        async def fake_connect(name, config):
            server = MCPServerTask(name)
@@ -620,12 +611,15 @@ class TestToolsetInjection:
             patch("tools.mcp_tool._servers", fresh_servers), \
             patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \
             patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
+             patch("tools.registry.registry", mock_registry), \
             patch("toolsets.TOOLSETS", fake_toolsets):
            from tools.mcp_tool import discover_mcp_tools
            discover_mcp_tools()

-        # Built-in toolset preserved — description unchanged
-        assert fake_toolsets["terminal"]["description"] == "Terminal tools"
+            assert fake_toolsets["terminal"]["description"] == "Terminal tools"
+            assert "mcp_terminal_run" not in resolve_toolset("terminal")
+            assert validate_toolset("mcp-terminal") is True
+            assert "mcp_terminal_run" in resolve_toolset("mcp-terminal")

    def test_server_connection_failure_skipped(self):
        """If one server fails to connect, others still proceed."""
@@ -776,6 +770,42 @@ class TestShutdown:
        assert len(_servers) == 0
        mock_server.shutdown.assert_called_once()

+    def test_shutdown_deregisters_registered_tools(self):
+        """shutdown_mcp_servers removes MCP tools and their raw alias."""
+        import tools.mcp_tool as mcp_mod
+        from tools.mcp_tool import MCPServerTask, shutdown_mcp_servers, _servers
+        from tools.registry import registry
+        from toolsets import resolve_toolset, validate_toolset
+
+        # NOTE(review): this test uses the real module-level registry and
+        # _servers dict rather than patched copies.
+        _servers.clear()
+        registry.register(
+            name="mcp_test_ping",
+            toolset="mcp-test",
+            schema={
+                "name": "mcp_test_ping",
+                "description": "Ping",
+                "parameters": {"type": "object", "properties": {}},
+            },
+            handler=lambda *_args, **_kwargs: "{}",
+        )
+        registry.register_toolset_alias("test", "mcp-test")
+
+        # Server task that claims ownership of the tool registered above.
+        server = MCPServerTask("test")
+        server._registered_tool_names = ["mcp_test_ping"]
+        _servers["test"] = server
+
+        # Presumably starts the background MCP event loop needed by shutdown; confirm in mcp_tool.
+        mcp_mod._ensure_mcp_loop()
+        try:
+            # Precondition: the alias is valid and resolves before shutdown.
+            assert validate_toolset("test") is True
+            assert "mcp_test_ping" in resolve_toolset("test")
+            shutdown_mcp_servers()
+        finally:
+            # Reset the module-level loop/thread references whether or not shutdown succeeded.
+            mcp_mod._mcp_loop = None
+            mcp_mod._mcp_thread = None
+
+        # After shutdown the tool is gone and the alias no longer validates.
+        assert "mcp_test_ping" not in registry.get_all_tool_names()
+        assert validate_toolset("test") is False
+
    def test_shutdown_handles_errors(self):
        """shutdown_mcp_servers handles errors during close gracefully."""
        import tools.mcp_tool as mcp_mod
@@ -1179,7 +1209,11 @@ class TestConfigurableTimeouts:
        try:
            handler = _make_tool_handler("test_srv", "my_tool", 180)
            with patch("tools.mcp_tool._run_on_mcp_loop") as mock_run:
-                mock_run.return_value = json.dumps({"result": "ok"})
+                def fake_run(coro, timeout=30):
+                    coro.close()
+                    return json.dumps({"result": "ok"})
+
+                mock_run.side_effect = fake_run
                handler({})
                # Verify timeout=180 was passed
                call_kwargs = mock_run.call_args
@@ -1279,11 +1313,7 @@ class TestUtilityHandlers:
    def _patch_mcp_loop(self):
        """Return a patch for _run_on_mcp_loop that runs the coroutine directly."""
        def fake_run(coro, timeout=30):
-            loop = asyncio.new_event_loop()
-            try:
-                return loop.run_until_complete(coro)
-            finally:
-                loop.close()
+            return asyncio.run(coro)
        return patch("tools.mcp_tool._run_on_mcp_loop", side_effect=fake_run)

    # -- list_resources --
@@ -2837,7 +2867,7 @@ class TestRegistryCollisionWarning:
    """registry.register() warns when a tool name is overwritten by a different toolset."""

    def test_overwrite_different_toolset_logs_warning(self, caplog):
-        """Overwriting a tool from a different toolset emits a warning."""
+        """Overwriting a tool from a different toolset is REJECTED with an error."""
        from tools.registry import ToolRegistry
        import logging

@@ -2847,11 +2877,13 @@ class TestRegistryCollisionWarning:

        reg.register(name="my_tool", toolset="builtin", schema=schema, handler=handler)

-        with caplog.at_level(logging.WARNING, logger="tools.registry"):
+        with caplog.at_level(logging.ERROR, logger="tools.registry"):
            reg.register(name="my_tool", toolset="mcp-ext", schema=schema, handler=handler)

-        assert any("collision" in r.message.lower() for r in caplog.records)
+        assert any("rejected" in r.message.lower() for r in caplog.records)
        assert any("builtin" in r.message and "mcp-ext" in r.message for r in caplog.records)
+        # The original tool should still be from 'builtin', not overwritten
+        assert reg.get_toolset_for_tool("my_tool") == "builtin"

    def test_overwrite_same_toolset_no_warning(self, caplog):
        """Re-registering within the same toolset is silent (e.g. reconnect)."""
@@ -3036,14 +3068,23 @@ class TestSanitizeMcpNameComponent:
            assert "/" not in name
            assert "." not in name

-    def test_slash_in_sync_mcp_toolsets(self):
-        """_sync_mcp_toolsets uses sanitize consistently with _convert_mcp_schema."""
-        from tools.mcp_tool import sanitize_mcp_name_component
+    def test_slash_in_server_alias_resolution(self):
+        """Server names with slashes resolve through their live MCP alias."""
+        from tools.registry import ToolRegistry
+        from toolsets import resolve_toolset, validate_toolset

-        # Verify the prefix generation matches what _convert_mcp_schema produces
-        server_name = "ai.exa/exa"
-        safe_prefix = f"mcp_{sanitize_mcp_name_component(server_name)}_"
-        assert safe_prefix == "mcp_ai_exa_exa_"
+        reg = ToolRegistry()
+        reg.register(
+            name="mcp_ai_exa_exa_search",
+            toolset="mcp-ai.exa/exa",
+            schema={"name": "mcp_ai_exa_exa_search", "description": "Search", "parameters": {"type": "object", "properties": {}}},
+            handler=lambda *_args, **_kwargs: "{}",
+        )
+        reg.register_toolset_alias("ai.exa/exa", "mcp-ai.exa/exa")
+
+        with patch("tools.registry.registry", reg):
+            assert validate_toolset("ai.exa/exa") is True
+            assert "mcp_ai_exa_exa_search" in resolve_toolset("ai.exa/exa")


 # ---------------------------------------------------------------------------
@@ -2,8 +2,10 @@

 import json
 import threading
+from pathlib import Path
+from unittest.mock import patch

-from tools.registry import ToolRegistry
+from tools.registry import ToolRegistry, discover_builtin_tools


 def _dummy_handler(args, **kwargs):
@@ -286,6 +288,74 @@ class TestCheckFnExceptionHandling:
        assert any(u["name"] == "crashes" for u in unavailable)


+class TestBuiltinDiscovery:
+    """Tests for discover_builtin_tools, with importlib.import_module patched out."""
+
+    def test_matches_previous_manual_builtin_tool_set(self):
+        """Discovery over the tools directory returns exactly the expected module names."""
+        expected = {
+            "tools.browser_tool",
+            "tools.clarify_tool",
+            "tools.code_execution_tool",
+            "tools.cronjob_tools",
+            "tools.delegate_tool",
+            "tools.file_tools",
+            "tools.homeassistant_tool",
+            "tools.image_generation_tool",
+            "tools.memory_tool",
+            "tools.mixture_of_agents_tool",
+            "tools.process_registry",
+            "tools.rl_training_tool",
+            "tools.send_message_tool",
+            "tools.session_search_tool",
+            "tools.skill_manager_tool",
+            "tools.skills_tool",
+            "tools.terminal_tool",
+            "tools.todo_tool",
+            "tools.tts_tool",
+            "tools.vision_tools",
+            "tools.web_tools",
+        }
+
+        # import_module is mocked; only the returned module names are checked.
+        # The scanned directory is "tools" under the third ancestor of this test file.
+        with patch("tools.registry.importlib.import_module"):
+            imported = discover_builtin_tools(Path(__file__).resolve().parents[2] / "tools")
+
+        assert set(imported) == expected
+
+    def test_imports_only_self_registering_modules(self, tmp_path):
+        """Only the module whose source calls registry.register is imported."""
+        tools_dir = tmp_path / "tools"
+        tools_dir.mkdir()
+        (tools_dir / "__init__.py").write_text("", encoding="utf-8")
+        (tools_dir / "registry.py").write_text("", encoding="utf-8")
+        # alpha.py contains a registry.register(...) call; beta.py does not.
+        (tools_dir / "alpha.py").write_text(
+            "from tools.registry import registry\nregistry.register(name='alpha', toolset='x', schema={}, handler=lambda *_a, **_k: '{}')\n",
+            encoding="utf-8",
+        )
+        (tools_dir / "beta.py").write_text("VALUE = 1\n", encoding="utf-8")
+
+        with patch("tools.registry.importlib.import_module") as mock_import:
+            imported = discover_builtin_tools(tools_dir)
+
+        # __init__.py, registry.py and beta.py must not be reported or imported.
+        assert imported == ["tools.alpha"]
+        mock_import.assert_called_once_with("tools.alpha")
+
+    def test_skips_mcp_tool_even_if_it_registers(self, tmp_path):
+        """mcp_tool.py is excluded from discovery even though it calls registry.register."""
+        tools_dir = tmp_path / "tools"
+        tools_dir.mkdir()
+        (tools_dir / "__init__.py").write_text("", encoding="utf-8")
+        # Both files contain a registry.register(...) call; only alpha is expected back.
+        (tools_dir / "mcp_tool.py").write_text(
+            "from tools.registry import registry\nregistry.register(name='mcp_alpha', toolset='mcp-test', schema={}, handler=lambda *_a, **_k: '{}')\n",
+            encoding="utf-8",
+        )
+        (tools_dir / "alpha.py").write_text(
+            "from tools.registry import registry\nregistry.register(name='alpha', toolset='x', schema={}, handler=lambda *_a, **_k: '{}')\n",
+            encoding="utf-8",
+        )
+
+        with patch("tools.registry.importlib.import_module") as mock_import:
+            imported = discover_builtin_tools(tools_dir)
+
+        assert imported == ["tools.alpha"]
+        mock_import.assert_called_once_with("tools.alpha")
+
+
 class TestEmojiMetadata:
    """Verify per-tool emoji registration and lookup."""

@@ -87,7 +87,7 @@ DANGEROUS_PATTERNS = [
    (r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
    (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
    (r'>\s*/etc/', "overwrite system config"),
-    (r'\bsystemctl\s+(stop|disable|mask)\b', "stop/disable system service"),
+    (r'\bsystemctl\s+(-[^\s]+\s+)*(stop|restart|disable|mask)\b', "stop/restart system service"),
    (r'\bkill\s+-9\s+-1\b', "kill all processes"),
    (r'\bpkill\s+-9\b', "force kill processes"),
    (r':\(\)\s*\{\s*:\s*\|\s*:\s*&\s*\}\s*;\s*:', "fork bomb"),
@@ -101,6 +101,11 @@ DANGEROUS_PATTERNS = [
    (r'\bxargs\s+.*\brm\b', "xargs with rm"),
    (r'\bfind\b.*-exec\s+(/\S*/)?rm\b', "find -exec rm"),
    (r'\bfind\b.*-delete\b', "find -delete"),
+    # Gateway lifecycle protection: prevent the agent from killing its own
+    # gateway process.  These commands trigger a gateway restart/stop that
+    # terminates all running agents mid-work.
+    (r'\bhermes\s+gateway\s+(stop|restart)\b', "stop/restart hermes gateway (kills running agents)"),
+    (r'\bhermes\s+update\b', "hermes update (restarts gateway, kills running agents)"),
    # Gateway protection: never start gateway outside systemd management
    (r'gateway\s+run\b.*(&\s*$|&\s*;|\bdisown\b|\bsetsid\b)', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
    (r'\bnohup\b.*gateway\s+run\b', "start gateway outside systemd (use 'systemctl --user restart hermes-gateway')"),
@@ -94,11 +94,21 @@ except ImportError:
 logger = logging.getLogger(__name__)

 # Standard PATH entries for environments with minimal PATH (e.g. systemd services).
-# Includes macOS Homebrew paths (/opt/homebrew/* for Apple Silicon).
-_SANE_PATH = (
-    "/opt/homebrew/bin:/opt/homebrew/sbin:"
-    "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+# Includes Android/Termux and macOS Homebrew locations needed for agent-browser,
+# npx, node, and Android's glibc runner (grun).
+_SANE_PATH_DIRS = (
+    "/data/data/com.termux/files/usr/bin",
+    "/data/data/com.termux/files/usr/sbin",
+    "/opt/homebrew/bin",
+    "/opt/homebrew/sbin",
+    "/usr/local/sbin",
+    "/usr/local/bin",
+    "/usr/sbin",
+    "/usr/bin",
+    "/sbin",
+    "/bin",
 )
+_SANE_PATH = os.pathsep.join(_SANE_PATH_DIRS)


@functools.lru_cache(maxsize=1)
@@ -123,6 +133,28 @@ def _discover_homebrew_node_dirs() -> tuple[str, ...]:
        pass
    return tuple(dirs)

+
+def _browser_candidate_path_dirs() -> list[str]:
+    """Return ordered browser CLI PATH candidates shared by discovery and execution."""
+    hermes_home = get_hermes_home()
+    hermes_node_bin = str(hermes_home / "node" / "bin")
+    return [hermes_node_bin, *list(_discover_homebrew_node_dirs()), *_SANE_PATH_DIRS]
+
+
+def _merge_browser_path(existing_path: str = "") -> str:
+    """Prepend browser-specific PATH fallbacks that exist on disk, keeping existing entries in their original order."""
+    path_parts = [p for p in (existing_path or "").split(os.pathsep) if p]
+    existing_parts = set(path_parts)
+    prefix_parts: list[str] = []
+
+    for part in _browser_candidate_path_dirs():
+        if not part or part in existing_parts or part in prefix_parts:
+            continue
+        if os.path.isdir(part):
+            prefix_parts.append(part)
+
+    return os.pathsep.join(prefix_parts + path_parts)
+
 # Throttle screenshot cleanup to avoid repeated full directory scans.
 _last_screenshot_cleanup_by_dir: dict[str, float] = {}

@@ -895,21 +927,10 @@ def _find_agent_browser() -> str:
        _agent_browser_resolved = True
        return which_result

-    # Build an extended search PATH including Homebrew and Hermes-managed dirs.
-    # This covers macOS where the process PATH may not include Homebrew paths.
-    extra_dirs: list[str] = []
-    for d in ["/opt/homebrew/bin", "/usr/local/bin"]:
-        if os.path.isdir(d):
-            extra_dirs.append(d)
-    extra_dirs.extend(_discover_homebrew_node_dirs())
-
-    hermes_home = get_hermes_home()
-    hermes_node_bin = str(hermes_home / "node" / "bin")
-    if os.path.isdir(hermes_node_bin):
-        extra_dirs.append(hermes_node_bin)
-
-    if extra_dirs:
-        extended_path = os.pathsep.join(extra_dirs)
+    # Build an extended search PATH including Hermes-managed Node, macOS
+    # versioned Homebrew installs, and fallback system dirs like Termux.
+    extended_path = _merge_browser_path("")
+    if extended_path:
        which_result = shutil.which("agent-browser", path=extended_path)
        if which_result:
            _cached_agent_browser = which_result
@@ -924,10 +945,10 @@ def _find_agent_browser() -> str:
        _agent_browser_resolved = True
        return _cached_agent_browser
    
-    # Check common npx locations (also search extended dirs)
+    # Check common npx locations (also search the extended fallback PATH)
    npx_path = shutil.which("npx")
-    if not npx_path and extra_dirs:
-        npx_path = shutil.which("npx", path=os.pathsep.join(extra_dirs))
+    if not npx_path and extended_path:
+        npx_path = shutil.which("npx", path=extended_path)
    if npx_path:
        _cached_agent_browser = "npx agent-browser"
        _agent_browser_resolved = True
@@ -1046,24 +1067,9 @@ def _run_browser_command(
        
        browser_env = {**os.environ}

-        # Ensure PATH includes Hermes-managed Node first, Homebrew versioned
-        # node dirs (for macOS ``brew install node@24``), then standard system dirs.
-        hermes_home = get_hermes_home()
-        hermes_node_bin = str(hermes_home / "node" / "bin")
-
-        existing_path = browser_env.get("PATH", "")
-        path_parts = [p for p in existing_path.split(":") if p]
-        candidate_dirs = (
-            [hermes_node_bin]
-            + list(_discover_homebrew_node_dirs())
-            + [p for p in _SANE_PATH.split(":") if p]
-        )
-
-        for part in reversed(candidate_dirs):
-            if os.path.isdir(part) and part not in path_parts:
-                path_parts.insert(0, part)
-
-        browser_env["PATH"] = ":".join(path_parts)
+        # Ensure subprocesses inherit the same browser-specific PATH fallbacks
+        # used during CLI discovery.
+        browser_env["PATH"] = _merge_browser_path(browser_env.get("PATH", ""))
        browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir
        
        # Use temp files for stdout/stderr instead of pipes.
@@ -99,23 +99,41 @@ def _load_hermes_env_vars() -> dict[str, str]:


 def find_docker() -> Optional[str]:
-    """Locate the docker CLI binary.
+    """Locate the docker (or podman) CLI binary.

-    Checks ``shutil.which`` first (respects PATH), then probes well-known
-    install locations on macOS where Docker Desktop may not be in PATH
-    (e.g. when running as a gateway service via launchd).
+    Resolution order:
+    1. ``HERMES_DOCKER_BINARY`` env var — explicit override (e.g. ``/usr/bin/podman``); skipped unless it points to an executable file
+    2. ``docker`` on PATH via ``shutil.which``
+    3. ``podman`` on PATH via ``shutil.which``
+    4. Well-known macOS Docker Desktop install locations

-    Returns the absolute path, or ``None`` if docker cannot be found.
+    Returns the absolute path, or ``None`` if neither runtime can be found.
    """
    global _docker_executable
    if _docker_executable is not None:
        return _docker_executable

+    # 1. Explicit override via env var (e.g. for Podman on immutable distros)
+    override = os.getenv("HERMES_DOCKER_BINARY")
+    if override and os.path.isfile(override) and os.access(override, os.X_OK):
+        _docker_executable = override
+        logger.info("Using HERMES_DOCKER_BINARY override: %s", override)
+        return override
+
+    # 2. docker on PATH
    found = shutil.which("docker")
    if found:
        _docker_executable = found
        return found

+    # 3. podman on PATH (drop-in compatible for our use case)
+    found = shutil.which("podman")
+    if found:
+        _docker_executable = found
+        logger.info("Using podman as container runtime: %s", found)
+        return found
+
+    # 4. Well-known macOS Docker Desktop locations
    for path in _DOCKER_SEARCH_PATHS:
        if os.path.isfile(path) and os.access(path, os.X_OK):
            _docker_executable = path
@@ -219,6 +219,58 @@ def _sanitize_error(text: str) -> str:
    return _CREDENTIAL_PATTERN.sub("[REDACTED]", text)


+# ---------------------------------------------------------------------------
+# MCP tool description content scanning
+# ---------------------------------------------------------------------------
+
+# Patterns that indicate potential prompt injection in MCP tool descriptions.
+# These are WARNING-level — we log but don't block, since false positives
+# would break legitimate MCP servers.
+_MCP_INJECTION_PATTERNS = [
+    (re.compile(r"ignore\s+(all\s+)?previous\s+instructions", re.I),
+     "prompt override attempt ('ignore previous instructions')"),
+    (re.compile(r"you\s+are\s+now\s+a", re.I),
+     "identity override attempt ('you are now a...')"),
+    (re.compile(r"your\s+new\s+(task|role|instructions?)\s+(is|are)", re.I),
+     "task override attempt"),
+    (re.compile(r"system\s*:\s*", re.I),
+     "system prompt injection attempt"),
+    (re.compile(r"<\s*(system|human|assistant)\s*>", re.I),
+     "role tag injection attempt"),
+    (re.compile(r"do\s+not\s+(tell|inform|mention|reveal)", re.I),
+     "concealment instruction"),
+    (re.compile(r"(curl|wget|fetch)\s+https?://", re.I),
+     "network command in description"),
+    (re.compile(r"base64\.(b64decode|decodebytes)", re.I),
+     "base64 decode reference"),
+    (re.compile(r"exec\s*\(|eval\s*\(", re.I),
+     "code execution reference"),
+    (re.compile(r"import\s+(subprocess|os|shutil|socket)", re.I),
+     "dangerous import reference"),
+]
+
+
+def _scan_mcp_description(server_name: str, tool_name: str, description: str) -> List[str]:
+    """Scan an MCP tool description for prompt injection patterns.
+
+    Returns a list of finding strings (empty = clean).
+    """
+    findings = []
+    if not description:
+        return findings
+    for pattern, reason in _MCP_INJECTION_PATTERNS:
+        if pattern.search(description):
+            findings.append(reason)
+    if findings:
+        logger.warning(
+            "MCP server '%s' tool '%s': suspicious description content — %s. "
+            "Description: %.200s",
+            server_name, tool_name, "; ".join(findings),
+            description,
+        )
+    return findings
+
+
 def _prepend_path(env: dict, directory: str) -> dict:
    """Prepend *directory* to env PATH if it is not already present."""
    updated = dict(env or {})
@@ -794,33 +846,46 @@ class MCPServerTask:
        After the initial ``await`` (list_tools), all mutations are synchronous
        — atomic from the event loop's perspective.
        """
-        from tools.registry import registry, tool_error
-        from toolsets import TOOLSETS
+        from tools.registry import registry

        async with self._refresh_lock:
+            # Capture old tool names for change diff
+            old_tool_names = set(self._registered_tool_names)
+
            # 1. Fetch current tool list from server
            tools_result = await self.session.list_tools()
            new_mcp_tools = tools_result.tools if hasattr(tools_result, "tools") else []

-            # 2. Remove old tools from hermes-* umbrella toolsets
-            for ts_name, ts in TOOLSETS.items():
-                if ts_name.startswith("hermes-"):
-                    ts["tools"] = [t for t in ts["tools"] if t not in self._registered_tool_names]
-
-            # 3. Deregister old tools from the central registry
+            # 2. Deregister old tools from the central registry
            for prefixed_name in self._registered_tool_names:
                registry.deregister(prefixed_name)

-            # 4. Re-register with fresh tool list
+            # 3. Re-register with fresh tool list
            self._tools = new_mcp_tools
            self._registered_tool_names = _register_server_tools(
                self.name, self, self._config
            )

-            logger.info(
-                "MCP server '%s': dynamically refreshed %d tool(s)",
-                self.name, len(self._registered_tool_names),
-            )
+            # 4. Log what changed (user-visible notification)
+            new_tool_names = set(self._registered_tool_names)
+            added = new_tool_names - old_tool_names
+            removed = old_tool_names - new_tool_names
+            changes = []
+            if added:
+                changes.append(f"added: {', '.join(sorted(added))}")
+            if removed:
+                changes.append(f"removed: {', '.join(sorted(removed))}")
+            if changes:
+                logger.warning(
+                    "MCP server '%s': tools changed dynamically — %s. "
+                    "Verify these changes are expected.",
+                    self.name, "; ".join(changes),
+                )
+            else:
+                logger.info(
+                    "MCP server '%s': dynamically refreshed %d tool(s) (no changes)",
+                    self.name, len(self._registered_tool_names),
+                )

    async def _run_stdio(self, config: dict):
        """Run the server using stdio transport."""
@@ -1073,6 +1138,8 @@ class MCPServerTask:

    async def shutdown(self):
        """Signal the Task to exit and wait for clean resource teardown."""
+        from tools.registry import registry
+
        self._shutdown_event.set()
        if self._task and not self._task.done():
            try:
@@ -1087,6 +1154,9 @@ class MCPServerTask:
                    await self._task
                except asyncio.CancelledError:
                    pass
+        for tool_name in list(getattr(self, "_registered_tool_names", [])):
+            registry.deregister(tool_name)
+        self._registered_tool_names = []
        self.session = None


@@ -1600,57 +1670,6 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict:
    }


-def _sync_mcp_toolsets(server_names: Optional[List[str]] = None) -> None:
-    """Expose each MCP server as a standalone toolset and inject into hermes-* sets.
-
-    Creates a real toolset entry in TOOLSETS for each server name (e.g.
-    TOOLSETS["github"] = {"tools": ["mcp_github_list_files", ...]}). This
-    makes raw server names resolvable in platform_toolsets overrides.
-
-    Also injects all MCP tools into hermes-* umbrella toolsets for the
-    default behavior.
-
-    Skips server names that collide with built-in toolsets.
-    """
-    from toolsets import TOOLSETS
-
-    if server_names is None:
-        server_names = list(_load_mcp_config().keys())
-
-    existing = _existing_tool_names()
-    all_mcp_tools: List[str] = []
-
-    for server_name in server_names:
-        safe_prefix = f"mcp_{sanitize_mcp_name_component(server_name)}_"
-        server_tools = sorted(
-            t for t in existing if t.startswith(safe_prefix)
-        )
-        all_mcp_tools.extend(server_tools)
-
-        # Don't overwrite a built-in toolset that happens to share the name.
-        existing_ts = TOOLSETS.get(server_name)
-        if existing_ts and not str(existing_ts.get("description", "")).startswith("MCP server '"):
-            logger.warning(
-                "Skipping MCP toolset alias '%s' — a built-in toolset already uses that name",
-                server_name,
-            )
-            continue
-
-        TOOLSETS[server_name] = {
-            "description": f"MCP server '{server_name}' tools",
-            "tools": server_tools,
-            "includes": [],
-        }
-
-    # Also inject into hermes-* umbrella toolsets for default behavior.
-    for ts_name, ts in TOOLSETS.items():
-        if not ts_name.startswith("hermes-"):
-            continue
-        for tool_name in all_mcp_tools:
-            if tool_name not in ts["tools"]:
-                ts["tools"].append(tool_name)
-
-
 def _build_utility_schemas(server_name: str) -> List[dict]:
    """Build schemas for the MCP utility tools (resources & prompts).

@@ -1803,16 +1822,16 @@ def _existing_tool_names() -> List[str]:
 def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> List[str]:
    """Register tools from an already-connected server into the registry.

-    Handles include/exclude filtering, utility tools, toolset creation,
-    and hermes-* umbrella toolset injection.
+    Handles include/exclude filtering and utility tools. Toolset resolution
+    for ``mcp-{server}`` and raw server-name aliases is derived from the live
+    registry, rather than mutating ``toolsets.TOOLSETS`` at runtime.

    Used by both initial discovery and dynamic refresh (list_changed).

    Returns:
        List of registered prefixed tool names.
    """
-    from tools.registry import registry, tool_error
-    from toolsets import create_custom_toolset, TOOLSETS
+    from tools.registry import registry

    registered_names: List[str] = []
    toolset_name = f"mcp-{name}"
@@ -1838,6 +1857,10 @@ def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> Li
        if not _should_register(mcp_tool.name):
            logger.debug("MCP server '%s': skipping tool '%s' (filtered by config)", name, mcp_tool.name)
            continue
+
+        # Scan tool description for prompt injection patterns
+        _scan_mcp_description(name, mcp_tool.name, mcp_tool.description or "")
+
        schema = _convert_mcp_schema(name, mcp_tool)
        tool_name_prefixed = schema["name"]

@@ -1898,19 +1921,8 @@ def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> Li
        )
        registered_names.append(util_name)

-    # Create a custom toolset so these tools are discoverable
    if registered_names:
-        create_custom_toolset(
-            name=toolset_name,
-            description=f"MCP tools from {name} server",
-            tools=registered_names,
-        )
-        # Inject into hermes-* umbrella toolsets for default behavior
-        for ts_name, ts in TOOLSETS.items():
-            if ts_name.startswith("hermes-"):
-                for tool_name in registered_names:
-                    if tool_name not in ts["tools"]:
-                        ts["tools"].append(tool_name)
+        registry.register_toolset_alias(name, toolset_name)

    return registered_names

@@ -1974,7 +1986,6 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
        }

    if not new_servers:
-        _sync_mcp_toolsets(list(servers.keys()))
        return _existing_tool_names()

    # Start the background event loop for MCP connections
@@ -2005,8 +2016,6 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
    # The outer timeout is generous: 120s total for parallel discovery.
    _run_on_mcp_loop(_discover_all(), timeout=120)

-    _sync_mcp_toolsets(list(servers.keys()))
-
    # Log a summary so ACP callers get visibility into what was registered.
    with _lock:
        connected = [n for n in new_servers if n in _servers]
@@ -2027,7 +2036,7 @@ def register_mcp_servers(servers: Dict[str, dict]) -> List[str]:
 def discover_mcp_tools() -> List[str]:
    """Entry point: load config, connect to MCP servers, register tools.

-    Called from ``model_tools._discover_tools()``. Safe to call even when
+    Called from ``model_tools`` after ``discover_builtin_tools()``. Safe to call even when
    the ``mcp`` package is not installed (returns empty list).

    Idempotent for already-connected servers. If some servers failed on a
@@ -14,14 +14,65 @@ Import chain (circular-import safe):
    run_agent.py, cli.py, batch_runner.py, etc.
 """

+import ast
+import importlib
 import json
 import logging
 import threading
+from pathlib import Path
 from typing import Callable, Dict, List, Optional, Set

 logger = logging.getLogger(__name__)


+def _is_registry_register_call(node: ast.AST) -> bool:
+    """Return True when *node* is a ``registry.register(...)`` call expression."""
+    if not isinstance(node, ast.Expr) or not isinstance(node.value, ast.Call):
+        return False
+    func = node.value.func
+    return (
+        isinstance(func, ast.Attribute)
+        and func.attr == "register"
+        and isinstance(func.value, ast.Name)
+        and func.value.id == "registry"
+    )
+
+
+def _module_registers_tools(module_path: Path) -> bool:
+    """Return True when the module contains a top-level ``registry.register(...)`` call.
+
+    Only inspects module-body statements so that helper modules which happen
+    to call ``registry.register()`` inside a function are not picked up.
+    """
+    try:
+        source = module_path.read_text(encoding="utf-8")
+        tree = ast.parse(source, filename=str(module_path))
+    except (OSError, SyntaxError):
+        return False
+
+    return any(_is_registry_register_call(stmt) for stmt in tree.body)
+
+
+def discover_builtin_tools(tools_dir: Optional[Path] = None) -> List[str]:
+    """Import built-in self-registering tool modules; return the names of those imported successfully (failures are logged and skipped)."""
+    tools_path = Path(tools_dir) if tools_dir is not None else Path(__file__).resolve().parent
+    module_names = [
+        f"tools.{path.stem}"
+        for path in sorted(tools_path.glob("*.py"))
+        if path.name not in {"__init__.py", "registry.py", "mcp_tool.py"}
+        and _module_registers_tools(path)
+    ]
+
+    imported: List[str] = []
+    for mod_name in module_names:
+        try:
+            importlib.import_module(mod_name)
+            imported.append(mod_name)
+        except Exception as e:
+            logger.warning("Could not import tool module %s: %s", mod_name, e)
+    return imported
+
+
 class ToolEntry:
    """Metadata for a single registered tool."""

@@ -52,6 +103,7 @@ class ToolRegistry:
    def __init__(self):
        self._tools: Dict[str, ToolEntry] = {}
        self._toolset_checks: Dict[str, Callable] = {}
+        self._toolset_aliases: Dict[str, str] = {}
        # MCP dynamic refresh can mutate the registry while other threads are
        # reading tool metadata, so keep mutations serialized and readers on
        # stable snapshots.
@@ -96,6 +148,27 @@ class ToolRegistry:
            if entry.toolset == toolset
        )

+    def register_toolset_alias(self, alias: str, toolset: str) -> None:
+        """Register an explicit alias for a canonical toolset name."""
+        with self._lock:
+            existing = self._toolset_aliases.get(alias)
+            if existing and existing != toolset:
+                logger.warning(
+                    "Toolset alias collision: '%s' (%s) overwritten by %s",
+                    alias, existing, toolset,
+                )
+            self._toolset_aliases[alias] = toolset
+
+    def get_registered_toolset_aliases(self) -> Dict[str, str]:
+        """Return a snapshot of ``{alias: canonical_toolset}`` mappings."""
+        with self._lock:
+            return dict(self._toolset_aliases)
+
+    def get_toolset_alias_target(self, alias: str) -> Optional[str]:
+        """Return the canonical toolset name for an alias, or None."""
+        with self._lock:
+            return self._toolset_aliases.get(alias)
+
    # ------------------------------------------------------------------
    # Registration
    # ------------------------------------------------------------------
@@ -117,11 +190,27 @@ class ToolRegistry:
        with self._lock:
            existing = self._tools.get(name)
            if existing and existing.toolset != toolset:
-                logger.warning(
-                    "Tool name collision: '%s' (toolset '%s') is being "
-                    "overwritten by toolset '%s'",
-                    name, existing.toolset, toolset,
+                # Allow MCP-to-MCP overwrites (legitimate: server refresh,
+                # or two MCP servers with overlapping tool names).
+                both_mcp = (
+                    existing.toolset.startswith("mcp-")
+                    and toolset.startswith("mcp-")
                )
+                if both_mcp:
+                    logger.debug(
+                        "Tool '%s': MCP toolset '%s' overwriting MCP toolset '%s'",
+                        name, toolset, existing.toolset,
+                    )
+                else:
+                    # Reject shadowing — prevent plugins/MCP from overwriting
+                    # built-in tools or vice versa.
+                    logger.error(
+                        "Tool registration REJECTED: '%s' (toolset '%s') would "
+                        "shadow existing tool from toolset '%s'. Deregister the "
+                        "existing tool first if this is intentional.",
+                        name, toolset, existing.toolset,
+                    )
+                    return
            self._tools[name] = ToolEntry(
                name=name,
                toolset=toolset,
@@ -148,11 +237,18 @@ class ToolRegistry:
            entry = self._tools.pop(name, None)
            if entry is None:
                return
-            # Drop the toolset check if this was the last tool in that toolset
-            if entry.toolset in self._toolset_checks and not any(
+            # Drop the toolset check and aliases if this was the last tool in
+            # that toolset.
+            toolset_still_exists = any(
                e.toolset == entry.toolset for e in self._tools.values()
-            ):
+            )
+            if not toolset_still_exists:
                self._toolset_checks.pop(entry.toolset, None)
+                self._toolset_aliases = {
+                    alias: target
+                    for alias, target in self._toolset_aliases.items()
+                    if target != entry.toolset
+                }
        logger.debug("Deregistered tool: %s", name)

    # ------------------------------------------------------------------
@@ -64,11 +64,11 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]:
            report = format_scan_report(result)
            return f"Security scan blocked this skill ({reason}):\n{report}"
        if allowed is None:
-            # "ask" — allow but include the warning so the user sees the findings
+            # "ask" verdict — for agent-created skills this means dangerous
+            # findings were detected.  Block the skill and include the report.
            report = format_scan_report(result)
-            logger.warning("Agent-created skill has security findings: %s", reason)
-            # Don't block — return None to allow, but log the warning
-            return None
+            logger.warning("Agent-created skill blocked (dangerous findings): %s", reason)
+            return f"Security scan blocked this skill ({reason}):\n{report}"
    except Exception as e:
        logger.warning("Security scan failed for %s: %s", skill_dir, e, exc_info=True)
    return None
@@ -409,8 +409,39 @@ def get_toolset(name: str) -> Optional[Dict[str, Any]]:
        Dict: Toolset definition with description, tools, and includes
        None: If toolset not found
    """
-    # Return toolset definition
-    return TOOLSETS.get(name)
+    toolset = TOOLSETS.get(name)
+    if toolset:
+        return toolset
+
+    try:
+        from tools.registry import registry
+    except Exception:
+        return None
+
+    registry_toolset = name
+    description = f"Plugin toolset: {name}"
+    alias_target = registry.get_toolset_alias_target(name)
+
+    if name not in _get_plugin_toolset_names():
+        registry_toolset = alias_target
+        if not registry_toolset:
+            return None
+        description = f"MCP server '{name}' tools"
+    else:
+        reverse_aliases = {
+            canonical: alias
+            for alias, canonical in _get_registry_toolset_aliases().items()
+            if alias not in TOOLSETS
+        }
+        alias = reverse_aliases.get(name)
+        if alias:
+            description = f"MCP server '{alias}' tools"
+
+    return {
+        "description": description,
+        "tools": registry.get_tool_names_for_toolset(registry_toolset),
+        "includes": [],
+    }


 def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]:
@@ -438,7 +469,7 @@ def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]:
            # Use a fresh visited set per branch to avoid cross-branch contamination
            resolved = resolve_toolset(toolset_name, visited.copy())
            all_tools.update(resolved)
-        return list(all_tools)
+        return sorted(all_tools)

    # Check for cycles / already-resolved (diamond deps).
    # Silently return [] — either this is a diamond (not a bug, tools already
@@ -449,15 +480,8 @@ def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]:
    visited.add(name)

    # Get toolset definition
-    toolset = TOOLSETS.get(name)
+    toolset = get_toolset(name)
    if not toolset:
-        # Fall back to tool registry for plugin-provided toolsets
-        if name in _get_plugin_toolset_names():
-            try:
-                from tools.registry import registry
-                return registry.get_tool_names_for_toolset(name)
-            except Exception:
-                pass
        return []

    # Collect direct tools
@@ -470,7 +494,7 @@ def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]:
        included_tools = resolve_toolset(included_name, visited)
        tools.update(included_tools)
    
-    return list(tools)
+    return sorted(tools)


 def resolve_multiple_toolsets(toolset_names: List[str]) -> List[str]:
@@ -489,7 +513,7 @@ def resolve_multiple_toolsets(toolset_names: List[str]) -> List[str]:
        tools = resolve_toolset(name)
        all_tools.update(tools)
    
-    return list(all_tools)
+    return sorted(all_tools)


 def _get_plugin_toolset_names() -> Set[str]:
@@ -509,6 +533,15 @@ def _get_plugin_toolset_names() -> Set[str]:
        return set()


+def _get_registry_toolset_aliases() -> Dict[str, str]:
+    """Return explicit toolset aliases registered in the live registry."""
+    try:
+        from tools.registry import registry
+        return registry.get_registered_toolset_aliases()
+    except Exception:
+        return {}
+
+
 def get_all_toolsets() -> Dict[str, Dict[str, Any]]:
    """
    Get all available toolsets with their definitions.
@@ -518,19 +551,19 @@ def get_all_toolsets() -> Dict[str, Dict[str, Any]]:
    Returns:
        Dict: All toolset definitions
    """
-    result = TOOLSETS.copy()
-    # Add plugin-provided toolsets (synthetic entries)
+    result = dict(TOOLSETS)
+    aliases = _get_registry_toolset_aliases()
    for ts_name in _get_plugin_toolset_names():
-        if ts_name not in result:
-            try:
-                from tools.registry import registry
-                tools = registry.get_tool_names_for_toolset(ts_name)
-                result[ts_name] = {
-                    "description": f"Plugin toolset: {ts_name}",
-                    "tools": tools,
-                }
-            except Exception:
-                pass
+        display_name = ts_name
+        for alias, canonical in aliases.items():
+            if canonical == ts_name and alias not in TOOLSETS:
+                display_name = alias
+                break
+        if display_name in result:
+            continue
+        toolset = get_toolset(display_name)
+        if toolset:
+            result[display_name] = toolset
    return result


@@ -544,7 +577,14 @@ def get_toolset_names() -> List[str]:
        List[str]: List of toolset names
    """
    names = set(TOOLSETS.keys())
-    names |= _get_plugin_toolset_names()
+    aliases = _get_registry_toolset_aliases()
+    for ts_name in _get_plugin_toolset_names():
+        for alias, canonical in aliases.items():
+            if canonical == ts_name and alias not in TOOLSETS:
+                names.add(alias)
+                break
+        else:
+            names.add(ts_name)
    return sorted(names)


@@ -565,8 +605,9 @@ def validate_toolset(name: str) -> bool:
        return True
    if name in TOOLSETS:
        return True
-    # Check tool registry for plugin-provided toolsets
-    return name in _get_plugin_toolset_names()
+    if name in _get_plugin_toolset_names():
+        return True
+    return name in _get_registry_toolset_aliases()


 def create_custom_toolset(
@@ -80,6 +80,7 @@ export const en: Translations = {
    notRunning: "Not running",
    startFailed: "Start failed",
    pid: "PID",
+    runningRemote: "Running (remote)",
    noneRunning: "None",
    gatewayFailedToStart: "Gateway failed to start",
    lastUpdate: "Last update",
@@ -111,11 +112,14 @@ export const en: Translations = {
    totalTokens: "Total Tokens",
    totalSessions: "Total Sessions",
    apiCalls: "API Calls",
+    cacheHitRate: "Cache Hit Rate",
    dailyTokenUsage: "Daily Token Usage",
    dailyBreakdown: "Daily Breakdown",
    perModelBreakdown: "Per-Model Breakdown",
+    prompt: "Prompt",
    input: "Input",
    output: "Output",
+    cached: "cached",
    total: "Total",
    noUsageData: "No usage data for this period",
    startSession: "Start a session to see analytics here",
@@ -124,7 +128,6 @@ export const en: Translations = {
    tokens: "Tokens",
    perDayAvg: "/day avg",
    acrossModels: "across {count} models",
-    inOut: "{input} in / {output} out",
  },

  logs: {
@@ -83,6 +83,7 @@ export interface Translations {
    notRunning: string;
    startFailed: string;
    pid: string;
+    runningRemote: string;
    noneRunning: string;
    gatewayFailedToStart: string;
    lastUpdate: string;
@@ -116,11 +117,14 @@ export interface Translations {
    totalTokens: string;
    totalSessions: string;
    apiCalls: string;
+    cacheHitRate: string;
    dailyTokenUsage: string;
    dailyBreakdown: string;
    perModelBreakdown: string;
+    prompt: string;
    input: string;
    output: string;
+    cached: string;
    total: string;
    noUsageData: string;
    startSession: string;
@@ -129,7 +133,6 @@ export interface Translations {
    tokens: string;
    perDayAvg: string;
    acrossModels: string;
-    inOut: string;
  };

  // ── Logs page ──
@@ -80,6 +80,7 @@ export const zh: Translations = {
    notRunning: "未运行",
    startFailed: "启动失败",
    pid: "进程",
+    runningRemote: "运行中（远程）",
    noneRunning: "无",
    gatewayFailedToStart: "网关启动失败",
    lastUpdate: "最后更新",
@@ -111,11 +112,14 @@ export const zh: Translations = {
    totalTokens: "总 Token 数",
    totalSessions: "总会话数",
    apiCalls: "API 调用",
+    cacheHitRate: "缓存命中率",
    dailyTokenUsage: "每日 Token 用量",
    dailyBreakdown: "每日明细",
    perModelBreakdown: "模型用量明细",
+    prompt: "提示",
    input: "输入",
    output: "输出",
+    cached: "已缓存",
    total: "总计",
    noUsageData: "该时间段暂无使用数据",
    startSession: "开始会话后将在此显示分析数据",
@@ -124,7 +128,6 @@ export const zh: Translations = {
    tokens: "Token",
    perDayAvg: "/天 平均",
    acrossModels: "共 {count} 个模型",
-    inOut: "输入 {input} / 输出 {output}",
  },

  logs: {
@@ -269,18 +269,23 @@ export interface AnalyticsDailyEntry {
  input_tokens: number;
  output_tokens: number;
  cache_read_tokens: number;
+  cache_write_tokens: number;
  reasoning_tokens: number;
  estimated_cost: number;
  actual_cost: number;
  sessions: number;
+  api_calls: number;
 }

 export interface AnalyticsModelEntry {
  model: string;
  input_tokens: number;
  output_tokens: number;
+  cache_read_tokens: number;
+  cache_write_tokens: number;
  estimated_cost: number;
  sessions: number;
+  api_calls: number;
 }

 export interface AnalyticsResponse {
@@ -290,10 +295,12 @@ export interface AnalyticsResponse {
    total_input: number;
    total_output: number;
    total_cache_read: number;
+    total_cache_write: number;
    total_reasoning: number;
    total_estimated_cost: number;
    total_actual_cost: number;
    total_sessions: number;
+    total_api_calls: number;
  };
 }

@@ -4,6 +4,7 @@ import {
  Cpu,
  Hash,
  TrendingUp,
+  Zap,
 } from "lucide-react";
 import { api } from "@/lib/api";
 import type { AnalyticsResponse, AnalyticsDailyEntry, AnalyticsModelEntry } from "@/lib/api";
@@ -19,6 +20,11 @@ const PERIODS = [

 const CHART_HEIGHT_PX = 160;

+/** Total prompt tokens for an entry: input_tokens + cache_read_tokens + cache_write_tokens; absent cache fields count as 0. */
+function getPromptTokens(d: { input_tokens: number; cache_read_tokens?: number; cache_write_tokens?: number }): number {
+  return d.input_tokens + (d.cache_read_tokens ?? 0) + (d.cache_write_tokens ?? 0);
+}
+
 function formatTokens(n: number): string {
  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
  if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
@@ -63,7 +69,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
  const { t } = useI18n();
  if (daily.length === 0) return null;

-  const maxTokens = Math.max(...daily.map((d) => d.input_tokens + d.output_tokens), 1);
+  const maxTokens = Math.max(...daily.map((d) => getPromptTokens(d) + d.output_tokens), 1);

  return (
    <Card>
@@ -75,7 +81,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
          <div className="flex items-center gap-4 text-xs text-muted-foreground">
          <div className="flex items-center gap-1.5">
            <div className="h-2.5 w-2.5 bg-[#ffe6cb]" />
-            {t.analytics.input}
+            {t.analytics.prompt}
          </div>
          <div className="flex items-center gap-1.5">
            <div className="h-2.5 w-2.5 bg-emerald-500" />
@@ -86,8 +92,9 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
      <CardContent>
        <div className="flex items-end gap-[2px]" style={{ height: CHART_HEIGHT_PX }}>
          {daily.map((d) => {
-            const total = d.input_tokens + d.output_tokens;
-            const inputH = Math.round((d.input_tokens / maxTokens) * CHART_HEIGHT_PX);
+            const promptTokens = getPromptTokens(d);
+            const total = promptTokens + d.output_tokens;
+            const inputH = Math.round((promptTokens / maxTokens) * CHART_HEIGHT_PX);
            const outputH = Math.round((d.output_tokens / maxTokens) * CHART_HEIGHT_PX);
            return (
              <div
@@ -99,7 +106,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
                <div className="absolute bottom-full left-1/2 -translate-x-1/2 mb-2 hidden group-hover:block z-10 pointer-events-none">
                  <div className="bg-card border border-border px-2.5 py-1.5 text-[10px] text-foreground shadow-lg whitespace-nowrap">
                    <div className="font-medium">{formatDate(d.day)}</div>
-                    <div>{t.analytics.input}: {formatTokens(d.input_tokens)}</div>
+                    <div>{t.analytics.prompt}: {formatTokens(promptTokens)}</div>
                    <div>{t.analytics.output}: {formatTokens(d.output_tokens)}</div>
                    <div>{t.analytics.total}: {formatTokens(total)}</div>
                  </div>
@@ -152,18 +159,19 @@ function DailyTable({ daily }: { daily: AnalyticsDailyEntry[] }) {
              <tr className="border-b border-border text-muted-foreground text-xs">
                <th className="text-left py-2 pr-4 font-medium">{t.analytics.date}</th>
                <th className="text-right py-2 px-4 font-medium">{t.sessions.title}</th>
-                <th className="text-right py-2 px-4 font-medium">{t.analytics.input}</th>
+                <th className="text-right py-2 px-4 font-medium">{t.analytics.prompt}</th>
                <th className="text-right py-2 pl-4 font-medium">{t.analytics.output}</th>
              </tr>
            </thead>
            <tbody>
              {sorted.map((d) => {
+                const promptTokens = getPromptTokens(d);
                return (
                  <tr key={d.day} className="border-b border-border/50 hover:bg-secondary/20 transition-colors">
                    <td className="py-2 pr-4 font-medium">{formatDate(d.day)}</td>
                    <td className="text-right py-2 px-4 text-muted-foreground">{d.sessions}</td>
                    <td className="text-right py-2 px-4">
-                      <span className="text-[#ffe6cb]">{formatTokens(d.input_tokens)}</span>
+                      <span className="text-[#ffe6cb]">{formatTokens(promptTokens)}</span>
                    </td>
                    <td className="text-right py-2 pl-4">
                      <span className="text-emerald-400">{formatTokens(d.output_tokens)}</span>
@@ -184,7 +192,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) {
  if (models.length === 0) return null;

  const sorted = [...models].sort(
-    (a, b) => b.input_tokens + b.output_tokens - (a.input_tokens + a.output_tokens),
+    (a, b) => (getPromptTokens(b) + b.output_tokens) - (getPromptTokens(a) + a.output_tokens),
  );

  return (
@@ -213,7 +221,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) {
                  </td>
                  <td className="text-right py-2 px-4 text-muted-foreground">{m.sessions}</td>
                  <td className="text-right py-2 pl-4">
-                    <span className="text-[#ffe6cb]">{formatTokens(m.input_tokens)}</span>
+                    <span className="text-[#ffe6cb]">{formatTokens(getPromptTokens(m))}</span>
                    {" / "}
                    <span className="text-emerald-400">{formatTokens(m.output_tokens)}</span>
                  </td>
@@ -283,12 +291,17 @@ export default function AnalyticsPage() {
      {data && (
        <>
          {/* Summary cards */}
-          <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-3">
+          <div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
            <SummaryCard
              icon={Hash}
              label={t.analytics.totalTokens}
-              value={formatTokens(data.totals.total_input + data.totals.total_output)}
-              sub={t.analytics.inOut.replace("{input}", formatTokens(data.totals.total_input)).replace("{output}", formatTokens(data.totals.total_output))}
+              value={formatTokens(
+                (data.totals.total_input ?? 0) +
+                (data.totals.total_cache_read ?? 0) +
+                (data.totals.total_cache_write ?? 0) +
+                (data.totals.total_output ?? 0)
+              )}
+              sub={`${formatTokens((data.totals.total_input ?? 0) + (data.totals.total_cache_read ?? 0) + (data.totals.total_cache_write ?? 0))} ${t.analytics.prompt} / ${formatTokens(data.totals.total_output ?? 0)} ${t.analytics.output.toLowerCase()}`}
            />
            <SummaryCard
              icon={BarChart3}
@@ -297,11 +310,25 @@ export default function AnalyticsPage() {
              sub={`~${(data.totals.total_sessions / days).toFixed(1)}${t.analytics.perDayAvg}`}
            />
            <SummaryCard
-              icon={TrendingUp}
+              icon={Zap}
              label={t.analytics.apiCalls}
-              value={String(data.daily.reduce((sum, d) => sum + d.sessions, 0))}
+              value={String(data.totals.total_api_calls ?? data.daily.reduce((sum, d) => sum + d.sessions, 0))}
              sub={t.analytics.acrossModels.replace("{count}", String(data.by_model.length))}
            />
+            {(() => {
+              const promptSent = (data.totals.total_input ?? 0) + (data.totals.total_cache_read ?? 0);
+              const rate = promptSent > 0
+                ? `${((data.totals.total_cache_read ?? 0) / promptSent * 100).toFixed(0)}%`
+                : "—";
+              return (
+                <SummaryCard
+                  icon={TrendingUp}
+                  label={t.analytics.cacheHitRate}
+                  value={rate}
+                  sub={`${formatTokens(data.totals.total_cache_read ?? 0)} ${t.analytics.cached}`}
+                />
+              );
+            })()}
          </div>

          {/* Bar chart */}
@@ -53,7 +53,8 @@ export default function StatusPage() {
  };

  function gatewayValue(): string {
-    if (status!.gateway_running) return `${t.status.pid} ${status!.gateway_pid}`;
+    if (status!.gateway_running && status!.gateway_pid) return `${t.status.pid} ${status!.gateway_pid}`;
+    if (status!.gateway_running) return t.status.runningRemote;
    if (status!.gateway_state === "startup_failed") return t.status.startFailed;
    return t.status.notRunning;
  }
@@ -14,11 +14,12 @@ Make it a **Tool** when it requires end-to-end integration with API keys, custom

 ## Overview

-Adding a tool touches **3 files**:
+Adding a tool touches **2 files**:

 1. **`tools/your_tool.py`** — handler, schema, check function, `registry.register()` call
 2. **`toolsets.py`** — add tool name to `_HERMES_CORE_TOOLS` (or a specific toolset)
-3. **`model_tools.py`** — add `"tools.your_tool"` to the `_discover_tools()` list
+
+Any `tools/*.py` file with a top-level `registry.register()` call is auto-discovered at startup — no manual import list required.

 ## Step 1: Create the Tool File

@@ -124,19 +125,9 @@ _HERMES_CORE_TOOLS = [
 },
 ```

-## Step 3: Add Discovery Import
+## ~~Step 3: Add Discovery Import~~ (No longer needed)

-In `model_tools.py`, add the module to the `_discover_tools()` list:
-
-```python
-def _discover_tools():
-    _modules = [
-        ...
-        "tools.weather_tool",  # <-- add here
-    ]
-```
-
-This import triggers the `registry.register()` call at the bottom of your tool file.
+Tool modules with a top-level `registry.register()` call are auto-discovered by `discover_builtin_tools()` in `tools/registry.py`. No manual import list to maintain — just create your file in `tools/` and it's picked up at startup.

 ## Async Handlers

@@ -275,4 +275,4 @@ model_tools.py  (imports tools/registry + triggers tool discovery)
 run_agent.py, cli.py, batch_runner.py, environments/
 ```

-This chain means tool registration happens at import time, before any agent instance is created. Adding a new tool requires an import in `model_tools.py`'s `_discover_tools()` list.
+This chain means tool registration happens at import time, before any agent instance is created. Any `tools/*.py` file with a top-level `registry.register()` call is auto-discovered — no manual import list needed.
@@ -42,37 +42,23 @@ registry.register(

 Each call creates a `ToolEntry` stored in the singleton `ToolRegistry._tools` dict keyed by tool name. If a name collision occurs across toolsets, a warning is logged and the later registration wins.

-### Discovery: `_discover_tools()`
+### Discovery: `discover_builtin_tools()`

-When `model_tools.py` is imported, it calls `_discover_tools()` which imports every tool module in order:
+When `model_tools.py` is imported, it calls `discover_builtin_tools()` from `tools/registry.py`. This function scans every `tools/*.py` file using AST parsing to find modules that contain top-level `registry.register()` calls, then imports them:

 ```python
-_modules = [
-    "tools.web_tools",
-    "tools.terminal_tool",
-    "tools.file_tools",
-    "tools.vision_tools",
-    "tools.mixture_of_agents_tool",
-    "tools.image_generation_tool",
-    "tools.skills_tool",
-    "tools.skill_manager_tool",
-    "tools.browser_tool",
-    "tools.cronjob_tools",
-    "tools.rl_training_tool",
-    "tools.tts_tool",
-    "tools.todo_tool",
-    "tools.memory_tool",
-    "tools.session_search_tool",
-    "tools.clarify_tool",
-    "tools.code_execution_tool",
-    "tools.delegate_tool",
-    "tools.process_registry",
-    "tools.send_message_tool",
-    # "tools.honcho_tools",  # Removed — Honcho is now a memory provider plugin
-    "tools.homeassistant_tool",
-]
+# tools/registry.py (simplified)
+def discover_builtin_tools(tools_dir=None):
+    tools_path = Path(tools_dir) if tools_dir else Path(__file__).parent
+    for path in sorted(tools_path.glob("*.py")):
+        if path.name in {"__init__.py", "registry.py", "mcp_tool.py"}:
+            continue
+        if _module_registers_tools(path):  # AST check for top-level registry.register()
+            importlib.import_module(f"tools.{path.stem}")
 ```

+This auto-discovery means new tool files are picked up automatically — no manual list to maintain. The AST check only matches top-level `registry.register()` calls (not calls inside functions), so helper modules in `tools/` are not imported.
+
 Each import triggers the module's `registry.register()` calls. Errors in optional tools (e.g., missing `fal_client` for image generation) are caught and logged — they don't prevent other tools from loading.

 After core tool discovery, MCP tools and plugin tools are also discovered:
@@ -152,12 +152,15 @@ hermes setup

 ### Install optional Node dependencies manually

-The tested Termux path skips Node/browser bootstrap on purpose. If you want to experiment later:
+The tested Termux path skips Node/browser bootstrap on purpose. If you want to experiment with browser tooling later:

 ```bash
+pkg install nodejs-lts
 npm install
 ```

+The browser tool automatically includes Termux directories (`/data/data/com.termux/files/usr/bin`) in its PATH search, so `agent-browser` and `npx` are discovered without any extra PATH configuration.
+
 Treat browser / WhatsApp tooling on Android as experimental until documented otherwise.

 ---
@@ -35,9 +35,39 @@ docker run -d \
  --name hermes \
  --restart unless-stopped \
  -v ~/.hermes:/opt/data \
+  -p 8642:8642 \
  nousresearch/hermes-agent gateway run
 ```

+Port 8642 exposes the gateway's [OpenAI-compatible API server](./api-server.md) and health endpoint. It's optional if you only use chat platforms (Telegram, Discord, etc.), but required if you want the dashboard or external tools to reach the gateway.
+
+Opening any port on an internet-facing machine is a security risk. Do not publish this port unless you understand the risks.
+
+## Running the dashboard
+
+The built-in web dashboard can run alongside the gateway as a separate container.
+
+Point the dashboard container at the gateway's health endpoint so it can detect gateway status across containers:
+
+```sh
+docker run -d \
+  --name hermes-dashboard \
+  --restart unless-stopped \
+  -v ~/.hermes:/opt/data \
+  -p 9119:9119 \
+  -e GATEWAY_HEALTH_URL=http://$HOST_IP:8642 \
+  nousresearch/hermes-agent dashboard --host 0.0.0.0
+```
+
+Replace `$HOST_IP` with the IP address of the machine running the gateway container (e.g. `192.168.1.100`), or use a Docker network hostname if both containers share a network (see the [Compose example](#docker-compose-example) below).
+
+| Environment variable | Description | Default |
+|---------------------|-------------|---------|
+| `GATEWAY_HEALTH_URL` | Base URL of the gateway's API server, e.g. `http://gateway:8642` | *(unset — local PID check only)* |
+| `GATEWAY_HEALTH_TIMEOUT` | Health probe timeout in seconds | `3` |
+
+Without `GATEWAY_HEALTH_URL`, the dashboard falls back to local process detection — which only works when the gateway runs in the same container or on the same host.
+
 ## Running interactively (CLI chat)

 To open an interactive chat session against a running data directory:
@@ -66,7 +96,7 @@ The `/opt/data` volume is the single source of truth for all Hermes state. It ma
 | `skins/` | Custom CLI skins |

 :::warning
-Never run two Hermes containers against the same data directory simultaneously — session files and memory stores are not designed for concurrent access.
+Never run two Hermes **gateway** containers against the same data directory simultaneously — session files and memory stores are not designed for concurrent write access. Running a dashboard container alongside the gateway is safe since the dashboard only reads data.
 :::

 ## Environment variable forwarding
@@ -85,18 +115,21 @@ Direct `-e` flags override values from `.env`. This is useful for CI/CD or secre

 ## Docker Compose example

-For persistent gateway deployment, a `docker-compose.yaml` is convenient:
+For persistent deployment with both the gateway and dashboard, a `docker-compose.yaml` is convenient:

 ```yaml
-version: "3.8"
 services:
  hermes:
    image: nousresearch/hermes-agent:latest
    container_name: hermes
    restart: unless-stopped
    command: gateway run
+    ports:
+      - "8642:8642"
    volumes:
      - ~/.hermes:/opt/data
+    networks:
+      - hermes-net
    # Uncomment to forward specific env vars instead of using .env file:
    # environment:
    #   - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
@@ -107,9 +140,34 @@ services:
        limits:
          memory: 4G
          cpus: "2.0"
+
+  dashboard:
+    image: nousresearch/hermes-agent:latest
+    container_name: hermes-dashboard
+    restart: unless-stopped
+    command: dashboard --host 0.0.0.0
+    ports:
+      - "9119:9119"
+    volumes:
+      - ~/.hermes:/opt/data
+    environment:
+      - GATEWAY_HEALTH_URL=http://hermes:8642
+    networks:
+      - hermes-net
+    depends_on:
+      - hermes
+    deploy:
+      resources:
+        limits:
+          memory: 512M
+          cpus: "0.5"
+
+networks:
+  hermes-net:
+    driver: bridge
 ```

-Start with `docker compose up -d` and view logs with `docker compose logs -f hermes`.
+Start with `docker compose up -d` and view logs with `docker compose logs -f`.

 ## Resource limits

@@ -83,9 +83,11 @@ Standard OpenAI Chat Completions format. Stateless — the full conversation is
 }
 ```

-**Streaming** (`"stream": true`): Returns Server-Sent Events (SSE) with token-by-token response chunks. When streaming is enabled in config, tokens are emitted live as the LLM generates them. When disabled, the full response is sent as a single SSE chunk.
+**Streaming** (`"stream": true`): Returns Server-Sent Events (SSE) with token-by-token response chunks. For **Chat Completions**, the stream uses standard `chat.completion.chunk` events plus Hermes' custom `hermes.tool.progress` event for tool-start UX. For **Responses**, the stream uses OpenAI Responses event types such as `response.created`, `response.output_text.delta`, `response.output_item.added`, `response.output_item.done`, and `response.completed`.

-**Tool progress in streams**: When the agent calls tools during a streaming request, brief progress indicators are injected into the content stream as the tools start executing (e.g. `` `💻 pwd` ``, `` `🔍 Python docs` ``). These appear as inline markdown before the agent's response text, giving frontends like Open WebUI real-time visibility into tool execution.
+**Tool progress in streams**:
+- **Chat Completions**: Hermes emits `event: hermes.tool.progress` for tool-start visibility without polluting persisted assistant text.
+- **Responses**: Hermes emits spec-native `function_call` and `function_call_output` output items during the SSE stream, so clients can render structured tool UI in real time.

 ### POST /v1/responses

@@ -128,7 +130,7 @@ Chain responses to maintain full context (including tool calls) across turns:
 }
 ```

-The server reconstructs the full conversation from the stored response chain — all previous tool calls and results are preserved.
+The server reconstructs the full conversation from the stored response chain — all previous tool calls and results are preserved. Chained requests also share the same session, so multi-turn conversations appear as a single entry in the dashboard and session history.

 #### Named conversations

@@ -134,10 +134,10 @@ To use the Responses API mode:
 3. Change **API Type** from "Chat Completions" to **"Responses (Experimental)"**
 4. Save

-With the Responses API, Open WebUI sends requests in the Responses format (`input` array + `instructions`), and Hermes Agent can preserve full tool call history across turns via `previous_response_id`.
+With the Responses API, Open WebUI sends requests in the Responses format (`input` array + `instructions`), and Hermes Agent can preserve full tool call history across turns via `previous_response_id`. When `stream: true`, Hermes also streams spec-native `function_call` and `function_call_output` items, which enables custom structured tool-call UI in clients that render Responses events.

 :::note
-Open WebUI currently manages conversation history client-side even in Responses mode — it sends the full message history in each request rather than using `previous_response_id`. The Responses API mode is mainly useful for future compatibility as frontends evolve.
+Open WebUI currently manages conversation history client-side even in Responses mode — it sends the full message history in each request rather than using `previous_response_id`. The main advantage of Responses mode today is the structured event stream: text deltas, `function_call`, and `function_call_output` items arrive as OpenAI Responses SSE events instead of Chat Completions chunks.
 :::

 ## How It Works
@@ -0,0 +1,191 @@
+---
+sidebar_position: 2
+sidebar_label: "Google Workspace"
+title: "Google Workspace — Gmail, Calendar, Drive, Sheets & Docs"
+description: "Send email, manage calendar events, search Drive, read/write Sheets, and access Docs — all through OAuth2-authenticated Google APIs"
+---
+
+# Google Workspace Skill
+
+Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses OAuth2 with automatic token refresh. Prefers the [Google Workspace CLI (`gws`)](https://github.com/nicholasgasior/gws) when available for broader coverage, and falls back to Google's Python client libraries otherwise.
+
+**Skill path:** `skills/productivity/google-workspace/`
+
+## Setup
+
+The setup is fully agent-driven — ask Hermes to set up Google Workspace and it walks you through each step. The flow:
+
+1. **Create a Google Cloud project** and enable the required APIs (Gmail, Calendar, Drive, Sheets, Docs, People)
+2. **Create OAuth 2.0 credentials** (Desktop app type) and download the client secret JSON
+3. **Authorize** — Hermes generates an auth URL, you approve in the browser, paste back the redirect URL
+4. **Done** — token auto-refreshes from that point on
+
+:::tip Email-only users
+If you only need email (no Calendar/Drive/Sheets), use the **himalaya** skill instead — it works with a Gmail App Password and takes 2 minutes. No Google Cloud project needed.
+:::
+
+## Gmail
+
+### Searching
+
+```bash
+$GAPI gmail search "is:unread" --max 10
+$GAPI gmail search "from:boss@company.com newer_than:1d"
+$GAPI gmail search "has:attachment filename:pdf newer_than:7d"
+```
+
+Returns JSON with `id`, `threadId`, `from`, `to`, `subject`, `date`, `snippet`, and `labels` for each message.
+
+### Reading
+
+```bash
+$GAPI gmail get MESSAGE_ID
+```
+
+Returns JSON with the message headers and the full message `body` as text (prefers plain text, falls back to HTML).
+
+### Sending
+
+```bash
+# Basic send
+$GAPI gmail send --to user@example.com --subject "Hello" --body "Message text"
+
+# HTML email
+$GAPI gmail send --to user@example.com --subject "Report" \
+  --body "<h1>Q4 Results</h1><p>Details here</p>" --html
+
+# Custom From header (display name + email)
+$GAPI gmail send --to user@example.com --subject "Hello" \
+  --from '"Research Agent" <user@example.com>' --body "Message text"
+
+# With CC
+$GAPI gmail send --to user@example.com --cc "team@example.com" \
+  --subject "Update" --body "FYI"
+```
+
+### Custom From Header
+
+The `--from` flag lets you customize the sender display name on outgoing emails. This is useful when multiple agents share the same Gmail account but you want recipients to see different names:
+
+```bash
+# Agent 1
+$GAPI gmail send --to client@co.com --subject "Research Summary" \
+  --from '"Research Agent" <shared@company.com>' --body "..."
+
+# Agent 2
+$GAPI gmail send --to client@co.com --subject "Code Review" \
+  --from '"Code Assistant" <shared@company.com>' --body "..."
+```
+
+**How it works:** The `--from` value is set as the RFC 5322 `From` header on the MIME message. Gmail allows customizing the display name on your own authenticated email address without any additional configuration. Recipients see the custom display name (e.g. "Research Agent") while the email address stays the same.
+
+**Important:** If you use a *different email address* in `--from` (not the authenticated account), Gmail requires that address to be configured as a [Send As alias](https://support.google.com/mail/answer/22370) in Gmail Settings → Accounts → Send mail as.
+
+The `--from` flag works on both `send` and `reply`:
+
+```bash
+$GAPI gmail reply MESSAGE_ID \
+  --from '"Support Bot" <shared@company.com>' --body "We're on it"
+```
+
+### Replying
+
+```bash
+$GAPI gmail reply MESSAGE_ID --body "Thanks, that works for me."
+```
+
+Automatically threads the reply (sets `In-Reply-To` and `References` headers) and uses the original message's thread ID.
+
+### Labels
+
+```bash
+# List all labels
+$GAPI gmail labels
+
+# Add/remove labels
+$GAPI gmail modify MESSAGE_ID --add-labels LABEL_ID
+$GAPI gmail modify MESSAGE_ID --remove-labels UNREAD
+```
+
+## Calendar
+
+```bash
+# List events (defaults to next 7 days)
+$GAPI calendar list
+$GAPI calendar list --start 2026-03-01T00:00:00Z --end 2026-03-07T23:59:59Z
+
+# Create event (timezone required)
+$GAPI calendar create --summary "Team Standup" \
+  --start 2026-03-01T10:00:00-07:00 --end 2026-03-01T10:30:00-07:00
+
+# With location and attendees
+$GAPI calendar create --summary "Lunch" \
+  --start 2026-03-01T12:00:00Z --end 2026-03-01T13:00:00Z \
+  --location "Cafe" --attendees "alice@co.com,bob@co.com"
+
+# Delete event
+$GAPI calendar delete EVENT_ID
+```
+
+:::warning
+Calendar times **must** include a timezone offset (e.g. `-07:00`) or use UTC (`Z`). Bare datetimes like `2026-03-01T10:00:00` are ambiguous and will be treated as UTC.
+:::
+
+## Drive
+
+```bash
+$GAPI drive search "quarterly report" --max 10
+$GAPI drive search "mimeType='application/pdf'" --raw-query --max 5
+```
+
+## Sheets
+
+```bash
+# Read a range
+$GAPI sheets get SHEET_ID "Sheet1!A1:D10"
+
+# Write to a range
+$GAPI sheets update SHEET_ID "Sheet1!A1:B2" --values '[["Name","Score"],["Alice","95"]]'
+
+# Append rows
+$GAPI sheets append SHEET_ID "Sheet1!A:C" --values '[["new","row","data"]]'
+```
+
+## Docs
+
+```bash
+$GAPI docs get DOC_ID
+```
+
+Returns the document title and full text content.
+
+## Contacts
+
+```bash
+$GAPI contacts list --max 20
+```
+
+## Output Format
+
+All commands return JSON. Key fields per service:
+
+| Command | Fields |
+|---------|--------|
+| `gmail search` | `id`, `threadId`, `from`, `to`, `subject`, `date`, `snippet`, `labels` |
+| `gmail get` | `id`, `threadId`, `from`, `to`, `subject`, `date`, `labels`, `body` |
+| `gmail send/reply` | `status`, `id`, `threadId` |
+| `calendar list` | `id`, `summary`, `start`, `end`, `location`, `description`, `htmlLink` |
+| `calendar create` | `status`, `id`, `summary`, `htmlLink` |
+| `drive search` | `id`, `name`, `mimeType`, `modifiedTime`, `webViewLink` |
+| `contacts list` | `name`, `emails`, `phones` |
+| `sheets get` | 2D array of cell values |
+
+## Troubleshooting
+
+| Problem | Fix |
+|---------|-----|
+| `NOT_AUTHENTICATED` | Run setup (ask Hermes to set up Google Workspace) |
+| `REFRESH_FAILED` | Token revoked — re-run authorization steps |
+| `HttpError 403: Insufficient Permission` | Missing scope — revoke and re-authorize with the right services |
+| `HttpError 403: Access Not Configured` | API not enabled in Google Cloud Console |
+| `ModuleNotFoundError` | Run setup script with `--install-deps` |
@@ -92,6 +92,7 @@ const sidebars: SidebarsConfig = {
          label: 'Skills',
          items: [
            'user-guide/skills/godmode',
+            'user-guide/skills/google-workspace',
          ],
        },
      ],
@@ -118,7 +119,6 @@ const sidebars: SidebarsConfig = {
        'user-guide/messaging/wecom-callback',
        'user-guide/messaging/weixin',
        'user-guide/messaging/bluebubbles',
-        'user-guide/messaging/qqbot',
        'user-guide/messaging/open-webui',
        'user-guide/messaging/webhooks',
      ],
@@ -153,7 +153,6 @@ const sidebars: SidebarsConfig = {
        'guides/use-voice-mode-with-hermes',
        'guides/build-a-hermes-plugin',
        'guides/automate-with-cron',
-        'guides/automation-templates',
        'guides/cron-troubleshooting',
        'guides/work-with-skills',
        'guides/delegation-patterns',