fix(web): CronPage crash when rendering schedule object

The cron API returns schedule as a {kind, expr, display} object, but CronPage.tsx rendered it directly as a React child, crashing with 'Objects are not valid as a React child'.

- Update the CronJob interface in api.ts to match the actual API response
- Use schedule_display (string) instead of schedule (object)
- Use state instead of status for the job state
- Use last_error instead of error for error display
fix: restore clarify toolset row removed in cherry-pick
2026-04-13 12:01:12 +02:00 · 2026-04-13 02:49:11 -07:00 · 2026-04-13 02:49:11 -07:00 · 2026-04-13 02:12:01 -07:00 · 2026-04-13 01:15:27 -07:00 · 2026-04-13 00:49:22 -07:00
149 changed files with 21399 additions and 833 deletions
@@ -0,0 +1,2 @@
+# Auto-generated files — collapse diffs and exclude from language stats
+web/package-lock.json linguist-generated=true
@@ -41,11 +41,19 @@ jobs:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml
+        run: pip install pyyaml httpx

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

+      - name: Build skills index (if not already present)
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [ ! -f website/static/api/skills-index.json ]; then
+            python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
+          fi
+
      - name: Install dependencies
        run: npm ci
        working-directory: website
@@ -69,9 +69,7 @@ jobs:
          file: Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
-          tags: |
-            nousresearch/hermes-agent:latest
-            nousresearch/hermes-agent:${{ github.sha }}
+          tags: nousresearch/hermes-agent:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max

@@ -83,9 +81,6 @@ jobs:
          file: Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
-          tags: |
-            nousresearch/hermes-agent:latest
-            nousresearch/hermes-agent:${{ github.event.release.tag_name }}
-            nousresearch/hermes-agent:${{ github.sha }}
+          tags: nousresearch/hermes-agent:${{ github.event.release.tag_name }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
@@ -0,0 +1,101 @@
+name: Build Skills Index
+
+on:
+  schedule:
+    # Run twice daily: 6 AM and 6 PM UTC
+    - cron: '0 6,18 * * *'
+  workflow_dispatch:  # Manual trigger
+  push:
+    branches: [main]
+    paths:
+      - 'scripts/build_skills_index.py'
+      - '.github/workflows/skills-index.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  build-index:
+    # Generates website/static/api/skills-index.json and publishes it as the
+    # "skills-index" artifact, which the deploy-with-index job downloads.
+    # Only run on the upstream repository, not on forks
+    if: github.repository == 'NousResearch/hermes-agent'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: pip install httpx pyyaml
+
+      - name: Build skills index
+        env:
+          # Exposed to the script's environment; presumably used for
+          # authenticated GitHub API requests — confirm in build_skills_index.py.
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: python scripts/build_skills_index.py
+
+      - name: Upload index artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: skills-index
+          path: website/static/api/skills-index.json
+          # The artifact is only a hand-off between jobs, so it is kept briefly.
+          retention-days: 7
+
+  deploy-with-index:
+    # Rebuilds the documentation site with the freshly generated skills index
+    # and publishes the result to GitHub Pages.
+    needs: build-index
+    runs-on: ubuntu-latest
+    permissions:
+      # A job-level permissions block replaces the workflow-level one, and any
+      # scope not listed here is set to "none" — so contents: read has to be
+      # restated for actions/checkout instead of relying on the repository
+      # being public.
+      contents: read
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deploy.outputs.page_url }}
+    # Only deploy on schedule or manual trigger (not on every push to the script)
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+    steps:
+      - uses: actions/checkout@v4
+
+      # Drop the index produced by build-index into the static tree before
+      # the site is built.
+      - uses: actions/download-artifact@v4
+        with:
+          name: skills-index
+          path: website/static/api/
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: npm
+          cache-dependency-path: website/package-lock.json
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install PyYAML for skill extraction
+        run: pip install pyyaml
+
+      - name: Extract skill metadata for dashboard
+        run: python3 website/scripts/extract-skills.py
+
+      - name: Install dependencies
+        run: npm ci
+        working-directory: website
+
+      - name: Build Docusaurus
+        run: npm run build
+        working-directory: website
+
+      # Landing page goes at the site root, the Docusaurus build under /docs.
+      - name: Stage deployment
+        run: |
+          mkdir -p _site/docs
+          cp -r landingpage/* _site/
+          cp -r website/build/* _site/docs/
+          echo "hermes-agent.nousresearch.com" > _site/CNAME
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: _site
+
+      - name: Deploy to GitHub Pages
+        id: deploy
+        uses: actions/deploy-pages@v4
@@ -51,6 +51,9 @@ ignored/
 .worktrees/
 environments/benchmarks/evals/

+# Web UI build output
+hermes_cli/web_dist/
+
 # Release script temp files
 .release_notes.md
 mini-swe-agent/
@@ -58,3 +61,4 @@ mini-swe-agent/
 # Nix
 .direnv/
 result
+website/static/api/skills-index.json
@@ -167,6 +167,7 @@ python -m pytest tests/ -q
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
 - 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
+- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---

@@ -1021,6 +1021,23 @@ _AUTO_PROVIDER_LABELS = {

 _AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})

+_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
+
+
+def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, str]:
+    """Return a sanitized copy of a live main-runtime override.
+
+    Only the keys listed in ``_MAIN_RUNTIME_FIELDS`` are copied, and only when
+    their value is a non-blank string; surrounding whitespace is stripped and
+    the provider name is lower-cased.  The input mapping is not modified.
+
+    Args:
+        main_runtime: Override mapping, or any non-dict value (treated as
+            "no override").
+
+    Returns:
+        A new dict holding the accepted fields; empty when nothing usable
+        was supplied.
+    """
+    if not isinstance(main_runtime, dict):
+        return {}
+    normalized: Dict[str, str] = {}
+    for field in _MAIN_RUNTIME_FIELDS:
+        value = main_runtime.get(field)
+        # Non-string and whitespace-only values are dropped, not coerced.
+        if isinstance(value, str) and value.strip():
+            normalized[field] = value.strip()
+    provider = normalized.get("provider")
+    if provider:
+        normalized["provider"] = provider.lower()
+    return normalized
+

 def _get_provider_chain() -> List[tuple]:
    """Return the ordered provider detection chain.
@@ -1130,7 +1147,7 @@ def _try_payment_fallback(
    return None, None, ""


-def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -1142,6 +1159,12 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    """
    global auxiliary_is_nous, _stale_base_url_warned
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
+    runtime = _normalize_main_runtime(main_runtime)
+    runtime_provider = runtime.get("provider", "")
+    runtime_model = runtime.get("model", "")
+    runtime_base_url = runtime.get("base_url", "")
+    runtime_api_key = runtime.get("api_key", "")
+    runtime_api_mode = runtime.get("api_mode", "")

    # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
    #    provider (not 'custom').  This catches the common "env poisoning"
@@ -1149,7 +1172,7 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    #    old OPENAI_BASE_URL lingers in ~/.hermes/.env. ──
    if not _stale_base_url_warned:
        _env_base = os.getenv("OPENAI_BASE_URL", "").strip()
-        _cfg_provider = _read_main_provider()
+        _cfg_provider = runtime_provider or _read_main_provider()
        if (_env_base and _cfg_provider
                and _cfg_provider != "custom"
                and not _cfg_provider.startswith("custom:")):
@@ -1163,12 +1186,25 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
            _stale_base_url_warned = True

    # ── Step 1: non-aggregator main provider → use main model directly ──
-    main_provider = _read_main_provider()
-    main_model = _read_main_model()
+    main_provider = runtime_provider or _read_main_provider()
+    main_model = runtime_model or _read_main_model()
    if (main_provider and main_model
            and main_provider not in _AGGREGATOR_PROVIDERS
            and main_provider not in ("auto", "")):
-        client, resolved = resolve_provider_client(main_provider, main_model)
+        resolved_provider = main_provider
+        explicit_base_url = None
+        explicit_api_key = None
+        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
+            resolved_provider = "custom"
+            explicit_base_url = runtime_base_url
+            explicit_api_key = runtime_api_key or None
+        client, resolved = resolve_provider_client(
+            resolved_provider,
+            main_model,
+            explicit_base_url=explicit_base_url,
+            explicit_api_key=explicit_api_key,
+            api_mode=runtime_api_mode or None,
+        )
        if client is not None:
            logger.info("Auxiliary auto-detect: using main provider %s (%s)",
                        main_provider, resolved or main_model)
@@ -1249,6 +1285,7 @@ def resolve_provider_client(
    explicit_base_url: str = None,
    explicit_api_key: str = None,
    api_mode: str = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1319,7 +1356,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto()
+        client, resolved = _resolve_auto(main_runtime=main_runtime)
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -1543,7 +1580,11 @@ def resolve_provider_client(

 # ── Public API ──────────────────────────────────────────────────────────────

-def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
+def get_text_auxiliary_client(
+    task: str = "",
+    *,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, default_model_slug) for text-only auxiliary tasks.

    Args:
@@ -1560,10 +1601,11 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
+        main_runtime=main_runtime,
    )


-def get_async_text_auxiliary_client(task: str = ""):
+def get_async_text_auxiliary_client(task: str = "", *, main_runtime: Optional[Dict[str, Any]] = None):
    """Return (async_client, model_slug) for async consumers.

    For standard providers returns (AsyncOpenAI, model). For Codex returns
@@ -1578,6 +1620,7 @@ def get_async_text_auxiliary_client(task: str = ""):
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
+        main_runtime=main_runtime,
    )


@@ -1892,6 +1935,7 @@ def _get_cached_client(
    base_url: str = None,
    api_key: str = None,
    api_mode: str = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -1915,7 +1959,9 @@ def _get_cached_client(
            loop_id = id(current_loop)
        except RuntimeError:
            pass
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id)
+    runtime = _normalize_main_runtime(main_runtime)
+    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key)
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -1940,6 +1986,7 @@ def _get_cached_client(
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
+        main_runtime=runtime,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -2065,6 +2112,75 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
    return default


+# ---------------------------------------------------------------------------
+# Anthropic-compatible endpoint detection + image block conversion
+# ---------------------------------------------------------------------------
+
+# Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
+# Their image content blocks must use Anthropic format, not OpenAI format.
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
+
+
+def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
+    """Detect if an endpoint expects Anthropic-format content blocks.
+
+    Returns True for known Anthropic-compatible providers (MiniMax) and
+    any endpoint whose URL contains ``/anthropic`` in the path.
+
+    Args:
+        provider: Provider identifier, compared against
+            ``_ANTHROPIC_COMPAT_PROVIDERS``.
+        base_url: Endpoint base URL; ``None`` or an empty string means no URL.
+
+    Returns:
+        ``True`` when image blocks should be sent in Anthropic format.
+    """
+    from urllib.parse import urlparse
+
+    if provider in _ANTHROPIC_COMPAT_PROVIDERS:
+        return True
+    url_lower = (base_url or "").strip().lower()
+    # Look at the path component only: a substring test on the whole URL also
+    # matches hosts that merely start with "anthropic"
+    # (``https://anthropic-proxy.example/v1`` contains ``//anthropic``).
+    try:
+        path = urlparse(url_lower).path
+    except ValueError:
+        # Unparseable URL (e.g. malformed IPv6 literal): keep the previous
+        # whole-string behaviour rather than guessing.
+        path = url_lower
+    return "/anthropic" in path
+
+
+def _convert_openai_images_to_anthropic(messages: list) -> list:
+    """Convert OpenAI ``image_url`` content blocks to Anthropic ``image`` blocks.
+
+    Only touches messages that have list-type content with ``image_url`` blocks;
+    plain text messages pass through unchanged.  Input messages are never
+    mutated: a message that needs conversion is shallow-copied with a new
+    ``content`` list, every other message is returned as the same object.
+
+    Args:
+        messages: Chat messages in OpenAI format.
+
+    Returns:
+        A new list with one entry per input message, in the same order.
+    """
+    converted = []
+    for msg in messages:
+        content = msg.get("content") if isinstance(msg, dict) else None
+        if not isinstance(content, list):
+            converted.append(msg)
+            continue
+        new_content = []
+        changed = False
+        for block in content:
+            # Parts that are not dicts (e.g. bare strings) cannot be image
+            # blocks; pass them through instead of raising AttributeError.
+            if not isinstance(block, dict) or block.get("type") != "image_url":
+                new_content.append(block)
+                continue
+            image_ref = block.get("image_url")
+            # Accept the canonical {"url": ...} object as well as the bare
+            # string shorthand that some clients emit.
+            if isinstance(image_ref, dict):
+                image_url_val = image_ref.get("url") or ""
+            elif isinstance(image_ref, str):
+                image_url_val = image_ref
+            else:
+                image_url_val = ""
+            if image_url_val.startswith("data:"):
+                # Parse data URI: data:<media_type>;base64,<data>
+                header, _, b64data = image_url_val.partition(",")
+                media_type = "image/png"
+                if ":" in header and ";" in header:
+                    # A blank media type ("data:;base64,...") falls back to
+                    # the default instead of being sent as an empty string.
+                    media_type = header.split(":", 1)[1].split(";", 1)[0] or "image/png"
+                new_content.append({
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": media_type,
+                        "data": b64data,
+                    },
+                })
+            else:
+                # URL-based image
+                new_content.append({
+                    "type": "image",
+                    "source": {
+                        "type": "url",
+                        "url": image_url_val,
+                    },
+                })
+            changed = True
+        converted.append({**msg, "content": new_content} if changed else msg)
+    return converted
+
+
+
 def _build_call_kwargs(
    provider: str,
    model: str,
@@ -2149,6 +2265,7 @@ def call_llm(
    model: str = None,
    base_url: str = None,
    api_key: str = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
@@ -2214,6 +2331,7 @@ def call_llm(
            base_url=resolved_base_url,
            api_key=resolved_api_key,
            api_mode=resolved_api_mode,
+            main_runtime=main_runtime,
        )
        if client is None:
            # When the user explicitly chose a non-OpenRouter provider but no
@@ -2234,7 +2352,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto")
+                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -2255,6 +2373,11 @@ def call_llm(
        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

+    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
+    _client_base = str(getattr(client, "base_url", "") or "")
+    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
+        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+
    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
    try:
        return _validate_llm_response(
@@ -2331,9 +2454,9 @@ def extract_content_or_reasoning(response) -> str:
    if content:
        # Strip inline think/reasoning blocks (mirrors _strip_think_blocks)
        cleaned = re.sub(
-            r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>"
+            r"<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>"
            r".*?"
-            r"</(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>",
+            r"</(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>",
            "", content, flags=re.DOTALL | re.IGNORECASE,
        ).strip()
        if cleaned:
@@ -2443,6 +2566,11 @@ async def async_call_llm(
        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

+    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
+    _client_base = str(getattr(client, "base_url", "") or "")
+    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
+        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+
    try:
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
@@ -86,12 +86,14 @@ class ContextCompressor(ContextEngine):
        base_url: str = "",
        api_key: str = "",
        provider: str = "",
+        api_mode: str = "",
    ) -> None:
        """Update model info after a model switch or fallback activation."""
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.provider = provider
+        self.api_mode = api_mode
        self.context_length = context_length
        self.threshold_tokens = max(
            int(context_length * self.threshold_percent),
@@ -111,11 +113,13 @@ class ContextCompressor(ContextEngine):
        api_key: str = "",
        config_context_length: int | None = None,
        provider: str = "",
+        api_mode: str = "",
    ):
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.provider = provider
+        self.api_mode = api_mode
        self.threshold_percent = threshold_percent
        self.protect_first_n = protect_first_n
        self.protect_last_n = protect_last_n
@@ -438,6 +442,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        try:
            call_kwargs = {
                "task": "compression",
+                "main_runtime": {
+                    "model": self.model,
+                    "provider": self.provider,
+                    "base_url": self.base_url,
+                    "api_key": self.api_key,
+                    "api_mode": self.api_mode,
+                },
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": summary_budget * 2,
                # timeout resolved from auxiliary.compression.timeout config by call_llm
@@ -24,6 +24,7 @@ from hermes_cli.auth import (
    _codex_access_token_is_expiring,
    _decode_jwt_claims,
    _import_codex_cli_tokens,
+    _write_codex_cli_tokens,
    _load_auth_store,
    _load_provider_state,
    _resolve_kimi_base_url,
@@ -693,6 +694,14 @@ class CredentialPool:
                        self._replace_entry(synced, updated)
                        self._persist()
                        self._sync_device_code_entry_to_auth_store(updated)
+                        try:
+                            _write_codex_cli_tokens(
+                                updated.access_token,
+                                updated.refresh_token,
+                                last_refresh=updated.last_refresh,
+                            )
+                        except Exception as wexc:
+                            logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc)
                        return updated
                    except Exception as retry_exc:
                        logger.debug("Codex retry refresh also failed: %s", retry_exc)
@@ -718,6 +727,17 @@ class CredentialPool:
        # _seed_from_singletons() on the next load_pool() sees fresh state
        # instead of re-seeding stale/consumed tokens.
        self._sync_device_code_entry_to_auth_store(updated)
+        # Write refreshed tokens back to ~/.codex/auth.json so Codex CLI
+        # and VS Code don't hit "refresh_token_reused" on their next refresh.
+        if self.provider == "openai-codex":
+            try:
+                _write_codex_cli_tokens(
+                    updated.access_token,
+                    updated.refresh_token,
+                    last_refresh=updated.last_refresh,
+                )
+            except Exception as wexc:
+                logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc)
        return updated

    def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
@@ -144,6 +144,8 @@ class ProviderInfo:
 PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openrouter": "openrouter",
    "anthropic": "anthropic",
+    "openai": "openai",
+    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
    "minimax": "minimax",
@@ -12,7 +12,7 @@ import threading
 from collections import OrderedDict
 from pathlib import Path

-from hermes_constants import get_hermes_home, get_skills_dir
+from hermes_constants import get_hermes_home, get_skills_dir, is_wsl
 from typing import Optional

 from agent.skill_utils import (
@@ -366,6 +366,36 @@ PLATFORM_HINTS = {
    ),
 }

+# ---------------------------------------------------------------------------
+# Environment hints — execution-environment awareness for the agent.
+# Unlike PLATFORM_HINTS (which describe the messaging channel), these describe
+# the machine/OS the agent's tools actually run on.
+# ---------------------------------------------------------------------------
+
+WSL_ENVIRONMENT_HINT = (
+    "You are running inside WSL (Windows Subsystem for Linux). "
+    "The Windows host filesystem is mounted under /mnt/ — "
+    "/mnt/c/ is the C: drive, /mnt/d/ is D:, etc. "
+    "The user's Windows files are typically at "
+    "/mnt/c/Users/<username>/Desktop/, Documents/, Downloads/, etc. "
+    "When the user references Windows paths or desktop files, translate "
+    "to the /mnt/c/ equivalent. You can list /mnt/c/Users/ to discover "
+    "the Windows username if needed."
+)
+
+
+def build_environment_hints() -> str:
+    """Return environment-specific guidance for the system prompt.
+
+    Detects WSL, and can be extended for Termux, Docker, etc.
+    Returns an empty string when no special environment is detected.
+    """
+    hints: list[str] = []
+    if is_wsl():
+        hints.append(WSL_ENVIRONMENT_HINT)
+    # Hints are separated by a blank line; joining an empty list yields "".
+    return "\n\n".join(hints)
+
+
 CONTEXT_FILE_MAX_CHARS = 20_000
 CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
@@ -726,8 +756,16 @@ def build_skills_system_prompt(

        result = (
            "## Skills (mandatory)\n"
-            "Before replying, scan the skills below. If one clearly matches your task, "
-            "load it with skill_view(name) and follow its instructions. "
+            "Before replying, scan the skills below. If a skill matches or is even partially relevant "
+            "to your task, you MUST load it with skill_view(name) and follow its instructions. "
+            "Err on the side of loading — it is always better to have context you don't need "
+            "than to miss critical steps, pitfalls, or established workflows. "
+            "Skills contain specialized knowledge — API endpoints, tool-specific commands, "
+            "and proven workflows that outperform general-purpose approaches. Load the skill "
+            "even if you think you could handle the task with basic tools like web_search or terminal. "
+            "Skills also encode the user's preferred approach, conventions, and quality standards "
+            "for tasks like code review, planning, and testing — load them even for tasks you "
+            "already know how to do, because the skill defines how it should be done here.\n"
            "If a skill has issues, fix it with skill_manage(action='patch').\n"
            "After difficult/iterative tasks, offer to save as a skill. "
            "If a skill you loaded was missing steps, had wrong commands, or needed "
@@ -737,7 +775,7 @@ def build_skills_system_prompt(
            + "\n".join(index_lines) + "\n"
            "</available_skills>\n"
            "\n"
-            "If none match, proceed normally without loading a skill."
+            "Only proceed without loading a skill if genuinely none are relevant to the task."
        )

    # ── Store in LRU cache ────────────────────────────────────────────
@@ -36,7 +36,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =

    try:
        response = call_llm(
-            task="compression",  # reuse compression task config (cheap/fast model)
+            task="title_generation",
            messages=messages,
            max_tokens=30,
            temperature=0.3,
@@ -2420,8 +2420,8 @@ class HermesCLI:
        # suppress them during streaming too — unless show_reasoning is
        # enabled, in which case we route the inner content to the
        # reasoning display box instead of discarding it.
-        _OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>", "<thinking>")
-        _CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>", "</thinking>")
+        _OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>", "<thinking>", "<thought>")
+        _CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>", "</thinking>", "</thought>")

        # Append to a pre-filter buffer first
        self._stream_prefilt = getattr(self, "_stream_prefilt", "") + text
@@ -2735,6 +2735,22 @@ class HermesCLI:
        if runtime_model and isinstance(runtime_model, str):
            self.model = runtime_model

+        # If model is still empty (e.g. user ran `hermes auth add openai-codex`
+        # without `hermes model`), fall back to the provider's first catalog
+        # model so the API call doesn't fail with "model must be non-empty".
+        if not self.model and resolved_provider:
+            try:
+                from hermes_cli.models import get_default_model_for_provider
+                _default = get_default_model_for_provider(resolved_provider)
+                if _default:
+                    self.model = _default
+                    logger.info(
+                        "No model configured — defaulting to %s for provider %s",
+                        _default, resolved_provider,
+                    )
+            except Exception:
+                pass
+
        # Normalize model for the resolved provider (e.g. swap non-Codex
        # models when provider is openai-codex).  Fixes #651.
        model_changed = self._normalize_model_for_provider(resolved_provider)
@@ -3098,6 +3114,8 @@ class HermesCLI:

        # Collect displayable entries (skip system, tool-result messages)
        entries = []  # list of (role, display_text)
+        _last_asst_idx = None       # index of last assistant entry
+        _last_asst_full = None      # un-truncated display text for last assistant
        for msg in self.conversation_history:
            role = msg.get("role", "")
            content = msg.get("content")
@@ -3127,7 +3145,9 @@ class HermesCLI:
                text = "" if content is None else str(content)
                text = _strip_reasoning(text)
                parts = []
+                full_parts = []  # un-truncated version
                if text:
+                    full_parts.append(text)
                    lines = text.splitlines()
                    if len(lines) > MAX_ASST_LINES:
                        text = "\n".join(lines[:MAX_ASST_LINES]) + " ..."
@@ -3147,11 +3167,15 @@ class HermesCLI:
                    if len(names) > 4:
                        names_str += ", ..."
                    noun = "call" if tc_count == 1 else "calls"
-                    parts.append(f"[{tc_count} tool {noun}: {names_str}]")
+                    tc_summary = f"[{tc_count} tool {noun}: {names_str}]"
+                    parts.append(tc_summary)
+                    full_parts.append(tc_summary)
                if not parts:
                    # Skip pure-reasoning messages that have no visible output
                    continue
                entries.append(("assistant", " ".join(parts)))
+                _last_asst_idx = len(entries) - 1
+                _last_asst_full = " ".join(full_parts)

        if not entries:
            return
@@ -3162,6 +3186,13 @@ class HermesCLI:
            skipped = len(entries) - MAX_DISPLAY_EXCHANGES * 2
            entries = entries[skipped:]

+        # Replace last assistant entry with full (un-truncated) text
+        # so the user can see where they left off without wasting tokens.
+        if _last_asst_idx is not None and _last_asst_full:
+            adj_idx = _last_asst_idx - skipped
+            if 0 <= adj_idx < len(entries):
+                entries[adj_idx] = ("assistant_last", _last_asst_full)
+
        # Build the display using Rich
        from rich.panel import Panel
        from rich.text import Text
@@ -3194,6 +3225,13 @@ class HermesCLI:
                lines.append(msg_lines[0] + "\n", style="dim")
                for ml in msg_lines[1:]:
                    lines.append(f"         {ml}\n", style="dim")
+            elif role == "assistant_last":
+                # Last assistant response shown in full, non-dim
+                lines.append("  ◆ Hermes: ", style=f"bold {_assistant_label_c}")
+                msg_lines = text.splitlines()
+                lines.append(msg_lines[0] + "\n", style="")
+                for ml in msg_lines[1:]:
+                    lines.append(f"            {ml}\n", style="")
            else:
                lines.append("  ◆ Hermes: ", style=f"dim bold {_assistant_label_c}")
                msg_lines = text.splitlines()
@@ -5375,10 +5413,16 @@ class HermesCLI:
            self._show_usage()
        elif canonical == "insights":
            self._show_insights(cmd_original)
+        elif canonical == "debug":
+            self._handle_debug_command()
        elif canonical == "paste":
            self._handle_paste_command()
        elif canonical == "image":
            self._handle_image_command(cmd_original)
+        elif canonical == "reload":
+            from hermes_cli.config import reload_env
+            count = reload_env()
+            print(f"  Reloaded .env ({count} var(s) updated)")
        elif canonical == "reload-mcp":
            with self._busy_command(self._slow_command_status(cmd_original)):
                self._reload_mcp()
@@ -6289,6 +6333,14 @@ class HermesCLI:
        except Exception as e:
            print(f"  ❌ Compression failed: {e}")

+    def _handle_debug_command(self):
+        """Handle /debug — upload debug report + logs and print paste URLs.
+
+        Delegates to ``run_debug_share`` with a fixed argument namespace
+        (``lines=200``, ``expire=7``, ``local=False``).
+        """
+        from types import SimpleNamespace
+        from hermes_cli.debug import run_debug_share
+
+        debug_args = SimpleNamespace(lines=200, expire=7, local=False)
+        run_debug_share(debug_args)
+
    def _show_usage(self):
        """Show rate limits (if available) and session token usage."""
        if not self.agent:
@@ -7601,8 +7653,10 @@ class HermesCLI:
                        "error": _summary,
                    }

-            # Start agent in background thread
-            agent_thread = threading.Thread(target=run_agent)
+            # Start agent in background thread (daemon so it cannot keep the
+            # process alive when the user closes the terminal tab — SIGHUP
+            # exits the main thread and daemon threads are reaped automatically).
+            agent_thread = threading.Thread(target=run_agent, daemon=True)
            agent_thread.start()

            # Monitor the dedicated interrupt queue while the agent runs.
@@ -9553,17 +9607,37 @@ class HermesCLI:
            pass  # Signal handlers may fail in restricted environments
        
        # Install a custom asyncio exception handler that suppresses the
-        # "Event loop is closed" RuntimeError from httpx transport cleanup.
-        # This is defense-in-depth — the primary fix is neuter_async_httpx_del
-        # which disables __del__ entirely, but older clients or SDK upgrades
-        # could bypass it.
+        # "Event loop is closed" RuntimeError from httpx transport cleanup
+        # and the "0 is not registered" KeyError from broken stdin (#6393).
+        # The RuntimeError fix is defense-in-depth — the primary fix is
+        # neuter_async_httpx_del which disables __del__ entirely.  The
+        # KeyError fix handles macOS + uv-managed Python environments where
+        # fd 0 is not reliably available to the asyncio selector.
        def _suppress_closed_loop_errors(loop, context):
            exc = context.get("exception")
            if isinstance(exc, RuntimeError) and "Event loop is closed" in str(exc):
                return  # silently suppress
+            if isinstance(exc, KeyError) and "is not registered" in str(exc):
+                return  # suppress selector registration failures (#6393)
            # Fall back to default handler for everything else
            loop.default_exception_handler(context)

+        # Validate stdin before launching prompt_toolkit — on macOS with
+        # uv-managed Python, fd 0 can be invalid or unregisterable with the
+        # asyncio selector, causing "KeyError: '0 is not registered'" (#6393).
+        try:
+            import os as _os
+            _os.fstat(0)
+        except OSError:
+            print(
+                "Error: stdin (fd 0) is not available.\n"
+                "This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
+                "Try reinstalling Python via pyenv or Homebrew, then re-run: hermes setup"
+            )
+            _run_cleanup()
+            self._print_exit_summary()
+            return
+
        # Run the application with patch_stdout for proper output handling
        try:
            with patch_stdout():
@@ -9577,8 +9651,28 @@ class HermesCLI:
                app.run()
        except (EOFError, KeyboardInterrupt, BrokenPipeError):
            pass
+        except (KeyError, OSError) as _stdin_err:
+            # Catch selector registration failures from broken stdin (#6393).
+            # This is the fallback for cases that slip past the fstat() guard.
+            if "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
+                print(
+                    f"\nError: stdin is not usable ({_stdin_err}).\n"
+                    "This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
+                    "Try reinstalling Python via pyenv or Homebrew, then re-run: hermes setup"
+                )
+            else:
+                raise
        finally:
            self._should_exit = True
+            # Interrupt the agent immediately so its daemon thread stops making
+            # API calls and exits promptly (agent_thread is daemon, so the
+            # process will exit once the main thread finishes, but interrupting
+            # avoids wasted API calls and lets run_conversation clean up).
+            if self.agent and getattr(self, '_agent_running', False):
+                try:
+                    self.agent.interrupt()
+                except Exception:
+                    pass
            # Flush memories before exit (only for substantial conversations)
            if self.agent and self.conversation_history:
                try:
@@ -665,6 +665,17 @@ def load_gateway_config() -> GatewayConfig:
    _apply_env_overrides(config)
    
    # --- Validate loaded values ---
+    _validate_gateway_config(config)
+
+    return config
+
+
+def _validate_gateway_config(config: "GatewayConfig") -> None:
+    """Validate and sanitize a loaded GatewayConfig in place.
+
+    Called by ``load_gateway_config()`` after all config sources are merged.
+    Extracted as a separate function for testability.
+    """
    policy = config.default_reset_policy

    if not (0 <= policy.at_hour <= 23):
@@ -701,7 +712,31 @@ def load_gateway_config() -> GatewayConfig:
                platform.value, env_name,
            )

-    return config
+    # Reject known-weak placeholder tokens.
+    # Ported from openclaw/openclaw#64586: users who copy .env.example
+    # without changing placeholder values get a clear startup error instead
+    # of a confusing "auth failed" from the platform API.
+    try:
+        from hermes_cli.auth import has_usable_secret
+    except ImportError:
+        has_usable_secret = None  # type: ignore[assignment]
+
+    if has_usable_secret is not None:
+        for platform, pconfig in config.platforms.items():
+            if not pconfig.enabled:
+                continue
+            env_name = _token_env_names.get(platform)
+            if not env_name:
+                continue
+            token = pconfig.token
+            if token and token.strip() and not has_usable_secret(token, min_length=4):
+                logger.error(
+                    "%s is enabled but %s is set to a placeholder value ('%s'). "
+                    "Set a real bot token before starting the gateway. "
+                    "The adapter will NOT be started.",
+                    platform.value, env_name, token.strip()[:6] + "...",
+                )
+                pconfig.enabled = False


 def _apply_env_overrides(config: GatewayConfig) -> None:
@@ -82,7 +82,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {

    # Tier 3 — no edit support, progress messages are permanent
    "signal":          _TIER_LOW,
-    "whatsapp":        _TIER_LOW,
+    "whatsapp":        _TIER_MEDIUM,  # Baileys bridge supports /edit
    "bluebubbles":     _TIER_LOW,
    "weixin":          _TIER_LOW,
    "wecom":           _TIER_LOW,
@@ -54,6 +54,66 @@ DEFAULT_PORT = 8642
 MAX_STORED_RESPONSES = 100
 MAX_REQUEST_BYTES = 1_000_000  # 1 MB default limit for POST bodies
 CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0
+MAX_NORMALIZED_TEXT_LENGTH = 65_536  # 64 KB cap for normalized content parts
+MAX_CONTENT_LIST_SIZE = 1_000  # Max items when content is an array
+
+
+def _normalize_chat_content(
+    content: Any, *, _max_depth: int = 10, _depth: int = 0,
+) -> str:
+    """Normalize OpenAI chat message content into a plain text string.
+
+    Some clients (Open WebUI, LobeChat, etc.) send content as an array of
+    typed parts instead of a plain string::
+
+        [{"type": "text", "text": "hello"}, {"type": "input_text", "text": "..."}]
+
+    This function flattens those into a single string so the agent pipeline
+    (which expects strings) doesn't choke.
+
+    Defensive limits prevent abuse: recursion depth, list size, and output
+    length are all bounded.
+
+    Args:
+        content: ``None``, a string, a list of parts (strings, typed dicts,
+            or nested lists), or any other value.
+        _max_depth: Maximum nesting depth of lists that is still expanded.
+        _depth: Current recursion depth (internal; callers leave it at 0).
+
+    Returns:
+        The text parts joined with newlines, never longer than
+        ``MAX_NORMALIZED_TEXT_LENGTH`` characters.  ``None``, over-deep
+        nesting, and un-stringifiable values yield ``""``.
+    """
+    if _depth > _max_depth:
+        return ""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content[:MAX_NORMALIZED_TEXT_LENGTH]
+
+    if isinstance(content, list):
+        parts: List[str] = []
+        # Running total of len() over `parts`; kept incrementally so the size
+        # check below is O(1) per item instead of re-summing every part.
+        total_len = 0
+        for item in content[:MAX_CONTENT_LIST_SIZE]:
+            if isinstance(item, str):
+                if item:
+                    piece = item[:MAX_NORMALIZED_TEXT_LENGTH]
+                    parts.append(piece)
+                    total_len += len(piece)
+            elif isinstance(item, dict):
+                item_type = str(item.get("type") or "").strip().lower()
+                if item_type in {"text", "input_text", "output_text"}:
+                    text = item.get("text", "")
+                    if text:
+                        try:
+                            piece = str(text)[:MAX_NORMALIZED_TEXT_LENGTH]
+                        except Exception:
+                            # A part whose text cannot be stringified is dropped.
+                            pass
+                        else:
+                            parts.append(piece)
+                            total_len += len(piece)
+                # Silently skip image_url / other non-text parts
+            elif isinstance(item, list):
+                nested = _normalize_chat_content(item, _max_depth=_max_depth, _depth=_depth + 1)
+                if nested:
+                    parts.append(nested)
+                    total_len += len(nested)
+            # Stop early once the accumulated text already fills the cap
+            if total_len >= MAX_NORMALIZED_TEXT_LENGTH:
+                break
+        return "\n".join(parts)[:MAX_NORMALIZED_TEXT_LENGTH]
+
+    # Fallback for unexpected types (int, float, bool, etc.)
+    try:
+        return str(content)[:MAX_NORMALIZED_TEXT_LENGTH]
+    except Exception:
+        return ""


 def check_api_server_requirements() -> bool:
@@ -553,7 +613,7 @@ class APIServerAdapter(BasePlatformAdapter):

        for msg in messages:
            role = msg.get("role", "")
-            content = msg.get("content", "")
+            content = _normalize_chat_content(msg.get("content", ""))
            if role == "system":
                # Accumulate system messages
                if system_prompt is None:
@@ -926,18 +986,7 @@ class APIServerAdapter(BasePlatformAdapter):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    content = item.get("content", "")
-                    # Handle content that may be a list of content parts
-                    if isinstance(content, list):
-                        text_parts = []
-                        for part in content:
-                            if isinstance(part, dict) and part.get("type") == "input_text":
-                                text_parts.append(part.get("text", ""))
-                            elif isinstance(part, dict) and part.get("type") == "output_text":
-                                text_parts.append(part.get("text", ""))
-                            elif isinstance(part, str):
-                                text_parts.append(part)
-                        content = "\n".join(text_parts)
+                    content = _normalize_chat_content(item.get("content", ""))
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1770,6 +1819,23 @@ class APIServerAdapter(BasePlatformAdapter):
                )
                return False

+            # Refuse to start network-accessible with a placeholder key.
+            # Ported from openclaw/openclaw#64586.
+            if is_network_accessible(self._host) and self._api_key:
+                try:
+                    from hermes_cli.auth import has_usable_secret
+                    if not has_usable_secret(self._api_key, min_length=8):
+                        logger.error(
+                            "[%s] Refusing to start: API_SERVER_KEY is set to a "
+                            "placeholder value. Generate a real secret "
+                            "(e.g. `openssl rand -hex 32`) and set API_SERVER_KEY "
+                            "before exposing the API server on %s.",
+                            self.name, self._host,
+                        )
+                        return False
+                except ImportError:
+                    pass
+
            # Port conflict detection — fail fast if port is already in use
            try:
                with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as _s:
@@ -21,6 +21,59 @@ from urllib.parse import urlsplit
 logger = logging.getLogger(__name__)


+def utf16_len(s: str) -> int:
+    """Count UTF-16 code units in *s*.
+
+    Telegram's message-length limit (4 096) is measured in UTF-16 code units,
+    **not** Unicode code-points.  Characters outside the Basic Multilingual
+    Plane (emoji like 😀, CJK Extension B, musical symbols, …) are encoded as
+    surrogate pairs and therefore consume **two** UTF-16 code units each, even
+    though Python's ``len()`` counts them as one.
+
+    Ported from nearai/ironclaw#2304 which discovered the same discrepancy in
+    Rust's ``chars().count()``.
+
+    Unpaired surrogate code points (which can appear in a ``str`` decoded
+    from JSON escapes such as ``"\\ud83d"``) are counted as one code unit
+    each instead of raising ``UnicodeEncodeError``.
+    """
+    # "surrogatepass" encodes lone surrogates as-is (2 bytes each); the
+    # default strict handler would raise on them.
+    return len(s.encode("utf-16-le", "surrogatepass")) // 2
+
+
+def _prefix_within_utf16_limit(s: str, limit: int) -> str:
+    """Return the longest prefix of *s* whose UTF-16 length ≤ *limit*.
+
+    A naive ``s[:limit]`` counts code-points, so it can exceed *limit* when
+    the text contains characters that need two UTF-16 code units.  The
+    prefix returned here always ends on a whole code-point.
+    """
+    if utf16_len(s) <= limit:
+        return s
+    # Bisect on the code-point count of the prefix: `low` only ever moves to
+    # a prefix length that was measured to fit, `high` is the largest
+    # candidate still worth probing.
+    low, high = 0, len(s)
+    while low < high:
+        probe = low + (high - low + 1) // 2
+        if utf16_len(s[:probe]) > limit:
+            high = probe - 1
+        else:
+            low = probe
+    return s[:low]
+
+
+def _custom_unit_to_cp(s: str, budget: int, len_fn) -> int:
+    """Return the largest codepoint offset *n* such that ``len_fn(s[:n]) <= budget``.
+
+    Helper for :meth:`BasePlatformAdapter.truncate_message` when *len_fn*
+    counts in units other than Python codepoints (e.g. UTF-16 code units).
+    When the whole string fits, ``len(s)`` is returned directly; otherwise
+    the offset is found by binary search, i.e. O(log n) calls to *len_fn*.
+    """
+    if len_fn(s) <= budget:
+        return len(s)
+    fits, ceiling = 0, len(s)
+    while fits < ceiling:
+        # Round the midpoint up so the loop always makes progress.
+        candidate = fits + (ceiling - fits + 1) // 2
+        if len_fn(s[:candidate]) > budget:
+            ceiling = candidate - 1
+        else:
+            fits = candidate
+    return fits
+
+
 def is_network_accessible(host: str) -> bool:
    """Return True if *host* would expose the server beyond loopback.

@@ -1886,7 +1939,11 @@ class BasePlatformAdapter(ABC):
        return content
    
    @staticmethod
-    def truncate_message(content: str, max_length: int = 4096) -> List[str]:
+    def truncate_message(
+        content: str,
+        max_length: int = 4096,
+        len_fn: Optional["Callable[[str], int]"] = None,
+    ) -> List[str]:
        """
        Split a long message into chunks, preserving code block boundaries.

@@ -1898,11 +1955,16 @@ class BasePlatformAdapter(ABC):
        Args:
            content: The full message content
            max_length: Maximum length per chunk (platform-specific)
+            len_fn: Optional length function for measuring string length.
+                     Defaults to ``len`` (Unicode code-points).  Pass
+                     ``utf16_len`` for platforms that measure message
+                     length in UTF-16 code units (e.g. Telegram).

        Returns:
            List of message chunks
        """
-        if len(content) <= max_length:
+        _len = len_fn or len
+        if _len(content) <= max_length:
            return [content]

        INDICATOR_RESERVE = 10   # room for " (XX/XX)"
@@ -1921,22 +1983,33 @@ class BasePlatformAdapter(ABC):

            # How much body text we can fit after accounting for the prefix,
            # a potential closing fence, and the chunk indicator.
-            headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
+            headroom = max_length - INDICATOR_RESERVE - _len(prefix) - _len(FENCE_CLOSE)
            if headroom < 1:
                headroom = max_length // 2

            # Everything remaining fits in one final chunk
-            if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
+            if _len(prefix) + _len(remaining) <= max_length - INDICATOR_RESERVE:
                chunks.append(prefix + remaining)
                break

-            # Find a natural split point (prefer newlines, then spaces)
-            region = remaining[:headroom]
+            # Find a natural split point (prefer newlines, then spaces).
+            # When _len != len (e.g. utf16_len for Telegram), headroom is
+            # measured in the custom unit.  We need codepoint-based slice
+            # positions that stay within the custom-unit budget.
+            #
+            # _custom_unit_to_cp() maps a custom-unit budget to the largest
+            # codepoint offset whose custom length ≤ budget.
+            if _len is not len:
+                # Map headroom (custom units) → codepoint slice length
+                _cp_limit = _custom_unit_to_cp(remaining, headroom, _len)
+            else:
+                _cp_limit = headroom
+            region = remaining[:_cp_limit]
            split_at = region.rfind("\n")
-            if split_at < headroom // 2:
+            if split_at < _cp_limit // 2:
                split_at = region.rfind(" ")
            if split_at < 1:
-                split_at = headroom
+                split_at = _cp_limit

            # Avoid splitting inside an inline code span (`...`).
            # If the text before split_at has an odd number of unescaped
@@ -1956,7 +2029,7 @@ class BasePlatformAdapter(ABC):
                    safe_split = candidate.rfind(" ", 0, last_bt)
                    nl_split = candidate.rfind("\n", 0, last_bt)
                    safe_split = max(safe_split, nl_split)
-                    if safe_split > headroom // 4:
+                    if safe_split > _cp_limit // 4:
                        split_at = safe_split

            chunk_body = remaining[:split_at]
@@ -34,6 +34,9 @@ from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional
+from urllib.error import HTTPError, URLError
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen

 # aiohttp/websockets are independent optional deps — import outside lark_oapi
 # so they remain available for tests and webhook mode even if lark_oapi is missing.
@@ -169,6 +172,19 @@ _FEISHU_CARD_ACTION_DEDUP_TTL_SECONDS = 15 * 60    # card action token dedup win
 _FEISHU_BOT_MSG_TRACK_SIZE = 512                   # LRU size for tracking sent message IDs
 _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003})  # reply target withdrawn/missing → create fallback
 _FEISHU_ACK_EMOJI = "OK"
+
+# QR onboarding constants
+_ONBOARD_ACCOUNTS_URLS = {
+    "feishu": "https://accounts.feishu.cn",
+    "lark": "https://accounts.larksuite.com",
+}
+_ONBOARD_OPEN_URLS = {
+    "feishu": "https://open.feishu.cn",
+    "lark": "https://open.larksuite.com",
+}
+_REGISTRATION_PATH = "/oauth/v1/app/registration"
+_ONBOARD_REQUEST_TIMEOUT_S = 10
+
 # ---------------------------------------------------------------------------
 # Fallback display strings
 # ---------------------------------------------------------------------------
@@ -3621,3 +3637,328 @@ class FeishuAdapter(BasePlatformAdapter):
            return _FEISHU_FILE_UPLOAD_TYPE, "file"

        return _FEISHU_FILE_UPLOAD_TYPE, "file"
+
+
+# =============================================================================
+# QR scan-to-create onboarding
+#
+# Device-code flow: user scans a QR code with Feishu/Lark mobile app and the
+# platform creates a fully configured bot application automatically.
+# Called by `hermes gateway setup` via _setup_feishu() in hermes_cli/gateway.py.
+# =============================================================================
+
+
+def _accounts_base_url(domain: str) -> str:
+    """Return the accounts base URL for *domain* ("feishu" entry if unknown)."""
+    fallback = _ONBOARD_ACCOUNTS_URLS["feishu"]
+    return _ONBOARD_ACCOUNTS_URLS.get(domain, fallback)
+
+
+def _onboard_open_base_url(domain: str) -> str:
+    """Return the open-platform base URL for *domain* ("feishu" entry if unknown)."""
+    fallback = _ONBOARD_OPEN_URLS["feishu"]
+    return _ONBOARD_OPEN_URLS.get(domain, fallback)
+
+
+def _post_registration(base_url: str, body: Dict[str, str]) -> dict:
+    """POST form-encoded *body* to the registration endpoint and return its JSON.
+
+    The endpoint answers with JSON even for 4xx statuses (a poll reports
+    ``authorization_pending`` as a 400), so the response body is parsed
+    whatever the HTTP status was.
+
+    Raises:
+        HTTPError: the server returned an error status with an empty or
+            non-JSON body.
+    """
+    request = Request(
+        f"{base_url}{_REGISTRATION_PATH}",
+        data=urlencode(body).encode("utf-8"),
+        headers={"Content-Type": "application/x-www-form-urlencoded"},
+    )
+    try:
+        with urlopen(request, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    except HTTPError as exc:
+        error_payload = exc.read()
+        if not error_payload:
+            raise
+        try:
+            return json.loads(error_payload.decode("utf-8"))
+        except ValueError:
+            # Covers json.JSONDecodeError too (it subclasses ValueError);
+            # surface the HTTP error rather than the parse failure.
+            raise exc from None
+
+
+def _init_registration(domain: str = "feishu") -> None:
+    """Check that the registration environment offers ``client_secret`` auth.
+
+    Sends the ``init`` action and inspects ``supported_auth_methods`` in
+    the reply.
+
+    Raises:
+        RuntimeError: ``client_secret`` is not among the supported methods.
+    """
+    reply = _post_registration(_accounts_base_url(domain), {"action": "init"})
+    methods = reply.get("supported_auth_methods") or []
+    if "client_secret" in methods:
+        return
+    raise RuntimeError(
+        "Feishu / Lark registration environment does not support client_secret auth. "
+        f"Supported: {methods}"
+    )
+
+
+def _begin_registration(domain: str = "feishu") -> dict:
+    """Start the device-code flow.
+
+    Returns:
+        A dict with ``device_code``, ``qr_url``, ``user_code``, ``interval``
+        (defaults to 5 when absent) and ``expire_in`` (defaults to 600 when
+        absent).  ``qr_url`` is the server's ``verification_uri_complete``
+        with ``from=hermes&tp=hermes`` appended to its query string.
+
+    Raises:
+        RuntimeError: the response lacks a ``device_code`` or a
+            ``verification_uri_complete``.
+    """
+    base_url = _accounts_base_url(domain)
+    res = _post_registration(base_url, {
+        "action": "begin",
+        "archetype": "PersonalAgent",
+        "auth_method": "client_secret",
+        "request_user_info": "open_id",
+    })
+    device_code = res.get("device_code")
+    if not device_code:
+        raise RuntimeError("Feishu / Lark registration did not return a device_code")
+    # Without a verification URL there is nothing for the user to scan or
+    # open; fail now instead of presenting "?from=hermes&tp=hermes" and
+    # polling until the device code expires.
+    qr_url = res.get("verification_uri_complete") or ""
+    if not qr_url:
+        raise RuntimeError("Feishu / Lark registration did not return a verification URL")
+    separator = "&" if "?" in qr_url else "?"
+    qr_url = f"{qr_url}{separator}from=hermes&tp=hermes"
+    return {
+        "device_code": device_code,
+        "qr_url": qr_url,
+        "user_code": res.get("user_code", ""),
+        "interval": res.get("interval") or 5,
+        "expire_in": res.get("expire_in") or 600,
+    }
+
+
+def _poll_registration(
+    *,
+    device_code: str,
+    interval: int,
+    expire_in: int,
+    domain: str = "feishu",
+) -> Optional[dict]:
+    """Poll until the user scans the QR code, or timeout/denial.
+
+    Returns dict with app_id, app_secret, domain, open_id on success.
+    Returns None on failure.
+
+    Args:
+        device_code: Device code sent with every ``poll`` action.
+        interval: Seconds to sleep between polls (also used after a failed
+            request).
+        expire_in: Seconds from now after which polling gives up.
+        domain: Domain whose accounts URL is polled first; switched to
+            ``"lark"`` if a response reports ``tenant_brand == "lark"``.
+
+    Progress is written to stdout with ``print``; denial and timeout are
+    logged as warnings.
+    """
+    deadline = time.time() + expire_in
+    current_domain = domain
+    domain_switched = False
+    poll_count = 0
+
+    while time.time() < deadline:
+        # Recomputed every iteration so a domain switch takes effect on the
+        # next poll.
+        base_url = _accounts_base_url(current_domain)
+        try:
+            res = _post_registration(base_url, {
+                "action": "poll",
+                "device_code": device_code,
+                "tp": "ob_app",
+            })
+        except (URLError, OSError, json.JSONDecodeError):
+            # Request or parse failure: wait one interval and retry.  These
+            # attempts are not counted in poll_count.
+            time.sleep(interval)
+            continue
+
+        poll_count += 1
+        if poll_count == 1:
+            print("  Fetching configuration results...", end="", flush=True)
+        elif poll_count % 6 == 0:
+            # One progress dot for every sixth successful poll.
+            print(".", end="", flush=True)
+
+        # Domain auto-detection
+        user_info = res.get("user_info") or {}
+        tenant_brand = user_info.get("tenant_brand")
+        if tenant_brand == "lark" and not domain_switched:
+            current_domain = "lark"
+            domain_switched = True
+            # Fall through — server may return credentials in this same response.
+
+        # Success
+        if res.get("client_id") and res.get("client_secret"):
+            if poll_count > 0:
+                print()  # newline after "Fetching configuration results..." dots
+            return {
+                "app_id": res["client_id"],
+                "app_secret": res["client_secret"],
+                "domain": current_domain,
+                "open_id": user_info.get("open_id"),
+            }
+
+        # Terminal errors
+        error = res.get("error", "")
+        if error in ("access_denied", "expired_token"):
+            if poll_count > 0:
+                print()
+            logger.warning("[Feishu onboard] Registration %s", error)
+            return None
+
+        # authorization_pending or unknown — keep polling
+        time.sleep(interval)
+
+    # Deadline passed without credentials or a terminal error.
+    if poll_count > 0:
+        print()
+    logger.warning("[Feishu onboard] Poll timed out after %ds", expire_in)
+    return None
+
+
+try:
+    import qrcode as _qrcode_mod
+except (ImportError, TypeError):
+    _qrcode_mod = None  # type: ignore[assignment]
+
+
+def _render_qr(url: str) -> bool:
+    """Print *url* as an ASCII QR code; return whether rendering succeeded.
+
+    Returns False when the optional ``qrcode`` module could not be imported
+    or when it raises while building or printing the code.
+    """
+    if _qrcode_mod is None:
+        return False
+    try:
+        code = _qrcode_mod.QRCode()
+        code.add_data(url)
+        code.make(fit=True)
+        code.print_ascii(invert=True)
+    except Exception:
+        return False
+    return True
+
+
+def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
+    """Verify bot connectivity via /open-apis/bot/v3/info.
+
+    Dispatches to the lark_oapi SDK probe when ``FEISHU_AVAILABLE`` is set
+    and to the raw-HTTP probe otherwise.
+    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
+    """
+    prober = _probe_bot_sdk if FEISHU_AVAILABLE else _probe_bot_http
+    return prober(app_id, app_secret, domain)
+
+
+def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:
+    """Build a lark Client for the given credentials and domain.
+
+    ``domain == "lark"`` selects ``LARK_DOMAIN``; any other value selects
+    ``FEISHU_DOMAIN``.  The client's log level is set to WARNING.
+    """
+    if domain == "lark":
+        sdk_domain = LARK_DOMAIN
+    else:
+        sdk_domain = FEISHU_DOMAIN
+    builder = lark.Client.builder()
+    builder = builder.app_id(app_id)
+    builder = builder.app_secret(app_secret)
+    builder = builder.domain(sdk_domain)
+    builder = builder.log_level(lark.LogLevel.WARNING)
+    return builder.build()
+
+
+def _parse_bot_response(data: dict) -> Optional[dict]:
+    """Extract bot_name and bot_open_id from a /bot/v3/info response.
+
+    The bot object is read from the top-level ``"bot"`` key, falling back
+    to ``data["data"]["bot"]``.
+
+    Returns:
+        ``None`` when ``data["code"]`` is not 0; otherwise a dict with
+        ``bot_name`` and ``bot_open_id`` (each ``None`` when the response
+        carries no such field).
+    """
+    if data.get("code") != 0:
+        return None
+    # `"data": null` must be treated like a missing key: .get("data", {})
+    # would hand back None and the chained .get() would raise AttributeError.
+    nested = data.get("data") or {}
+    bot = data.get("bot") or nested.get("bot") or {}
+    return {
+        "bot_name": bot.get("bot_name"),
+        "bot_open_id": bot.get("open_id"),
+    }
+
+
+def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
+    """Probe bot info using lark_oapi SDK.
+
+    Any exception raised while building the client, issuing the request or
+    parsing the reply is logged at debug level and reported as ``None``.
+    """
+    try:
+        sdk_client = _build_onboard_client(app_id, app_secret, domain)
+        response = sdk_client.request(
+            method="GET",
+            url="/open-apis/bot/v3/info",
+            body=None,
+            raw_response=True,
+        )
+        payload = json.loads(response.content)
+        return _parse_bot_response(payload)
+    except Exception as exc:
+        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
+        return None
+
+
+def _probe_bot_http(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
+    """Fallback probe using raw HTTP (when lark_oapi is not installed).
+
+    Fetches a tenant access token with the app credentials, then calls
+    ``/open-apis/bot/v3/info`` with it.
+
+    Returns:
+        The parsed bot info, or ``None`` when the token is missing, a
+        request fails, or a response is malformed.  The probe is
+        best-effort, so malformed payloads are reported as ``None`` rather
+        than raised, matching the SDK probe.
+    """
+    base_url = _onboard_open_base_url(domain)
+    try:
+        token_data = json.dumps({"app_id": app_id, "app_secret": app_secret}).encode("utf-8")
+        token_req = Request(
+            f"{base_url}/open-apis/auth/v3/tenant_access_token/internal",
+            data=token_data,
+            headers={"Content-Type": "application/json"},
+        )
+        with urlopen(token_req, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
+            token_res = json.loads(resp.read().decode("utf-8"))
+
+        access_token = token_res.get("tenant_access_token")
+        if not access_token:
+            return None
+
+        bot_req = Request(
+            f"{base_url}/open-apis/bot/v3/info",
+            headers={
+                "Authorization": f"Bearer {access_token}",
+                "Content-Type": "application/json",
+            },
+        )
+        with urlopen(bot_req, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
+            bot_res = json.loads(resp.read().decode("utf-8"))
+
+        return _parse_bot_response(bot_res)
+    except (URLError, OSError, KeyError, ValueError, AttributeError, TypeError) as exc:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
+        # AttributeError/TypeError cover JSON that is not shaped as expected
+        # (e.g. a list or null where an object was expected).
+        logger.debug("[Feishu onboard] HTTP probe failed: %s", exc)
+        return None
+
+
+def qr_register(
+    *,
+    initial_domain: str = "feishu",
+    timeout_seconds: int = 600,
+) -> Optional[dict]:
+    """Run the Feishu / Lark scan-to-create QR registration flow.
+
+    On success the result has this shape::
+
+        {
+            "app_id": str,
+            "app_secret": str,
+            "domain": "feishu" | "lark",
+            "open_id": str | None,
+            "bot_name": str | None,
+            "bot_open_id": str | None,
+        }
+
+    Expected failures (network, auth denied, timeout) yield None; a
+    RuntimeError, URLError, OSError or JSON decode error from the inner
+    flow is logged as a warning and also yields None.  Any other exception
+    (bugs, protocol regressions) propagates to the caller.
+    """
+    expected_failures = (RuntimeError, URLError, OSError, json.JSONDecodeError)
+    try:
+        outcome = _qr_register_inner(
+            initial_domain=initial_domain,
+            timeout_seconds=timeout_seconds,
+        )
+    except expected_failures as exc:
+        logger.warning("[Feishu onboard] Registration failed: %s", exc)
+        return None
+    return outcome
+
+
+def _qr_register_inner(
+    *,
+    initial_domain: str,
+    timeout_seconds: int,
+) -> Optional[dict]:
+    """Run init → begin → poll → probe. Raises on network/protocol errors.
+
+    Returns the polling result extended with ``bot_name`` and
+    ``bot_open_id``, or None when polling produced no credentials.
+    """
+    print("  Connecting to Feishu / Lark...", end="", flush=True)
+    _init_registration(initial_domain)
+    session = _begin_registration(initial_domain)
+    print(" done.")
+
+    print()
+    qr_url = session["qr_url"]
+    rendered = _render_qr(qr_url)
+    if not rendered:
+        print(f"  Open this URL in Feishu / Lark on your phone:\n\n  {qr_url}\n")
+        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
+    else:
+        print(f"\n  Scan the QR code above, or open this URL directly:\n  {qr_url}")
+    print()
+
+    # Never wait longer than the caller's timeout, even if the server
+    # grants the device code a longer lifetime.
+    wait_budget = min(session["expire_in"], timeout_seconds)
+    result = _poll_registration(
+        device_code=session["device_code"],
+        interval=session["interval"],
+        expire_in=wait_budget,
+        domain=initial_domain,
+    )
+    if not result:
+        return None
+
+    # Probe bot — best-effort, don't fail the registration
+    bot_info = probe_bot(result["app_id"], result["app_secret"], result["domain"]) or {}
+    for field in ("bot_name", "bot_open_id"):
+        result[field] = bot_info.get(field)
+
+    return result
@@ -18,6 +18,7 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
+    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """

@@ -508,6 +509,19 @@ class MatrixAdapter(BasePlatformAdapter):
                    await api.session.close()
                    return False

+                # Import cross-signing private keys from SSSS and self-sign
+                # the current device. Required after any device-key rotation
+                # (fresh crypto.db, share_keys re-upload) — otherwise the
+                # device's self-signing signature is stale and peers refuse
+                # to share Megolm sessions with the rotated device.
+                recovery_key = os.getenv("MATRIX_RECOVERY_KEY", "").strip()
+                if recovery_key:
+                    try:
+                        await olm.verify_with_recovery_key(recovery_key)
+                        logger.info("Matrix: cross-signing verified via recovery key")
+                    except Exception as exc:
+                        logger.warning("Matrix: recovery key verification failed: %s", exc)
+
                client.crypto = olm
                logger.info(
                    "Matrix: E2EE enabled (store: %s%s)",
@@ -768,7 +782,7 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-                async with _aiohttp.ClientSession() as http:
+                async with _aiohttp.ClientSession(trust_env=True) as http:
                    async with http.get(image_url, timeout=_aiohttp.ClientTimeout(total=30)) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -1121,7 +1135,10 @@ class MatrixAdapter(BasePlatformAdapter):
            thread_id = relates_to.get("event_id")

        formatted_body = source_content.get("formatted_body")
-        is_mentioned = self._is_bot_mentioned(body, formatted_body)
+        # m.mentions.user_ids (MSC3952 / Matrix v1.7) — authoritative mention signal.
+        mentions_block = source_content.get("m.mentions") or {}
+        mention_user_ids = mentions_block.get("user_ids") if isinstance(mentions_block, dict) else None
+        is_mentioned = self._is_bot_mentioned(body, formatted_body, mention_user_ids)

        # Require-mention gating.
        if not is_dm:
@@ -1808,8 +1825,24 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

-    def _is_bot_mentioned(self, body: str, formatted_body: Optional[str] = None) -> bool:
-        """Return True if the bot is mentioned in the message."""
+    def _is_bot_mentioned(
+        self,
+        body: str,
+        formatted_body: Optional[str] = None,
+        mention_user_ids: Optional[list] = None,
+    ) -> bool:
+        """Return True if the bot is mentioned in the message.
+
+        Per MSC3952, ``m.mentions.user_ids`` is the authoritative mention
+        signal in the Matrix spec.  When the sender's client populates that
+        field with the bot's user-id, we trust it — even when the visible
+        body text does not contain an explicit ``@bot`` string (some clients
+        only render mention "pills" in ``formatted_body`` or use display
+        names).
+        """
+        # m.mentions.user_ids — authoritative per MSC3952 / Matrix v1.7.
+        if mention_user_ids and self._user_id and self._user_id in mention_user_ids:
+            return True
        if not body and not formatted_body:
            return False
        if self._user_id and self._user_id in body:
@@ -66,6 +66,8 @@ from gateway.platforms.base import (
    cache_audio_from_bytes,
    cache_document_from_bytes,
    SUPPORTED_DOCUMENT_TYPES,
+    utf16_len,
+    _prefix_within_utf16_limit,
 )
 from gateway.platforms.telegram_network import (
    TelegramFallbackTransport,
@@ -799,7 +801,9 @@ class TelegramAdapter(BasePlatformAdapter):
        try:
            # Format and split message if needed
            formatted = self.format_message(content)
-            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+            chunks = self.truncate_message(
+                formatted, self.MAX_MESSAGE_LENGTH, len_fn=utf16_len,
+            )
            if len(chunks) > 1:
                # truncate_message appends a raw " (1/2)" suffix. Escape the
                # MarkdownV2-special parentheses so Telegram doesn't reject the
@@ -970,7 +974,9 @@ class TelegramAdapter(BasePlatformAdapter):
            # streaming).  Truncate and succeed so the stream consumer can
            # split the overflow into a new message instead of dying.
            if "message_too_long" in err_str or "too long" in err_str:
-                truncated = content[: self.MAX_MESSAGE_LENGTH - 20] + "…"
+                truncated = _prefix_within_utf16_limit(
+                    content, self.MAX_MESSAGE_LENGTH - 20
+                ) + "…"
                try:
                    await self._bot.edit_message_text(
                        chat_id=int(chat_id),
@@ -266,7 +266,7 @@ class WeComAdapter(BasePlatformAdapter):
    async def _open_connection(self) -> None:
        """Open and authenticate a websocket connection."""
        await self._cleanup_ws()
-        self._session = aiohttp.ClientSession()
+        self._session = aiohttp.ClientSession(trust_env=True)
        self._ws = await self._session.ws_connect(
            self._ws_url,
            heartbeat=HEARTBEAT_INTERVAL_SECONDS * 2,
@@ -112,6 +112,7 @@ TYPING_STOP = 2
 _HEADER_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$")
 _TABLE_RULE_RE = re.compile(r"^\s*\|?(?:\s*:?-{3,}:?\s*\|)+\s*:?-{3,}:?\s*\|?\s*$")
 _FENCE_RE = re.compile(r"^```([^\n`]*)\s*$")
+_MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")


 def check_weixin_requirements() -> bool:
@@ -398,15 +399,16 @@ async def _send_message(
    context_token: Optional[str],
    client_id: str,
 ) -> None:
+    if not text or not text.strip():
+        raise ValueError("_send_message: text must not be empty")
    message: Dict[str, Any] = {
        "from_user_id": "",
        "to_user_id": to,
        "client_id": client_id,
        "message_type": MSG_TYPE_BOT,
        "message_state": MSG_STATE_FINISH,
+        "item_list": [{"type": ITEM_TEXT, "text_item": {"text": text}}],
    }
-    if text:
-        message["item_list"] = [{"type": ITEM_TEXT, "text_item": {"text": text}}]
    if context_token:
        message["context_token"] = context_token
    await _api_post(
@@ -499,13 +501,15 @@ async def _upload_ciphertext(
    session: "aiohttp.ClientSession",
    *,
    ciphertext: bytes,
-    cdn_base_url: str,
-    upload_param: str,
-    filekey: str,
+    upload_url: str,
 ) -> str:
-    url = _cdn_upload_url(cdn_base_url, upload_param, filekey)
+    """Upload encrypted media to the CDN.
+
+    Accepts either a constructed CDN URL (from upload_param) or a direct
+    upload_full_url — both use POST with the raw ciphertext as the body.
+    """
    timeout = aiohttp.ClientTimeout(total=120)
-    async with session.post(url, data=ciphertext, headers={"Content-Type": "application/octet-stream"}, timeout=timeout) as response:
+    async with session.post(upload_url, data=ciphertext, headers={"Content-Type": "application/octet-stream"}, timeout=timeout) as response:
        if response.status == 200:
            encrypted_param = response.headers.get("x-encrypted-param")
            if encrypted_param:
@@ -649,7 +653,7 @@ def _normalize_markdown_blocks(content: str) -> str:
            result.append(_rewrite_table_block_for_weixin(table_lines))
            continue

-        result.append(_rewrite_headers_for_weixin(line))
+        result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line)))
        i += 1

    normalized = "\n".join(item.rstrip() for item in result)
@@ -811,6 +815,8 @@ def _split_text_for_weixin_delivery(
    ``platforms.weixin.extra.split_multiline_messages`` (``true`` / ``false``)
    or the env var ``WEIXIN_SPLIT_MULTILINE_MESSAGES``.
    """
+    if not content:
+        return []
    if split_per_line:
        # Legacy: one message per top-level delivery unit.
        if len(content) <= max_length and "\n" not in content:
@@ -821,14 +827,14 @@ def _split_text_for_weixin_delivery(
                chunks.append(unit)
                continue
            chunks.extend(_pack_markdown_blocks_for_weixin(unit, max_length))
-        return chunks or [content]
+        return [c for c in chunks if c] or [content]

    # Compact (default): single message when under the limit — unless the
    # content looks like a short chatty exchange, in which case split into
    # separate bubbles for a more natural chat feel.
    if len(content) <= max_length:
        return (
-            _split_delivery_units_for_weixin(content)
+            [u for u in _split_delivery_units_for_weixin(content) if u]
            if _should_split_short_chat_block_for_weixin(content)
            else [content]
        )
@@ -929,7 +935,7 @@ async def qr_login(
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError("aiohttp is required for Weixin QR login")

-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(trust_env=True) as session:
        try:
            qr_resp = await _api_get(
                session,
@@ -1042,6 +1048,10 @@ class WeixinAdapter(BasePlatformAdapter):

    MAX_MESSAGE_LENGTH = 4000

+    # WeChat does not support editing sent messages — streaming must use the
+    # fallback "send-final-only" path so the cursor (▉) is never left visible.
+    SUPPORTS_MESSAGE_EDITING = False
+
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.WEIXIN)
        extra = config.extra or {}
@@ -1124,7 +1134,7 @@ class WeixinAdapter(BasePlatformAdapter):
        except Exception as exc:
            logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc)

-        self._session = aiohttp.ClientSession()
+        self._session = aiohttp.ClientSession(trust_env=True)
        self._token_store.restore(self._account_id)
        self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll")
        self._mark_connected()
@@ -1451,7 +1461,7 @@ class WeixinAdapter(BasePlatformAdapter):
        context_token = self._token_store.get(self._account_id, chat_id)
        last_message_id: Optional[str] = None
        try:
-            chunks = self._split_text(self.format_message(content))
+            chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()]
            for idx, chunk in enumerate(chunks):
                client_id = f"hermes-weixin-{uuid.uuid4().hex}"
                await self._send_text_chunk(
@@ -1555,6 +1565,33 @@ class WeixinAdapter(BasePlatformAdapter):
            logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
            return SendResult(success=False, error=str(exc))

+    async def send_video(
+        self,
+        chat_id: str,
+        video_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a video file to ``chat_id`` through ``_send_file``.
+
+        Returns a failed ``SendResult`` ("Not connected") when there is no
+        session or token, and a failed result carrying the exception text
+        when the upload/send raises.  ``reply_to`` and ``metadata`` are
+        accepted but not used here.
+        """
+        if not (self._session and self._token):
+            return SendResult(success=False, error="Not connected")
+        try:
+            sent_id = await self._send_file(chat_id, video_path, caption or "")
+            outcome = SendResult(success=True, message_id=sent_id)
+        except Exception as exc:
+            logger.error("[%s] send_video failed to=%s: %s", self.name, _safe_id(chat_id), exc)
+            outcome = SendResult(success=False, error=str(exc))
+        return outcome
+
+    async def send_voice(
+        self,
+        chat_id: str,
+        audio_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an audio file by delegating to ``send_document``.
+
+        A missing caption is passed on as an empty string.  ``reply_to`` is
+        accepted for signature compatibility but is not forwarded.
+        """
+        caption_text = caption or ""
+        outcome = await self.send_document(
+            chat_id,
+            audio_path,
+            caption=caption_text,
+            metadata=metadata,
+        )
+        return outcome
+
    async def _download_remote_media(self, url: str) -> str:
        from tools.url_safety import is_safe_url

@@ -1577,6 +1614,7 @@ class WeixinAdapter(BasePlatformAdapter):
        filekey = secrets.token_hex(16)
        aes_key = secrets.token_bytes(16)
        rawsize = len(plaintext)
+        rawfilemd5 = hashlib.md5(plaintext).hexdigest()
        upload_response = await _get_upload_url(
            self._session,
            base_url=self._base_url,
@@ -1585,41 +1623,42 @@ class WeixinAdapter(BasePlatformAdapter):
            media_type=media_type,
            filekey=filekey,
            rawsize=rawsize,
-            rawfilemd5=hashlib.md5(plaintext).hexdigest(),
+            rawfilemd5=rawfilemd5,
            filesize=_aes_padded_size(rawsize),
            aeskey_hex=aes_key.hex(),
        )
        upload_param = str(upload_response.get("upload_param") or "")
        upload_full_url = str(upload_response.get("upload_full_url") or "")
        ciphertext = _aes128_ecb_encrypt(plaintext, aes_key)
-        if upload_param:
-            encrypted_query_param = await _upload_ciphertext(
-                self._session,
-                ciphertext=ciphertext,
-                cdn_base_url=self._cdn_base_url,
-                upload_param=upload_param,
-                filekey=filekey,
-            )
-        elif upload_full_url:
-            timeout = aiohttp.ClientTimeout(total=120)
-            async with self._session.put(
-                upload_full_url,
-                data=ciphertext,
-                headers={"Content-Type": "application/octet-stream"},
-                timeout=timeout,
-            ) as response:
-                response.raise_for_status()
-                encrypted_query_param = response.headers.get("x-encrypted-param") or filekey
+
+        # Prefer upload_full_url (direct CDN), fall back to constructed CDN URL
+        # from upload_param.  Both paths use POST — the old PUT for
+        # upload_full_url caused 404s on the WeChat CDN.
+        if upload_full_url:
+            upload_url = upload_full_url
+        elif upload_param:
+            upload_url = _cdn_upload_url(self._cdn_base_url, upload_param, filekey)
        else:
            raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}")

+        encrypted_query_param = await _upload_ciphertext(
+            self._session,
+            ciphertext=ciphertext,
+            upload_url=upload_url,
+        )
+
        context_token = self._token_store.get(self._account_id, chat_id)
+        # The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes).
+        # Sending base64(raw_bytes) causes images to show as grey boxes on the
+        # receiver side because the decryption key doesn't match.
+        aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
        media_item = item_builder(
            encrypt_query_param=encrypted_query_param,
-            aes_key_b64=base64.b64encode(aes_key).decode("ascii"),
+            aes_key_for_api=aes_key_for_api,
            ciphertext_size=len(ciphertext),
            plaintext_size=rawsize,
            filename=Path(path).name,
+            rawfilemd5=rawfilemd5,
        )

        last_message_id = None
@@ -1659,39 +1698,53 @@ class WeixinAdapter(BasePlatformAdapter):
    def _outbound_media_builder(self, path: str):
+        """Choose the media type constant and item builder for ``path``.
+
+        The MIME type is guessed from the file name (defaulting to
+        ``application/octet-stream``).  Image, video and audio types (or a
+        ``.silk`` suffix) map to their own item shapes; anything else is sent
+        as a generic file item.
+
+        Returns:
+            A ``(media_type, builder)`` tuple.  ``builder`` takes keyword
+            arguments and returns the message item dict; it reads
+            ``encrypt_query_param`` and ``aes_key_for_api`` plus the
+            per-type size/name fields shown in each branch.
+        """
        mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
        if mime.startswith("image/"):
-            return MEDIA_IMAGE, lambda **kwargs: {
+            return MEDIA_IMAGE, lambda **kw: {
                "type": ITEM_IMAGE,
                "image_item": {
                    "media": {
-                        "encrypt_query_param": kwargs["encrypt_query_param"],
-                        "aes_key": kwargs["aes_key_b64"],
+                        "encrypt_query_param": kw["encrypt_query_param"],
+                        "aes_key": kw["aes_key_for_api"],
                        "encrypt_type": 1,
                    },
-                    "mid_size": kwargs["ciphertext_size"],
+                    "mid_size": kw["ciphertext_size"],
                },
            }
        if mime.startswith("video/"):
-            return MEDIA_VIDEO, lambda **kwargs: {
+            return MEDIA_VIDEO, lambda **kw: {
                "type": ITEM_VIDEO,
                "video_item": {
                    "media": {
-                        "encrypt_query_param": kwargs["encrypt_query_param"],
-                        "aes_key": kwargs["aes_key_b64"],
+                        "encrypt_query_param": kw["encrypt_query_param"],
+                        "aes_key": kw["aes_key_for_api"],
                        "encrypt_type": 1,
                    },
-                    "video_size": kwargs["ciphertext_size"],
+                    "video_size": kw["ciphertext_size"],
+                    "play_length": kw.get("play_length", 0),
+                    "video_md5": kw.get("rawfilemd5", ""),
                },
            }
-        return MEDIA_FILE, lambda **kwargs: {
+        if mime.startswith("audio/") or path.endswith(".silk"):
+            return MEDIA_VOICE, lambda **kw: {
+                "type": ITEM_VOICE,
+                "voice_item": {
+                    "media": {
+                        "encrypt_query_param": kw["encrypt_query_param"],
+                        "aes_key": kw["aes_key_for_api"],
+                        "encrypt_type": 1,
+                    },
+                    "playtime": kw.get("playtime", 0),
+                },
+            }
+        return MEDIA_FILE, lambda **kw: {
            "type": ITEM_FILE,
            "file_item": {
                "media": {
-                    "encrypt_query_param": kwargs["encrypt_query_param"],
-                    "aes_key": kwargs["aes_key_b64"],
+                    "encrypt_query_param": kw["encrypt_query_param"],
+                    "aes_key": kw["aes_key_for_api"],
                    "encrypt_type": 1,
                },
-                "file_name": kwargs["filename"],
-                "len": str(kwargs["plaintext_size"]),
+                "file_name": kw["filename"],
+                "len": str(kw["plaintext_size"]),
            },
        }

@@ -1731,7 +1784,7 @@ async def send_weixin_direct(
    token_store.restore(account_id)
    context_token = token_store.get(account_id, chat_id)

-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(trust_env=True) as session:
        adapter = WeixinAdapter(
            PlatformConfig(
                enabled=True,
@@ -120,8 +120,9 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - session_path: Path to store WhatsApp session data
    """
    
-    # WhatsApp message limits
-    MAX_MESSAGE_LENGTH = 65536  # WhatsApp allows longer messages
+    # WhatsApp message limits — practical UX limit, not protocol max.
+    # WhatsApp allows ~65K but long messages are unreadable on mobile.
+    MAX_MESSAGE_LENGTH = 4096
    
    # Default bridge location relative to the hermes-agent install
    _DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
@@ -531,6 +532,63 @@ class WhatsAppAdapter(BasePlatformAdapter):
        self._close_bridge_log()
        print(f"[{self.name}] Disconnected")
    
+    def format_message(self, content: str) -> str:
+        """Convert standard markdown to WhatsApp-compatible formatting.
+
+        WhatsApp markup is *bold*, _italic_, ~strikethrough~, ```code```
+        and monospaced `inline`.  Standard markdown spells bold,
+        single-asterisk italic and strikethrough differently, so those are
+        rewritten here; headers become bold lines and links become
+        ``text (url)``.
+
+        Code blocks (``` fenced) and inline code (`) are protected from
+        conversion via placeholder substitution and restored verbatim.
+
+        Args:
+            content: Message text in standard markdown.
+
+        Returns:
+            The text rewritten for WhatsApp.  Falsy input is returned
+            unchanged.
+        """
+        if not content:
+            return content
+
+        # --- 1. Protect fenced code blocks from formatting changes ---
+        _FENCE_PH = "\x00FENCE"
+        fences: list[str] = []
+
+        def _save_fence(m: re.Match) -> str:
+            fences.append(m.group(0))
+            return f"{_FENCE_PH}{len(fences) - 1}\x00"
+
+        result = re.sub(r"```[\s\S]*?```", _save_fence, content)
+
+        # --- 2. Protect inline code ---
+        _CODE_PH = "\x00CODE"
+        codes: list[str] = []
+
+        def _save_code(m: re.Match) -> str:
+            codes.append(m.group(0))
+            return f"{_CODE_PH}{len(codes) - 1}\x00"
+
+        result = re.sub(r"`[^`\n]+`", _save_code, result)
+
+        # --- 3. Convert markdown formatting to WhatsApp syntax ---
+        # Italic: markdown *text* would render as BOLD in WhatsApp, so turn
+        # it into _text_.  This must run before the bold rules below start
+        # producing single asterisks.  The guards skip list bullets ("* x"),
+        # arithmetic ("a*b*c") and anything adjacent to another asterisk.
+        result = re.sub(
+            r"(?<![\w*])\*(?![\s*_])([^*\n]+?)(?<![\s_])\*(?![\w*])",
+            r"_\1_",
+            result,
+        )
+        # Bold: **text** or __text__ → *text*
+        result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
+        result = re.sub(r"__(.+?)__", r"*\1*", result)
+        # Strikethrough: ~~text~~ → ~text~
+        result = re.sub(r"~~(.+?)~~", r"~\1~", result)
+        # _text_ is already WhatsApp italic — leave as-is
+
+        # --- 4. Convert markdown headers to bold text ---
+        # # Header → *Header*
+        def _header_to_bold(m: re.Match) -> str:
+            title = m.group(1).strip()
+            if not title:
+                return m.group(0)
+            # The bold rule may already have wrapped the title ("# **T**"
+            # is "# *T*" by now); unwrap so we don't emit "**T**".
+            if len(title) > 2 and title.startswith("*") and title.endswith("*"):
+                title = title[1:-1]
+            return f"*{title}*"
+
+        # [ \t]+ (not \s+) so a bare "#" line cannot swallow the next line.
+        result = re.sub(
+            r"^#{1,6}[ \t]+(.+)$", _header_to_bold, result, flags=re.MULTILINE
+        )
+
+        # --- 5. Convert markdown links: [text](url) → text (url) ---
+        result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)
+
+        # --- 6. Restore protected sections ---
+        # Reverse order of protection: an inline-code span may have captured
+        # a fence placeholder, so inline code has to be put back first or
+        # that placeholder (with its NUL bytes) would survive in the output.
+        for i, code in enumerate(codes):
+            result = result.replace(f"{_CODE_PH}{i}\x00", code)
+        for i, fence in enumerate(fences):
+            result = result.replace(f"{_FENCE_PH}{i}\x00", fence)
+
+        return result
+
    async def send(
        self,
        chat_id: str,
@@ -538,38 +596,57 @@ class WhatsAppAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> SendResult:
-        """Send a message via the WhatsApp bridge."""
+        """Send a message via the WhatsApp bridge.
+
+        Formats markdown for WhatsApp, splits long messages into chunks
+        that preserve code block boundaries, and sends each chunk sequentially.
+        """
        if not self._running or not self._http_session:
            return SendResult(success=False, error="Not connected")
        bridge_exit = await self._check_managed_bridge_exit()
        if bridge_exit:
            return SendResult(success=False, error=bridge_exit)
-        
+
+        if not content or not content.strip():
+            return SendResult(success=True, message_id=None)
+
        try:
            import aiohttp

-            payload = {
-                "chatId": chat_id,
-                "message": content,
-            }
-            if reply_to:
-                payload["replyTo"] = reply_to
-            
-            async with self._http_session.post(
-                f"http://127.0.0.1:{self._bridge_port}/send",
-                json=payload,
-                timeout=aiohttp.ClientTimeout(total=30)
-            ) as resp:
-                if resp.status == 200:
-                    data = await resp.json()
-                    return SendResult(
-                        success=True,
-                        message_id=data.get("messageId"),
-                        raw_response=data
-                    )
-                else:
-                    error = await resp.text()
-                    return SendResult(success=False, error=error)
+            # Format and chunk the message
+            formatted = self.format_message(content)
+            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+
+            last_message_id = None
+            for chunk in chunks:
+                payload: Dict[str, Any] = {
+                    "chatId": chat_id,
+                    "message": chunk,
+                }
+                if reply_to and last_message_id is None:
+                    # Only reply-to on the first chunk
+                    payload["replyTo"] = reply_to
+
+                async with self._http_session.post(
+                    f"http://127.0.0.1:{self._bridge_port}/send",
+                    json=payload,
+                    timeout=aiohttp.ClientTimeout(total=30)
+                ) as resp:
+                    if resp.status == 200:
+                        data = await resp.json()
+                        last_message_id = data.get("messageId")
+                    else:
+                        error = await resp.text()
+                        return SendResult(success=False, error=error)
+
+                # Small delay between chunks to avoid rate limiting
+                if len(chunks) > 1:
+                    await asyncio.sleep(0.3)
+
+            return SendResult(
+                success=True,
+                message_id=last_message_id,
+            )
        except Exception as e:
            return SendResult(success=False, error=str(e))

@@ -186,6 +186,8 @@ if _config_path.exists():
                os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
            if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
                os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
+            if "gateway_notify_interval" in _agent_cfg and "HERMES_AGENT_NOTIFY_INTERVAL" not in os.environ:
+                os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
            if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
                os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
        _display_cfg = _cfg.get("display", {})
@@ -876,13 +878,47 @@ class GatewayRunner:
                "api_mode": override.get("api_mode"),
            }
            if override_runtime.get("api_key"):
+                logger.debug(
+                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
+                    (resolved_session_key or "")[:30], model, override_model,
+                    override_runtime.get("provider"),
+                )
                return override_model, override_runtime
+            # Override exists but has no api_key — fall through to env-based
+            # resolution and apply model/provider from the override on top.
+            logger.debug(
+                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
+                (resolved_session_key or "")[:30], model, override_model,
+            )
+        else:
+            logger.debug(
+                "No session model override: session=%s config_model=%s override_keys=%s",
+                (resolved_session_key or "")[:30], model,
+                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
+            )

        runtime_kwargs = _resolve_runtime_agent_kwargs()
        if override and resolved_session_key:
            model, runtime_kwargs = self._apply_session_model_override(
                resolved_session_key, model, runtime_kwargs
            )
+
+        # When the config has no model.default but a provider was resolved
+        # (e.g. user ran `hermes auth add openai-codex` without `hermes model`),
+        # fall back to the provider's first catalog model so the API call
+        # doesn't fail with "model must be a non-empty string".
+        if not model and runtime_kwargs.get("provider"):
+            try:
+                from hermes_cli.models import get_default_model_for_provider
+                model = get_default_model_for_provider(runtime_kwargs["provider"])
+                if model:
+                    logger.info(
+                        "No model configured — defaulting to %s for provider %s",
+                        model, runtime_kwargs["provider"],
+                    )
+            except Exception:
+                pass
+
        return model, runtime_kwargs

    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
@@ -1501,12 +1537,25 @@ class GatewayRunner:
        # This prevents stuck sessions from being blindly resumed on restart,
        # which can create an unrecoverable loop (#7536).  Suspended sessions
        # auto-reset on the next incoming message, giving the user a clean start.
-        try:
-            suspended = self.session_store.suspend_recently_active()
-            if suspended:
-                logger.info("Suspended %d in-flight session(s) from previous run", suspended)
-        except Exception as e:
-            logger.warning("Session suspension on startup failed: %s", e)
+        #
+        # SKIP suspension after a clean (graceful) shutdown — the previous
+        # process already drained active agents, so sessions aren't stuck.
+        # This prevents unwanted auto-resets after `hermes update`,
+        # `hermes gateway restart`, or `/restart`.
+        _clean_marker = _hermes_home / ".clean_shutdown"
+        if _clean_marker.exists():
+            logger.info("Previous gateway exited cleanly — skipping session suspension")
+            try:
+                _clean_marker.unlink()
+            except Exception:
+                pass
+        else:
+            try:
+                suspended = self.session_store.suspend_recently_active()
+                if suspended:
+                    logger.info("Suspended %d in-flight session(s) from previous run", suspended)
+            except Exception as e:
+                logger.warning("Session suspension on startup failed: %s", e)

        connected_count = 0
        enabled_platform_count = 0
@@ -1668,6 +1717,9 @@ class GatewayRunner:
        ):
            self._schedule_update_notification_watch()

+        # Notify the chat that initiated /restart that the gateway is back.
+        await self._send_restart_notification()
+
        # Drain any recovered process watchers (from crash recovery checkpoint)
        try:
            from tools.process_registry import process_registry
@@ -2032,6 +2084,15 @@ class GatewayRunner:
            from gateway.status import remove_pid_file
            remove_pid_file()

+            # Write a clean-shutdown marker so the next startup knows this
+            # wasn't a crash.  suspend_recently_active() only needs to run
+            # after unexpected exits — graceful shutdowns already drain
+            # active agents, so there's no stuck-session risk.
+            try:
+                (_hermes_home / ".clean_shutdown").touch()
+            except Exception:
+                pass
+
            if self._restart_requested and self._restart_via_service:
                self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE
                self._exit_reason = self._exit_reason or "Gateway restart requested"
@@ -2699,6 +2760,9 @@ class GatewayRunner:
        if canonical == "update":
            return await self._handle_update_command(event)

+        if canonical == "debug":
+            return await self._handle_debug_command(event)
+
        if canonical == "title":
            return await self._handle_title_command(event)

@@ -4086,11 +4150,36 @@ class GatewayRunner:
                return f"⏳ Draining {count} active agent(s) before restart..."
            return "⏳ Gateway restart already in progress..."

+        # Save the requester's routing info so the new gateway process can
+        # notify them once it comes back online.
+        try:
+            import json as _json
+            notify_data = {
+                "platform": event.source.platform.value if event.source.platform else None,
+                "chat_id": event.source.chat_id,
+            }
+            if event.source.thread_id:
+                notify_data["thread_id"] = event.source.thread_id
+            (_hermes_home / ".restart_notify.json").write_text(
+                _json.dumps(notify_data)
+            )
+        except Exception as e:
+            logger.debug("Failed to write restart notify file: %s", e)
+
        active_agents = self._running_agent_count()
-        self.request_restart(detached=True, via_service=False)
+        # When running under a service manager (systemd/launchd), use the
+        # service restart path: exit with code 75 so the service manager
+        # restarts us.  The detached subprocess approach (setsid + bash)
+        # doesn't work under systemd because KillMode=mixed kills all
+        # processes in the cgroup, including the detached helper.
+        _under_service = bool(os.environ.get("INVOCATION_ID"))  # systemd sets this
+        if _under_service:
+            self.request_restart(detached=False, via_service=True)
+        else:
+            self.request_restart(detached=True, via_service=False)
        if active_agents:
            return f"⏳ Draining {active_agents} active agent(s) before restart..."
-        return "♻ Restarting gateway..."
+        return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."

    async def _handle_help_command(self, event: MessageEvent) -> str:
        """Handle /help command - list available commands."""
@@ -4304,6 +4393,11 @@ class GatewayRunner:
                            "api_mode": result.api_mode,
                        }

+                        # Evict cached agent so the next turn creates a fresh
+                        # agent from the override rather than relying on the
+                        # stale cache signature to trigger a rebuild.
+                        _self._evict_cached_agent(_session_key)
+
                        # Build confirmation text
                        plabel = result.provider_label or result.target_provider
                        lines = [f"Model switched to `{result.new_model}`"]
@@ -4417,6 +4511,10 @@ class GatewayRunner:
            "api_mode": result.api_mode,
        }

+        # Evict cached agent so the next turn creates a fresh agent from the
+        # override rather than relying on cache signature mismatch detection.
+        self._evict_cached_agent(session_key)
+
        # Persist to config if --global
        if persist_global:
            try:
@@ -6361,6 +6459,61 @@ class GatewayRunner:
        Platform.FEISHU, Platform.WECOM, Platform.WECOM_CALLBACK, Platform.WEIXIN, Platform.BLUEBUBBLES, Platform.LOCAL,
    })

+    async def _handle_debug_command(self, event: MessageEvent) -> str:
+        """Handle /debug: upload a debug report plus logs, reply with paste URLs.
+
+        The blocking work (dump capture, log reads, uploads) is pushed to
+        the running loop's default executor.  ``event`` is not read here.
+
+        Returns:
+            A formatted list of paste URLs, or an error message when the
+            main report itself could not be uploaded.  A log upload that
+            fails is named in the reply instead of aborting it.
+        """
+        import asyncio
+        from hermes_cli.debug import (
+            _capture_dump, collect_debug_report, _read_full_log,
+            upload_to_pastebin,
+        )
+
+        running_loop = asyncio.get_running_loop()
+
+        def _build_reply():
+            dump = _capture_dump()
+            report_text = collect_debug_report(log_lines=200, dump_text=dump)
+
+            # Read both logs first, then prefix each non-empty one with the
+            # dump so every log paste also carries the system summary.
+            raw_logs = {name: _read_full_log(name) for name in ("agent", "gateway")}
+            extras = {
+                f"{name}.log": dump + f"\n\n--- full {name}.log ---\n" + text
+                for name, text in raw_logs.items()
+                if text
+            }
+
+            # The report is mandatory: without it there is nothing to share.
+            try:
+                links = {"Report": upload_to_pastebin(report_text)}
+            except Exception as exc:
+                return f"✗ Failed to upload debug report: {exc}"
+
+            # Log uploads are optional; remember which ones did not make it.
+            not_uploaded = []
+            for label, text in extras.items():
+                try:
+                    links[label] = upload_to_pastebin(text)
+                except Exception:
+                    not_uploaded.append(label)
+
+            pad = max(map(len, links))
+            out = ["**Debug report uploaded:**", ""]
+            out.extend(f"`{label:<{pad}}`  {url}" for label, url in links.items())
+
+            if not_uploaded:
+                out.append(f"\n_(failed to upload: {', '.join(not_uploaded)})_")
+
+            out.append("\nShare these links with the Hermes team for support.")
+            return "\n".join(out)
+
+        return await running_loop.run_in_executor(None, _build_reply)
+
    async def _handle_update_command(self, event: MessageEvent) -> str:
        """Handle /update command — update Hermes Agent to the latest version.

@@ -6603,8 +6756,12 @@ class GatewayRunner:
            if buffer.strip() and (loop.time() - last_stream_time) >= stream_interval:
                await _flush_buffer()

-            # Check for prompts
-            if prompt_path.exists() and session_key:
+            # Check for prompts — only forward if we haven't already sent
+            # one that's still awaiting a response.  Without this guard the
+            # watcher would re-read the same .update_prompt.json every poll
+            # cycle and spam the user with duplicate prompt messages.
+            if (prompt_path.exists() and session_key
+                    and not self._update_prompt_pending.get(session_key)):
                try:
                    prompt_data = json.loads(prompt_path.read_text())
                    prompt_text = prompt_data.get("prompt", "")
@@ -6636,6 +6793,11 @@ class GatewayRunner:
                                f"or type your answer directly."
                            )
                        self._update_prompt_pending[session_key] = True
+                        # Remove the prompt file so it isn't re-read on the
+                        # next poll cycle.  The update process only needs
+                        # .update_response to continue — it doesn't re-check
+                        # .update_prompt.json while waiting.
+                        prompt_path.unlink(missing_ok=True)
                        logger.info("Forwarded update prompt to %s: %s", session_key, prompt_text[:80])
                except (json.JSONDecodeError, OSError) as e:
                    logger.debug("Failed to read update prompt: %s", e)
@@ -6746,6 +6908,48 @@ class GatewayRunner:

        return True

+    async def _send_restart_notification(self) -> None:
+        """Tell the chat that requested /restart that the gateway is back up.
+
+        Reads routing info (platform, chat_id, optional thread_id) from
+        ``.restart_notify.json`` under the Hermes home directory and sends a
+        confirmation through the matching adapter.  Does nothing when the
+        file is absent.  Once the file has been looked at it is removed,
+        whether or not a message went out.
+        """
+        import json as _json
+
+        marker = _hermes_home / ".restart_notify.json"
+        if not marker.exists():
+            return
+
+        try:
+            payload = _json.loads(marker.read_text())
+            platform_name = payload.get("platform")
+            target_chat = payload.get("chat_id")
+            thread = payload.get("thread_id")
+
+            # Both a platform and a chat are needed to route the message.
+            if platform_name and target_chat:
+                adapter = self.adapters.get(Platform(platform_name))
+                if adapter:
+                    thread_meta = {"thread_id": thread} if thread else None
+                    await adapter.send(
+                        target_chat,
+                        "♻ Gateway restarted successfully. Your session continues.",
+                        metadata=thread_meta,
+                    )
+                    logger.info(
+                        "Sent restart notification to %s:%s",
+                        platform_name,
+                        target_chat,
+                    )
+                else:
+                    logger.debug(
+                        "Restart notification skipped: %s adapter not connected",
+                        platform_name,
+                    )
+        except Exception as e:
+            logger.warning("Restart notification failed: %s", e)
+        finally:
+            # One-shot marker: never replay it on a later startup.
+            marker.unlink(missing_ok=True)
+
    def _set_session_env(self, context: SessionContext) -> list:
        """Set session context variables for the current async task.

@@ -7277,9 +7481,11 @@ class GatewayRunner:
                    _pl = get_tool_preview_max_len()
                    import json as _json
                    args_str = _json.dumps(args, ensure_ascii=False, default=str)
-                    _cap = _pl if _pl > 0 else 200
-                    if len(args_str) > _cap:
-                        args_str = args_str[:_cap - 3] + "..."
+                    # When tool_preview_length is 0 (default), don't truncate
+                    # in verbose mode — the user explicitly asked for full
+                    # detail.  Platform message-length limits handle the rest.
+                    if _pl > 0 and len(args_str) > _pl:
+                        args_str = args_str[:_pl - 3] + "..."
                    msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
                elif preview:
                    msg = f"{emoji} {tool_name}: \"{preview}\""
@@ -7545,6 +7751,10 @@ class GatewayRunner:
                    session_key=session_key,
                    user_config=user_config,
                )
+                logger.debug(
+                    "run_agent resolved: model=%s provider=%s session=%s",
+                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
+                )
            except Exception as exc:
                return {
                    "final_response": f"⚠️ Provider authentication failed: {exc}",
@@ -7585,10 +7795,18 @@ class GatewayRunner:
                    from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
                    _adapter = self.adapters.get(source.platform)
                    if _adapter:
+                        # Platforms that don't support editing sent messages
+                        # (e.g. WeChat) must not show a cursor in intermediate
+                        # sends — the cursor would be permanently visible because
+                        # it can never be edited away.  Use an empty cursor for
+                        # such platforms so streaming still delivers the final
+                        # response, just without the typing indicator.
+                        _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
+                        _effective_cursor = _scfg.cursor if _adapter_supports_edit else ""
                        _consumer_cfg = StreamConsumerConfig(
                            edit_interval=_scfg.edit_interval,
                            buffer_threshold=_scfg.buffer_threshold,
-                            cursor=_scfg.cursor,
+                            cursor=_effective_cursor,
                        )
                        _stream_consumer = GatewayStreamConsumer(
                            adapter=_adapter,
@@ -8046,8 +8264,16 @@ class GatewayRunner:
                    if hasattr(_adapter, 'has_pending_interrupt') and _adapter.has_pending_interrupt(session_key):
                        agent = agent_holder[0]
                        if agent:
-                            pending_event = _adapter.get_pending_message(session_key)
-                            pending_text = pending_event.text if pending_event else None
+                            # Peek at the pending message text WITHOUT consuming it.
+                            # The message must remain in _pending_messages so the
+                            # post-run dequeue at _dequeue_pending_event() can
+                            # retrieve the full MessageEvent (with media metadata).
+                            # If we pop here, a race exists: the agent may finish
+                            # before checking _interrupt_requested, and the message
+                            # is lost — neither the interrupt path nor the dequeue
+                            # path finds it.
+                            _peek_event = _adapter._pending_messages.get(session_key)
+                            pending_text = _peek_event.text if _peek_event else None
                            logger.debug("Interrupt detected from adapter, signaling agent...")
                            agent.interrupt(pending_text)
                            _interrupt_detected.set()
@@ -8060,11 +8286,17 @@ class GatewayRunner:
        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())

        # Periodic "still working" notifications for long-running tasks.
-        # Fires every 10 minutes so the user knows the agent hasn't died.
-        _NOTIFY_INTERVAL = 600  # 10 minutes
+        # Fires every N seconds so the user knows the agent hasn't died.
+        # Config: agent.gateway_notify_interval in config.yaml, or
+        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 600s (10 min).
+        # 0 = disable notifications.
+        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
+        _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
        _notify_start = time.time()

        async def _notify_long_running():
+            if _NOTIFY_INTERVAL is None:
+                return  # Notifications disabled (gateway_notify_interval: 0)
            _notify_adapter = self.adapters.get(source.platform)
            if not _notify_adapter:
                return
@@ -8138,7 +8370,7 @@ class GatewayRunner:
                        if (_backup_adapter and _backup_agent
                                and hasattr(_backup_adapter, 'has_pending_interrupt')
                                and _backup_adapter.has_pending_interrupt(session_key)):
-                            _bp_event = _backup_adapter.get_pending_message(session_key)
+                            _bp_event = _backup_adapter._pending_messages.get(session_key)
                            _bp_text = _bp_event.text if _bp_event else None
                            logger.info(
                                "Backup interrupt detected for session %s "
@@ -8198,7 +8430,7 @@ class GatewayRunner:
                        if (_backup_adapter and _backup_agent
                                and hasattr(_backup_adapter, 'has_pending_interrupt')
                                and _backup_adapter.has_pending_interrupt(session_key)):
-                            _bp_event = _backup_adapter.get_pending_message(session_key)
+                            _bp_event = _backup_adapter._pending_messages.get(session_key)
                            _bp_text = _bp_event.text if _bp_event else None
                            logger.info(
                                "Backup interrupt detected for session %s "
@@ -1303,6 +1303,49 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    }


+def _write_codex_cli_tokens(
+    access_token: str,
+    refresh_token: str,
+    *,
+    last_refresh: Optional[str] = None,
+) -> None:
+    """Write refreshed tokens back to ~/.codex/auth.json.
+
+    OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
+    When Hermes refreshes a token it consumes the old refresh_token; if we
+    don't write the new pair back, the Codex CLI (or VS Code extension) will
+    fail with ``refresh_token_reused`` on its next refresh attempt.
+
+    This mirrors the Anthropic write-back to ~/.claude/.credentials.json
+    via ``_write_claude_code_credentials()``.
+
+    The write is best-effort and never raises for file problems: I/O errors
+    are logged at debug level, and an existing file that cannot be parsed
+    is replaced with a fresh document instead of aborting the caller's
+    refresh.
+
+    Args:
+        access_token: New access token, stored under ``tokens.access_token``.
+        refresh_token: New refresh token, stored under ``tokens.refresh_token``.
+        last_refresh: When given, stored under the top-level ``last_refresh``
+            key; when ``None`` any existing value is left untouched.
+    """
+    codex_home = os.getenv("CODEX_HOME", "").strip()
+    if not codex_home:
+        codex_home = str(Path.home() / ".codex")
+    auth_path = Path(codex_home).expanduser() / "auth.json"
+    try:
+        existing: Any = {}
+        if auth_path.is_file():
+            try:
+                existing = json.loads(auth_path.read_text(encoding="utf-8"))
+            except ValueError as exc:
+                # JSONDecodeError and UnicodeDecodeError are both ValueError.
+                # An unreadable file is treated like a missing one so the
+                # rotated tokens still get written.
+                logger.debug("Ignoring unparseable %s: %s", auth_path, exc)
+                existing = {}
+        if not isinstance(existing, dict):
+            existing = {}
+
+        # Preserve any other keys already in the file (top-level and
+        # inside "tokens"); only the two rotated values are replaced.
+        tokens_dict = existing.get("tokens")
+        if not isinstance(tokens_dict, dict):
+            tokens_dict = {}
+        tokens_dict["access_token"] = access_token
+        tokens_dict["refresh_token"] = refresh_token
+        existing["tokens"] = tokens_dict
+        if last_refresh is not None:
+            existing["last_refresh"] = last_refresh
+
+        payload = json.dumps(existing, indent=2)
+        auth_path.parent.mkdir(parents=True, exist_ok=True)
+        # Create the file owner-only from the start so the secrets are never
+        # briefly readable under a permissive umask.
+        fd = os.open(auth_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            fh.write(payload)
+        # A pre-existing file keeps its old mode through os.open; tighten it.
+        auth_path.chmod(0o600)
+    except OSError as exc:
+        logger.debug("Failed to write refreshed tokens to %s: %s", auth_path, exc)
+
+
 def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None:
    """Save Codex OAuth tokens to Hermes auth store (~/.hermes/auth.json)."""
    if last_refresh is None:
@@ -1425,6 +1468,12 @@ def _refresh_codex_auth_tokens(
    updated_tokens["refresh_token"] = refreshed["refresh_token"]

    _save_codex_tokens(updated_tokens)
+    # Write back to ~/.codex/auth.json so Codex CLI / VS Code stay in sync.
+    _write_codex_cli_tokens(
+        refreshed["access_token"],
+        refreshed["refresh_token"],
+        last_refresh=refreshed.get("last_refresh"),
+    )
    return updated_tokens


@@ -201,7 +201,7 @@ def _validate_backup_zip(zf: zipfile.ZipFile) -> tuple[bool, str]:
        return False, "zip archive is empty"

    # Look for telltale files that a hermes home would have
-    markers = {"config.yaml", ".env", "hermes_state.db", "memory_store.db"}
+    markers = {"config.yaml", ".env", "state.db"}
    found = set()
    for n in names:
        # Could be at the root or one level deep (if someone zipped the directory)
@@ -11,6 +11,7 @@ Usage:

 import importlib.util
 import logging
+import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
@@ -52,6 +53,99 @@ _OPENCLAW_SCRIPT_INSTALLED = (
 # Known OpenClaw directory names (current + legacy)
 _OPENCLAW_DIR_NAMES = (".openclaw", ".clawdbot", ".moltbot")

+def _detect_openclaw_processes() -> list[str]:
+    """Detect running OpenClaw processes and services.
+
+    Checks the ``openclaw-gateway.service`` systemd user unit (on anything
+    but Windows), then scans the process table: ``tasklist`` plus a
+    PowerShell query on Windows, ``pgrep -f`` elsewhere.  Every probe is
+    best-effort; a missing tool or a timeout simply contributes nothing.
+
+    Returns:
+        Human-readable descriptions of what was found.  An empty list means
+        nothing was detected.
+    """
+    import os
+
+    found: list[str] = []
+
+    # -- systemd service (Linux) ------------------------------------------
+    if sys.platform != "win32":
+        try:
+            result = subprocess.run(
+                ["systemctl", "--user", "is-active", "openclaw-gateway.service"],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip() == "active":
+                found.append("systemd service: openclaw-gateway.service")
+        except (OSError, subprocess.TimeoutExpired):
+            # OSError covers a missing systemctl (FileNotFoundError) as well
+            # as one that exists but cannot be executed.
+            pass
+
+    # -- process scan ------------------------------------------------------
+    if sys.platform == "win32":
+        try:
+            for exe in ("openclaw.exe", "clawd.exe"):
+                result = subprocess.run(
+                    ["tasklist", "/FI", f"IMAGENAME eq {exe}"],
+                    capture_output=True, text=True, timeout=5,
+                )
+                if exe in result.stdout.lower():
+                    found.append(f"process: {exe}")
+
+            # Node.js-hosted OpenClaw — tasklist doesn't show command lines,
+            # so fall back to PowerShell.  -ExpandProperty makes PowerShell
+            # print the bare PID instead of a formatted table with a header.
+            ps_cmd = (
+                'Get-CimInstance Win32_Process -Filter "Name = \'node.exe\'" | '
+                'Where-Object { $_.CommandLine -match "openclaw|clawd" } | '
+                'Select-Object -First 1 -ExpandProperty ProcessId'
+            )
+            result = subprocess.run(
+                ["powershell", "-NoProfile", "-Command", ps_cmd],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip():
+                found.append(f"node.exe process with openclaw in command line (PID {result.stdout.strip()})")
+        except Exception:
+            pass
+    else:
+        try:
+            result = subprocess.run(
+                ["pgrep", "-f", "openclaw"],
+                capture_output=True, text=True, timeout=3,
+            )
+            if result.returncode == 0:
+                # pgrep -f matches whole command lines, so this process (or
+                # the shell that launched it) is reported whenever
+                # "openclaw" appears in its own arguments, e.g. a path
+                # under ~/.openclaw.  Drop those two PIDs.
+                own_pids = {str(os.getpid()), str(os.getppid())}
+                pids = [pid for pid in result.stdout.split() if pid not in own_pids]
+                if pids:
+                    found.append(f"openclaw process(es) (PIDs: {', '.join(pids)})")
+        except (OSError, subprocess.TimeoutExpired):
+            pass
+
+    return found
+
+
+def _warn_if_openclaw_running(auto_yes: bool) -> None:
+    """Print a warning, and possibly ask for confirmation, if OpenClaw is up.
+
+    Telegram, Discord, and Slack only allow one active connection per bot
+    token, so migrating while OpenClaw is running makes both sides fight
+    over the same token.
+
+    With ``auto_yes`` set, or when stdin is not a TTY, the warning is shown
+    and execution continues.  Otherwise the user is prompted; declining
+    exits the process with status 0.
+    """
+    detections = _detect_openclaw_processes()
+    if not detections:
+        return
+
+    print()
+    print_error("OpenClaw appears to be running:")
+    for item in detections:
+        print_info(f"  * {item}")
+    print_info(
+        "Messaging platforms (Telegram, Discord, Slack) only allow one "
+        "active session per bot token. If you continue, both OpenClaw and "
+        "Hermes may try to use the same token, causing disconnects."
+    )
+    print_info("Recommendation: stop OpenClaw before migrating.")
+    print()
+
+    if auto_yes:
+        return
+
+    if sys.stdin.isatty():
+        # Interactive: proceed only on an explicit "yes".
+        if prompt_yes_no("Continue anyway?", default=False):
+            return
+        print_info("Migration cancelled. Stop OpenClaw and try again.")
+        sys.exit(0)
+    else:
+        print_info("Non-interactive session — continuing to preview only.")
+
+
 def _warn_if_gateway_running(auto_yes: bool) -> None:
    """Check if a Hermes gateway is running with connected platforms.

@@ -287,8 +381,11 @@ def _cmd_migrate(args):
        print_info(f"Workspace:   {workspace_target}")
    print()

-    # Check if a gateway is running with connected platforms — migrating tokens
-    # while the gateway is active will cause conflicts (e.g. Telegram 409).
+    # Check if OpenClaw is still running — migrating tokens while both are
+    # active will cause conflicts (e.g. Telegram 409).
+    _warn_if_openclaw_running(auto_yes)
+
+    # Check if a Hermes gateway is running with connected platforms.
    _warn_if_gateway_running(auto_yes)

    # Ensure config.yaml exists before migration tries to read it
@@ -430,6 +527,28 @@ def _cmd_cleanup(args):
        print_success("No OpenClaw directories found. Nothing to clean up.")
        return

+    # Warn if OpenClaw is still running — archiving while the service is
+    # active causes it to recreate an empty skeleton directory (#8502).
+    running = _detect_openclaw_processes()
+    if running:
+        print()
+        print_error("OpenClaw appears to be still running:")
+        for detail in running:
+            print_info(f"  * {detail}")
+        print_info(
+            "Archiving .openclaw/ while the service is active may cause it to "
+            "immediately recreate an empty skeleton directory, destroying your config."
+        )
+        print_info("Stop OpenClaw first: systemctl --user stop openclaw-gateway.service")
+        print()
+        if not auto_yes:
+            if not sys.stdin.isatty():
+                print_info("Non-interactive session — aborting. Stop OpenClaw and re-run.")
+                return
+            if not prompt_yes_no("Proceed anyway?", default=False):
+                print_info("Aborted. Stop OpenClaw first, then re-run: hermes claw cleanup")
+                return
+
    total_archived = 0

    for source_dir in dirs_to_check:
@@ -129,6 +129,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
@@ -154,6 +155,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True, args_hint="<path>"),
    CommandDef("update", "Update Hermes Agent to the latest version", "Info",
               gateway_only=True),
+    CommandDef("debug", "Upload debug report (system info + logs) and get shareable links", "Info"),

    # Exit
    CommandDef("quit", "Exit the CLI", "Exit",
@@ -50,6 +50,7 @@ _EXTRA_ENV_KEYS = frozenset({
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
+    "MATRIX_RECOVERY_KEY",
 })
 import yaml

@@ -147,25 +148,6 @@ def managed_error(action: str = "modify configuration"):
 # Container-aware CLI (NixOS container mode)
 # =============================================================================

-def _is_inside_container() -> bool:
-    """Detect if we're already running inside a Docker/Podman container."""
-    # Standard Docker/Podman indicators
-    if os.path.exists("/.dockerenv"):
-        return True
-    # Podman uses /run/.containerenv
-    if os.path.exists("/run/.containerenv"):
-        return True
-    # Check cgroup for container runtime evidence (works for both Docker & Podman)
-    try:
-        with open("/proc/1/cgroup", "r") as f:
-            cgroup = f.read()
-            if "docker" in cgroup or "podman" in cgroup or "/lxc/" in cgroup:
-                return True
-    except OSError:
-        pass
-    return False
-
-
 def get_container_exec_info() -> Optional[dict]:
    """Read container mode metadata from HERMES_HOME/.container-mode.

@@ -180,7 +162,8 @@ def get_container_exec_info() -> Optional[dict]:
    if os.environ.get("HERMES_DEV") == "1":
        return None

-    if _is_inside_container():
+    from hermes_constants import is_container
+    if is_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"
@@ -354,6 +337,10 @@ DEFAULT_CONFIG = {
        # threshold before escalating to a full timeout.  The warning fires
        # once per run and does not interrupt the agent.  0 = disable warning.
        "gateway_timeout_warning": 900,
+        # Periodic "still working" notification interval (seconds).
+        # Sends a status message every N seconds so the user knows the
+        # agent hasn't died during long tasks.  0 = disable notifications.
+        "gateway_notify_interval": 600,
    },
    
    "terminal": {
@@ -1293,6 +1280,14 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
+    "MATRIX_RECOVERY_KEY": {
+        "description": "Matrix recovery key for cross-signing verification after device key rotation (from Element: Settings → Security → Recovery Key)",
+        "prompt": "Matrix recovery key",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+        "advanced": True,
+    },
    "BLUEBUBBLES_SERVER_URL": {
        "description": "BlueBubbles server URL for iMessage integration (e.g. http://192.168.1.10:1234)",
        "prompt": "BlueBubbles server URL",
@@ -2641,6 +2636,28 @@ def save_env_value_secure(key: str, value: str) -> Dict[str, Any]:



+def reload_env() -> int:
+    """Sync os.environ with the current contents of ~/.hermes/.env.
+
+    Variables present in the file are added or overwritten when their value
+    differs.  Variables absent from the file are removed from the process
+    environment only if Hermes knows them (keys of OPTIONAL_ENV_VARS or
+    members of _EXTRA_ENV_KEYS), so unrelated environment is left alone.
+
+    Returns:
+        The number of variables that were set or removed.
+    """
+    fresh = load_env()
+    changed = 0
+
+    # Pass 1: apply additions and modifications from the file.
+    for name, val in fresh.items():
+        if os.environ.get(name) == val:
+            continue
+        os.environ[name] = val
+        changed += 1
+
+    # Pass 2: drop Hermes-owned variables that were deleted from the file.
+    hermes_keys = set(OPTIONAL_ENV_VARS.keys()) | _EXTRA_ENV_KEYS
+    stale = [name for name in hermes_keys if name not in fresh and name in os.environ]
+    for name in stale:
+        del os.environ[name]
+        changed += 1
+
+    return changed
+
+
 def get_env_value(key: str) -> Optional[str]:
    """Get a value from ~/.hermes/.env or environment."""
    # Check environment first
@@ -0,0 +1,336 @@
+"""``hermes debug`` — debug tools for Hermes Agent.
+
+Currently supports:
+    hermes debug share    Upload debug report (system info + logs) to a
+                          paste service and print a shareable URL.
+"""
+
+import io
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Optional
+
+from hermes_constants import get_hermes_home
+
+
+# ---------------------------------------------------------------------------
+# Paste services — try paste.rs first, dpaste.com as fallback.
+# ---------------------------------------------------------------------------
+
+# Primary paste endpoint: content is POSTed as the raw request body.
+_PASTE_RS_URL = "https://paste.rs/"
+# Fallback paste endpoint: content is POSTed as multipart form data.
+_DPASTE_COM_URL = "https://dpaste.com/api/"
+
+# Maximum bytes to read from a single log file for upload.
+# paste.rs caps at ~1 MB; we stay under that with headroom.
+_MAX_LOG_BYTES = 512_000
+
+
+def _upload_paste_rs(content: str) -> str:
+    """POST *content* to paste.rs and return the URL of the new paste.
+
+    The service takes the raw text as the request body and answers with
+    the paste URL as the response body.
+
+    Raises ``ValueError`` when the response body does not look like a URL;
+    network errors raised by ``urllib`` propagate unchanged.
+    """
+    request_headers = {
+        "Content-Type": "text/plain; charset=utf-8",
+        "User-Agent": "hermes-agent/debug-share",
+    }
+    request = urllib.request.Request(
+        _PASTE_RS_URL,
+        data=content.encode("utf-8"),
+        method="POST",
+        headers=request_headers,
+    )
+    with urllib.request.urlopen(request, timeout=30) as response:
+        paste_url = response.read().decode("utf-8").strip()
+
+    if paste_url.startswith("http"):
+        return paste_url
+    raise ValueError(f"Unexpected response from paste.rs: {paste_url[:200]}")
+
+
+def _upload_dpaste_com(content: str, expiry_days: int = 7) -> str:
+    """Upload *content* to dpaste.com and return the paste URL.
+
+    dpaste.com is sent a ``multipart/form-data`` body with three fields:
+    ``content``, ``syntax`` (always ``"text"``) and ``expiry_days``.
+
+    Parameters
+    ----------
+    content
+        Text to upload.
+    expiry_days
+        Number of days after which the paste should expire.
+
+    Raises ``ValueError`` when the response body does not look like a URL;
+    network errors raised by ``urllib`` propagate unchanged.
+    """
+    import uuid
+
+    # A fixed boundary would corrupt the request if the uploaded text ever
+    # contained it (e.g. a log quoting this module), so use a random one
+    # and re-roll in the unlikely event it occurs in the content.
+    boundary = f"----HermesDebugBoundary{uuid.uuid4().hex}"
+    while boundary in content:
+        boundary = f"----HermesDebugBoundary{uuid.uuid4().hex}"
+
+    def _field(name: str, value: str) -> str:
+        # One form-data part: delimiter, disposition header, blank line, value.
+        return (
+            f"--{boundary}\r\n"
+            f'Content-Disposition: form-data; name="{name}"\r\n'
+            f"\r\n"
+            f"{value}\r\n"
+        )
+
+    body = (
+        _field("content", content)
+        + _field("syntax", "text")
+        + _field("expiry_days", str(expiry_days))
+        + f"--{boundary}--\r\n"  # closing delimiter
+    ).encode("utf-8")
+
+    req = urllib.request.Request(
+        _DPASTE_COM_URL, data=body, method="POST",
+        headers={
+            "Content-Type": f"multipart/form-data; boundary={boundary}",
+            "User-Agent": "hermes-agent/debug-share",
+        },
+    )
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        url = resp.read().decode("utf-8").strip()
+    if not url.startswith("http"):
+        raise ValueError(f"Unexpected response from dpaste.com: {url[:200]}")
+    return url
+
+
+def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
+    """Upload *content* to the first paste service that accepts it.
+
+    Services are tried in order (paste.rs, then dpaste.com).  The URL from
+    the first successful upload is returned.  If every service fails, a
+    ``RuntimeError`` listing each service's error is raised.
+    """
+    # Ordered by preference.  Only the dpaste.com uploader is given
+    # *expiry_days*; the paste.rs uploader takes the content alone.
+    services = (
+        ("paste.rs", lambda: _upload_paste_rs(content)),
+        ("dpaste.com", lambda: _upload_dpaste_com(content, expiry_days=expiry_days)),
+    )
+
+    errors: list[str] = []
+    for service_name, attempt in services:
+        try:
+            return attempt()
+        except Exception as exc:
+            errors.append(f"{service_name}: {exc}")
+
+    raise RuntimeError(
+        "Failed to upload to any paste service:\n  " + "\n  ".join(errors)
+    )
+
+
+# ---------------------------------------------------------------------------
+# Log file reading
+# ---------------------------------------------------------------------------
+
+def _resolve_log_path(log_name: str) -> Optional[Path]:
+    """Find the log file for *log_name*, falling back to the .1 rotation.
+
+    *log_name* is looked up in ``hermes_cli.logs.LOG_FILES`` to obtain the
+    file name; the file is then searched for under ``<hermes home>/logs``.
+
+    Returns the first candidate that can be stat'ed and is non-empty, or
+    ``None`` when the name is unknown or no usable file exists.
+    """
+    from hermes_cli.logs import LOG_FILES
+
+    filename = LOG_FILES.get(log_name)
+    if not filename:
+        return None
+
+    log_dir = get_hermes_home() / "logs"
+    # Preference order: the live file, then the most recent rotation (.1).
+    for candidate in (log_dir / filename, log_dir / f"{filename}.1"):
+        try:
+            # A single stat() answers both "exists?" and "non-empty?" and
+            # cannot blow up if rotation removes the file between two calls.
+            if candidate.stat().st_size > 0:
+                return candidate
+        except OSError:
+            # Missing or unreadable — treat as not found and try the next.
+            continue
+
+    return None
+
+
+def _read_log_tail(log_name: str, num_lines: int) -> str:
+    """Return the final *num_lines* lines of a log as one string.
+
+    Never raises for read problems: a missing log yields
+    ``"(file not found)"`` and a read failure yields
+    ``"(error reading: ...)"`` so the report can always be assembled.
+    """
+    from hermes_cli.logs import _read_last_n_lines
+
+    path = _resolve_log_path(log_name)
+    if path is None:
+        return "(file not found)"
+
+    try:
+        tail = "".join(_read_last_n_lines(path, num_lines))
+        # Trailing newlines are trimmed; callers add their own separators.
+        return tail.rstrip("\n")
+    except Exception as err:
+        return f"(error reading: {err})"
+
+
+def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
+    """Read a log file for standalone upload.
+
+    Parameters
+    ----------
+    log_name
+        Logical log name, resolved via ``_resolve_log_path``.
+    max_bytes
+        Upper bound on the number of bytes taken from the end of the file.
+
+    Returns the file content (the last *max_bytes*, prefixed with a
+    truncation notice, when the file is larger), or ``None`` if the file
+    doesn't exist, is empty, or cannot be read.
+    """
+    log_path = _resolve_log_path(log_name)
+    if log_path is None:
+        return None
+
+    try:
+        size = log_path.stat().st_size
+        if size == 0:
+            return None
+
+        if size <= max_bytes:
+            return log_path.read_text(encoding="utf-8", errors="replace")
+
+        # File is larger than max_bytes — read the tail.
+        with open(log_path, "rb") as f:
+            f.seek(size - max_bytes)
+            tail = f.read()
+
+        # Skip the partial line at the seek point.  If nothing would remain
+        # (the tail has no newline, or only a trailing one), keep the raw
+        # tail instead of producing a paste with an empty body.
+        newline_at = tail.find(b"\n")
+        if 0 <= newline_at < len(tail) - 1:
+            tail = tail[newline_at + 1:]
+
+        content = tail.decode("utf-8", errors="replace")
+        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
+    except Exception:
+        # Best effort: full logs are optional extras in the debug report.
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Debug report collection
+# ---------------------------------------------------------------------------
+
+def _capture_dump() -> str:
+    """Run ``hermes dump`` in-process and return everything it printed.
+
+    ``run_dump`` writes to stdout, so stdout is redirected into an
+    in-memory buffer for the duration of the call.  A ``SystemExit``
+    raised by the dump is swallowed and whatever was printed up to that
+    point is returned; any other exception propagates to the caller.
+    """
+    import contextlib
+
+    from hermes_cli.dump import run_dump
+
+    class _FakeArgs:
+        # Minimal stand-in for the argparse namespace passed to run_dump.
+        # NOTE(review): presumably False keeps secrets redacted — confirm
+        # against run_dump.
+        show_keys = False
+
+    capture = io.StringIO()
+    # redirect_stdout restores the previous sys.stdout on exit, including
+    # when run_dump raises.
+    with contextlib.redirect_stdout(capture), contextlib.suppress(SystemExit):
+        run_dump(_FakeArgs())
+
+    return capture.getvalue()
+
+
+def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
+    """Assemble the summary debug report: system dump followed by log tails.
+
+    Parameters
+    ----------
+    log_lines
+        How many trailing lines of agent.log to include.  errors.log and
+        gateway.log use the same value, capped at 100.
+    dump_text
+        Output of ``hermes dump`` captured by the caller.  When empty, the
+        dump is captured here.
+
+    Returns the plain-text report, ready to print or upload.
+    """
+    header = dump_text if dump_text else _capture_dump()
+
+    # ── Recent log tails (summary only) ──────────────────────────────────
+    capped_lines = min(log_lines, 100)
+    sections = [
+        ("agent", log_lines),
+        ("errors", capped_lines),
+        ("gateway", capped_lines),
+    ]
+    rendered = [
+        f"--- {name}.log (last {count} lines) ---\n{_read_log_tail(name, count)}"
+        for name, count in sections
+    ]
+
+    # Blank line between sections, single newline at the very end.
+    return header + "\n\n" + "\n\n".join(rendered) + "\n"
+
+
+# ---------------------------------------------------------------------------
+# CLI entry points
+# ---------------------------------------------------------------------------
+
+def run_debug_share(args):
+    """Collect debug report + full logs, then upload each (or print locally).
+
+    Reads three optional attributes from *args*:
+
+    ``lines``   number of log lines in the summary report (default 200)
+    ``expire``  paste expiry in days (default 7)
+    ``local``   when true, print everything instead of uploading
+
+    In upload mode the summary report is mandatory: if it cannot be
+    uploaded the report is printed for manual copy-paste and the process
+    exits with status 1.  The full agent/gateway logs are optional extras;
+    their upload failures are listed but do not abort the command.
+    """
+    log_lines = getattr(args, "lines", 200)
+    expiry = getattr(args, "expire", 7)
+    local_only = getattr(args, "local", False)
+
+    print("Collecting debug report...")
+
+    # Capture dump once — used by the summary report and, when uploading,
+    # prepended to each full-log paste for context.
+    dump_text = _capture_dump()
+
+    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
+    # Insertion order is the display/upload order.
+    full_logs = {
+        "agent.log": _read_full_log("agent"),
+        "gateway.log": _read_full_log("gateway"),
+    }
+
+    if local_only:
+        # The report already starts with the dump, so the full logs are
+        # printed without it; otherwise the dump would appear three times.
+        print(report)
+        for label, text in full_logs.items():
+            if not text:
+                continue
+            print(f"\n\n{'=' * 60}")
+            print(f"FULL {label}")
+            print(f"{'=' * 60}\n")
+            print(text)
+        return
+
+    print("Uploading...")
+    urls: dict[str, str] = {}
+    failures: list[str] = []
+
+    # 1. Summary report (required)
+    try:
+        urls["Report"] = upload_to_pastebin(report, expiry_days=expiry)
+    except RuntimeError as exc:
+        print(f"\nUpload failed: {exc}", file=sys.stderr)
+        print("\nFull report printed below — copy-paste it manually:\n")
+        print(report)
+        sys.exit(1)
+
+    # 2. Full logs (optional).  Each paste gets the dump as a header so it
+    #    is self-contained when shared on its own.
+    for label, text in full_logs.items():
+        if not text:
+            continue
+        paste = f"{dump_text}\n\n--- full {label} ---\n{text}"
+        try:
+            urls[label] = upload_to_pastebin(paste, expiry_days=expiry)
+        except Exception as exc:
+            failures.append(f"{label}: {exc}")
+
+    # Print results ("Report" is always present here, so max() is safe).
+    label_width = max(len(k) for k in urls)
+    print("\nDebug report uploaded:")
+    for label, url in urls.items():
+        print(f"  {label:<{label_width}}  {url}")
+
+    if failures:
+        print(f"\n  (failed to upload: {', '.join(failures)})")
+
+    print("\nShare these links with the Hermes team for support.")
+
+
+def run_debug(args):
+    """Dispatch ``hermes debug`` to the requested subcommand.
+
+    ``args.debug_command == "share"`` runs the share flow; anything else
+    (including a missing attribute) prints the usage text.
+    """
+    if getattr(args, "debug_command", None) == "share":
+        run_debug_share(args)
+        return
+
+    # Default: show help
+    usage_lines = (
+        "Usage: hermes debug share [--lines N] [--expire N] [--local]",
+        "",
+        "Commands:",
+        "  share    Upload debug report to a paste service and print URL",
+        "",
+        "Options:",
+        "  --lines N    Number of log lines to include (default: 200)",
+        "  --expire N   Paste expiry in days (default: 7)",
+        "  --local      Print report locally instead of uploading",
+    )
+    for line in usage_lines:
+        print(line)
@@ -44,6 +44,16 @@ def _redact(value: str) -> str:
 def _gateway_status() -> str:
    """Return a short gateway status string."""
    if sys.platform.startswith("linux"):
+        from hermes_constants import is_container
+        if is_container():
+            try:
+                from hermes_cli.gateway import find_gateway_pids
+                pids = find_gateway_pids()
+                if pids:
+                    return f"running (docker, pid {pids[0]})"
+                return "stopped (docker)"
+            except Exception:
+                return "stopped (docker)"
        try:
            from hermes_cli.gateway import get_service_name
            svc = get_service_name()
@@ -331,7 +331,7 @@ def is_linux() -> bool:
    return sys.platform.startswith('linux')


-from hermes_constants import is_termux, is_wsl
+from hermes_constants import is_container, is_termux, is_wsl


 def _wsl_systemd_operational() -> bool:
@@ -353,7 +353,9 @@ def _wsl_systemd_operational() -> bool:


 def supports_systemd_services() -> bool:
-    if not is_linux() or is_termux():
+    if not is_linux() or is_termux() or is_container():
+        return False
+    if shutil.which("systemctl") is None:
        return False
    if is_wsl():
        return _wsl_systemd_operational()
@@ -483,6 +485,21 @@ def _journalctl_cmd(system: bool = False) -> list[str]:
    return ["journalctl"] if system else ["journalctl", "--user"]


+def _run_systemctl(args: list[str], *, system: bool = False, **kwargs) -> subprocess.CompletedProcess:
+    """Invoke ``systemctl`` with *args* and return the completed process.
+
+    The base command comes from ``_systemctl_cmd(system)``; every extra
+    keyword argument is forwarded to ``subprocess.run`` untouched.
+
+    Callers are expected to be gated by ``supports_systemd_services()``.
+    As a second line of defence, a missing ``systemctl`` binary surfaces as
+    a ``RuntimeError`` with a readable message rather than a raw
+    ``FileNotFoundError`` traceback.
+    """
+    try:
+        command = _systemctl_cmd(system) + args
+        completed = subprocess.run(command, **kwargs)
+    except FileNotFoundError:
+        # "from None" hides the original traceback from the user.
+        raise RuntimeError("systemctl is not available on this system") from None
+    return completed
+
+
 def _service_scope_label(system: bool = False) -> str:
    return "system" if system else "user"

@@ -751,14 +768,22 @@ def _remap_path_for_user(path: str, target_home_dir: str) -> str:

      /root/.hermes/hermes-agent  -> /home/alice/.hermes/hermes-agent
      /opt/hermes                 -> /opt/hermes  (kept as-is)
+
+    Note: this function intentionally does NOT resolve symlinks. A venv's
+    ``bin/python`` is typically a symlink to the base interpreter (e.g. a
+    uv-managed CPython at ``~/.local/share/uv/python/.../python3.11``);
+    resolving that symlink swaps the unit's ``ExecStart`` to a bare Python
+    that has none of the venv's site-packages, so the service crashes on
+    the first ``import``. Keep the symlinked path so the venv activates
+    its own environment. Lexical expansion only via ``expanduser``.
    """
-    current_home = Path.home().resolve()
-    resolved = Path(path).resolve()
+    current_home = Path.home()
+    p = Path(path).expanduser()
    try:
-        relative = resolved.relative_to(current_home)
+        relative = p.relative_to(current_home)
        return str(Path(target_home_dir) / relative)
    except ValueError:
-        return str(resolved)
+        return str(p)


 def _hermes_home_for_target_user(target_home_dir: str) -> str:
@@ -929,7 +954,7 @@ def refresh_systemd_unit_if_needed(system: bool = False) -> bool:

    expected_user = _read_systemd_user_from_unit(unit_path) if system else None
    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=expected_user), encoding="utf-8")
-    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
+    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
    print(f"↻ Updated gateway {_service_scope_label(system)} service definition to match the current Hermes install")
    return True

@@ -1025,7 +1050,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
        if not systemd_unit_is_current(system=system):
            print(f"↻ Repairing outdated {_service_scope_label(system)} systemd service at: {unit_path}")
            refresh_systemd_unit_if_needed(system=system)
-            subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True, timeout=30)
+            _run_systemctl(["enable", get_service_name()], system=system, check=True, timeout=30)
            print(f"✓ {_service_scope_label(system).capitalize()} service definition updated")
            return
        print(f"Service already installed at: {unit_path}")
@@ -1036,8 +1061,8 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
    print(f"Installing {_service_scope_label(system)} systemd service to: {unit_path}")
    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8")

-    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
-    subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True, timeout=30)
+    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
+    _run_systemctl(["enable", get_service_name()], system=system, check=True, timeout=30)

    print()
    print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!")
@@ -1063,15 +1088,15 @@ def systemd_uninstall(system: bool = False):
    if system:
        _require_root_for_system_service("uninstall")

-    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=False, timeout=90)
-    subprocess.run(_systemctl_cmd(system) + ["disable", get_service_name()], check=False, timeout=30)
+    _run_systemctl(["stop", get_service_name()], system=system, check=False, timeout=90)
+    _run_systemctl(["disable", get_service_name()], system=system, check=False, timeout=30)

    unit_path = get_systemd_unit_path(system=system)
    if unit_path.exists():
        unit_path.unlink()
        print(f"✓ Removed {unit_path}")

-    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
+    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled")


@@ -1080,7 +1105,7 @@ def systemd_start(system: bool = False):
    if system:
        _require_root_for_system_service("start")
    refresh_systemd_unit_if_needed(system=system)
-    subprocess.run(_systemctl_cmd(system) + ["start", get_service_name()], check=True, timeout=30)
+    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")


@@ -1089,7 +1114,7 @@ def systemd_stop(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("stop")
-    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=True, timeout=90)
+    _run_systemctl(["stop", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service stopped")


@@ -1105,7 +1130,7 @@ def systemd_restart(system: bool = False):
    if pid is not None and _request_gateway_self_restart(pid):
        print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
        return
-    subprocess.run(_systemctl_cmd(system) + ["reload-or-restart", get_service_name()], check=True, timeout=90)
+    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")


@@ -1129,14 +1154,16 @@ def systemd_status(deep: bool = False, system: bool = False):
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

-    subprocess.run(
-        _systemctl_cmd(system) + ["status", get_service_name(), "--no-pager"],
+    _run_systemctl(
+        ["status", get_service_name(), "--no-pager"],
+        system=system,
        capture_output=False,
        timeout=10,
    )

-    result = subprocess.run(
-        _systemctl_cmd(system) + ["is-active", get_service_name()],
+    result = _run_systemctl(
+        ["is-active", get_service_name()],
+        system=system,
        capture_output=True,
        text=True,
        timeout=10,
@@ -2100,12 +2127,6 @@ def _setup_dingtalk():
    _setup_standard_platform(dingtalk_platform)


-def _setup_feishu():
-    """Configure Feishu / Lark via the standard platform setup."""
-    feishu_platform = next(p for p in _PLATFORMS if p["key"] == "feishu")
-    _setup_standard_platform(feishu_platform)
-
-
 def _setup_wecom():
    """Configure WeCom (Enterprise WeChat) via the standard platform setup."""
    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
@@ -2129,24 +2150,24 @@ def _is_service_running() -> bool:

        if user_unit_exists:
            try:
-                result = subprocess.run(
-                    _systemctl_cmd(False) + ["is-active", get_service_name()],
-                    capture_output=True, text=True, timeout=10,
+                result = _run_systemctl(
+                    ["is-active", get_service_name()],
+                    system=False, capture_output=True, text=True, timeout=10,
                )
                if result.stdout.strip() == "active":
                    return True
-            except subprocess.TimeoutExpired:
+            except (RuntimeError, subprocess.TimeoutExpired):
                pass

        if system_unit_exists:
            try:
-                result = subprocess.run(
-                    _systemctl_cmd(True) + ["is-active", get_service_name()],
-                    capture_output=True, text=True, timeout=10,
+                result = _run_systemctl(
+                    ["is-active", get_service_name()],
+                    system=True, capture_output=True, text=True, timeout=10,
                )
                if result.stdout.strip() == "active":
                    return True
-            except subprocess.TimeoutExpired:
+            except (RuntimeError, subprocess.TimeoutExpired):
                pass

        return False
@@ -2290,6 +2311,178 @@ def _setup_weixin():
        print_info(f"  User ID: {user_id}")


+def _setup_feishu():
+    """Interactive setup for Feishu / Lark — scan-to-create or manual credentials.
+
+    Walks the user through, in order:
+
+    1. an optional early exit when FEISHU_APP_ID / FEISHU_APP_SECRET are
+       already set and the user declines to reconfigure;
+    2. obtaining credentials, either via ``qr_register()`` or by typing the
+       App ID / App Secret (manual entry is also the fallback when the QR
+       flow fails or yields nothing);
+    3. connection mode, DM authorization policy, group policy and an
+       optional home chat ID.
+
+    Each answer is persisted immediately with ``save_env_value``.  Returns
+    ``None``; returns early (saving nothing further) when the user cancels
+    the QR flow or leaves the App ID / App Secret blank.
+    """
+    print()
+    print(color("  ─── 🪽 Feishu / Lark Setup ───", Colors.CYAN))
+
+    existing_app_id = get_env_value("FEISHU_APP_ID")
+    existing_secret = get_env_value("FEISHU_APP_SECRET")
+    if existing_app_id and existing_secret:
+        print()
+        print_success("Feishu / Lark is already configured.")
+        if not prompt_yes_no("  Reconfigure Feishu / Lark?", False):
+            return
+
+    # ── Choose setup method ──
+    print()
+    method_choices = [
+        "Scan QR code to create a new bot automatically (recommended)",
+        "Enter existing App ID and App Secret manually",
+    ]
+    method_idx = prompt_choice("  How would you like to set up Feishu / Lark?", method_choices, 0)
+
+    # Filled in by whichever path succeeds; a falsy value after the QR
+    # attempt routes the user into manual entry below.
+    credentials = None
+    used_qr = False
+
+    if method_idx == 0:
+        # ── QR scan-to-create ──
+        try:
+            from gateway.platforms.feishu import qr_register
+        except Exception as exc:
+            print_error(f"  Feishu / Lark onboard import failed: {exc}")
+            qr_register = None
+
+        if qr_register is not None:
+            try:
+                credentials = qr_register()
+            except KeyboardInterrupt:
+                # Ctrl-C during the QR flow aborts the whole setup.
+                print()
+                print_warning("  Feishu / Lark setup cancelled.")
+                return
+            except Exception as exc:
+                print_warning(f"  QR registration failed: {exc}")
+        if credentials:
+            used_qr = True
+        if not credentials:
+            print_info("  QR setup did not complete. Continuing with manual input.")
+
+    # ── Manual credential input ──
+    if not credentials:
+        print()
+        print_info("  Go to https://open.feishu.cn/ (or https://open.larksuite.com/ for Lark)")
+        print_info("  Create an app, enable the Bot capability, and copy the credentials.")
+        print()
+        app_id = prompt("  App ID", password=False)
+        if not app_id:
+            print_warning("  Skipped — Feishu / Lark won't work without an App ID.")
+            return
+        app_secret = prompt("  App Secret", password=True)
+        if not app_secret:
+            print_warning("  Skipped — Feishu / Lark won't work without an App Secret.")
+            return
+
+        domain_choices = ["feishu (China)", "lark (International)"]
+        domain_idx = prompt_choice("  Domain", domain_choices, 0)
+        domain = "lark" if domain_idx == 1 else "feishu"
+
+        # Try to probe the bot with manual credentials
+        # (verification is advisory: credentials are kept even if it fails).
+        bot_name = None
+        try:
+            from gateway.platforms.feishu import probe_bot
+            bot_info = probe_bot(app_id, app_secret, domain)
+            if bot_info:
+                bot_name = bot_info.get("bot_name")
+                print_success(f"  Credentials verified — bot: {bot_name or 'unnamed'}")
+            else:
+                print_warning("  Could not verify bot connection. Credentials saved anyway.")
+        except Exception as exc:
+            print_warning(f"  Credential verification skipped: {exc}")
+
+        # Same keys that are read back below, so both paths share the
+        # save logic.
+        credentials = {
+            "app_id": app_id,
+            "app_secret": app_secret,
+            "domain": domain,
+            "open_id": None,
+            "bot_name": bot_name,
+        }
+
+    # ── Save core credentials ──
+    # NOTE(review): app_id / app_secret are indexed directly — assumes
+    # qr_register() always returns both keys; confirm against its contract.
+    app_id = credentials["app_id"]
+    app_secret = credentials["app_secret"]
+    domain = credentials.get("domain", "feishu")
+    open_id = credentials.get("open_id")
+    bot_name = credentials.get("bot_name")
+
+    save_env_value("FEISHU_APP_ID", app_id)
+    save_env_value("FEISHU_APP_SECRET", app_secret)
+    save_env_value("FEISHU_DOMAIN", domain)
+    # Bot identity is resolved at runtime via _hydrate_bot_identity().
+
+    # ── Connection mode ──
+    if used_qr:
+        # QR-created bots are not asked; websocket is chosen for them.
+        connection_mode = "websocket"
+    else:
+        print()
+        mode_choices = [
+            "WebSocket (recommended — no public URL needed)",
+            "Webhook (requires a reachable HTTP endpoint)",
+        ]
+        mode_idx = prompt_choice("  Connection mode", mode_choices, 0)
+        connection_mode = "webhook" if mode_idx == 1 else "websocket"
+        if connection_mode == "webhook":
+            print_info("  Webhook defaults: 127.0.0.1:8765/feishu/webhook")
+            print_info("  Override with FEISHU_WEBHOOK_HOST / FEISHU_WEBHOOK_PORT / FEISHU_WEBHOOK_PATH")
+            print_info("  For signature verification, set FEISHU_ENCRYPT_KEY and FEISHU_VERIFICATION_TOKEN")
+    save_env_value("FEISHU_CONNECTION_MODE", connection_mode)
+
+    # NOTE(review): this also fires on the manual path when probe_bot
+    # returned a name, where the bot was verified rather than created.
+    if bot_name:
+        print()
+        print_success(f"  Bot created: {bot_name}")
+
+    # ── DM security policy ──
+    print()
+    access_choices = [
+        "Use DM pairing approval (recommended)",
+        "Allow all direct messages",
+        "Only allow listed user IDs",
+    ]
+    access_idx = prompt_choice("  How should direct messages be authorized?", access_choices, 0)
+    if access_idx == 0:
+        save_env_value("FEISHU_ALLOW_ALL_USERS", "false")
+        save_env_value("FEISHU_ALLOWED_USERS", "")
+        print_success("  DM pairing enabled.")
+        print_info("  Unknown users can request access; approve with `hermes pairing approve`.")
+    elif access_idx == 1:
+        save_env_value("FEISHU_ALLOW_ALL_USERS", "true")
+        save_env_value("FEISHU_ALLOWED_USERS", "")
+        print_warning("  Open DM access enabled for Feishu / Lark.")
+    else:
+        save_env_value("FEISHU_ALLOW_ALL_USERS", "false")
+        # Pre-fill the prompt with the open_id from the credentials, if any
+        # (always None on the manual path).
+        default_allow = open_id or ""
+        allowlist = prompt("  Allowed user IDs (comma-separated)", default_allow, password=False).replace(" ", "")
+        save_env_value("FEISHU_ALLOWED_USERS", allowlist)
+        print_success("  Allowlist saved.")
+
+    # ── Group policy ──
+    print()
+    group_choices = [
+        "Respond only when @mentioned in groups (recommended)",
+        "Disable group chats",
+    ]
+    group_idx = prompt_choice("  How should group chats be handled?", group_choices, 0)
+    if group_idx == 0:
+        save_env_value("FEISHU_GROUP_POLICY", "open")
+        print_info("  Group chats enabled (bot must be @mentioned).")
+    else:
+        save_env_value("FEISHU_GROUP_POLICY", "disabled")
+        print_info("  Group chats disabled.")
+
+    # ── Home channel ──
+    print()
+    home_channel = prompt("  Home chat ID (optional, for cron/notifications)", password=False)
+    if home_channel:
+        save_env_value("FEISHU_HOME_CHANNEL", home_channel)
+        print_success(f"  Home channel set to {home_channel}")
+
+    # ── Summary ──
+    print()
+    print_success("🪽 Feishu / Lark configured!")
+    print_info(f"  App ID: {app_id}")
+    print_info(f"  Domain: {domain}")
+    if bot_name:
+        print_info(f"  Bot: {bot_name}")
+
+
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -2467,6 +2660,8 @@ def gateway_setup():
            _setup_signal()
        elif platform["key"] == "weixin":
            _setup_weixin()
+        elif platform["key"] == "feishu":
+            _setup_feishu()
        else:
            _setup_standard_platform(platform)

@@ -2606,6 +2801,15 @@ def gateway_command(args):
            print("  tmux new -s hermes 'hermes gateway run'         # persistent via tmux")
            print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # background")
            sys.exit(1)
+        elif is_container():
+            print("Service installation is not needed inside a Docker container.")
+            print("The container runtime is your service manager — use Docker restart policies instead:")
+            print()
+            print("  docker run --restart unless-stopped ...   # auto-restart on crash/reboot")
+            print("  docker restart <container>                # manual restart")
+            print()
+            print("To run the gateway: hermes gateway run")
+            sys.exit(0)
        else:
            print("Service installation not supported on this platform.")
            print("Run manually: hermes gateway run")
@@ -2624,10 +2828,17 @@ def gateway_command(args):
            systemd_uninstall(system=system)
        elif is_macos():
            launchd_uninstall()
+        elif is_container():
+            print("Service uninstall is not applicable inside a Docker container.")
+            print("To stop the gateway, stop or remove the container:")
+            print()
+            print("  docker stop <container>")
+            print("  docker rm <container>")
+            sys.exit(0)
        else:
            print("Not supported on this platform.")
            sys.exit(1)
-    
+
    elif subcmd == "start":
        system = getattr(args, 'system', False)
        if is_termux():
@@ -2648,10 +2859,19 @@ def gateway_command(args):
            print()
            print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
            sys.exit(1)
+        elif is_container():
+            print("Service start is not applicable inside a Docker container.")
+            print("The gateway runs as the container's main process.")
+            print()
+            print("  docker start <container>     # start a stopped container")
+            print("  docker restart <container>   # restart a running container")
+            print()
+            print("Or run the gateway directly: hermes gateway run")
+            sys.exit(0)
        else:
            print("Not supported on this platform.")
            sys.exit(1)
-    
+
    elif subcmd == "stop":
        stop_all = getattr(args, 'all', False)
        system = getattr(args, 'system', False)
@@ -1107,6 +1107,7 @@ def select_provider_and_model(args=None):
                "base_url": base_url,
                "api_key": entry.get("api_key", ""),
                "model": entry.get("model", ""),
+                "api_mode": entry.get("api_mode", ""),
            }
        return custom_provider_map

@@ -1955,6 +1956,12 @@ def _model_flow_named_custom(config, provider_info):
    model["base_url"] = base_url
    if api_key:
        model["api_key"] = api_key
+    # Apply api_mode from custom_providers entry, or clear stale value
+    custom_api_mode = provider_info.get("api_mode", "")
+    if custom_api_mode:
+        model["api_mode"] = custom_api_mode
+    else:
+        model.pop("api_mode", None)  # let runtime auto-detect from URL
    save_config(cfg)
    deactivate_provider()

@@ -2492,8 +2499,11 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        print()
        override = ""
    if override and base_url_env:
-        save_env_value(base_url_env, override)
-        effective_base = override
+        if not override.startswith(("http://", "https://")):
+            print("  Invalid URL — must start with http:// or https://. Keeping current value.")
+        else:
+            save_env_value(base_url_env, override)
+            effective_base = override

    # Model selection — resolution order:
    #   1. models.dev registry (cached, filtered for agentic/tool-capable models)
@@ -2824,6 +2834,12 @@ def cmd_dump(args):
    run_dump(args)


+def cmd_debug(args):
+    """Debug tools (share report, etc.): hand the parsed args to ``run_debug``."""
+    from hermes_cli.debug import run_debug
+    run_debug(args)
+
+
 def cmd_config(args):
    """Configuration management."""
    from hermes_cli.config import config_command
@@ -2960,6 +2976,44 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
    return default


+def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
+    """Build the web UI frontend with npm (``npm install`` + ``npm run build``).
+
+    Args:
+        web_dir: Path to the ``web/`` source directory.
+        fatal: If True (used by ``hermes dashboard``), print manual-build
+               guidance on failure and treat a missing npm as a failure;
+               otherwise failures only produce a soft warning.
+
+    Returns:
+        True if the build succeeded or was skipped (no ``package.json``, or
+        npm is missing and ``fatal`` is False); False if npm is missing in
+        fatal mode or if either npm step exits non-zero.
+    """
+    if not (web_dir / "package.json").exists():
+        return True
+    import shutil
+    npm = shutil.which("npm")
+    if not npm:
+        if fatal:
+            print("Web UI frontend not built and npm is not available.")
+            print("Install Node.js, then run:  cd web && npm install && npm run build")
+        return not fatal
+    print("→ Building web UI...")
+    # Both steps share the same failure reporting; stop at the first failure.
+    steps = (("npm install", ["install", "--silent"]), ("build", ["run", "build"]))
+    for label, npm_args in steps:
+        result = subprocess.run([npm, *npm_args], cwd=web_dir, capture_output=True)
+        if result.returncode != 0:
+            print(f"  {'✗' if fatal else '⚠'} Web UI {label} failed"
+                  + ("" if fatal else " (hermes dashboard will not be available)"))
+            if fatal:
+                print("  Run manually:  cd web && npm install && npm run build")
+            return False
+    print("  ✓ Web UI built")
+    return True
+
+
 def _update_via_zip(args):
    """Update Hermes Agent by downloading a ZIP archive.
    
@@ -3054,7 +3108,10 @@ def _update_via_zip(args):
                check=True,
            )
        _install_python_dependencies_with_optional_fallback(pip_cmd)
-    
+
+    # Build web UI frontend (optional — requires npm)
+    _build_web_ui(PROJECT_ROOT / "web")
+
    # Sync skills
    try:
        from tools.skills_sync import sync_skills
@@ -3801,7 +3858,10 @@ def cmd_update(args):
            if shutil.which("npm"):
                print("→ Updating Node.js dependencies...")
                subprocess.run(["npm", "install", "--silent"], cwd=PROJECT_ROOT, check=False)
-        
+
+        # Build web UI frontend (optional — requires npm)
+        _build_web_ui(PROJECT_ROOT / "web")
+
        print()
        print("✓ Code updated!")
        
@@ -3929,6 +3989,26 @@ def cmd_update(args):
        print()
        print("✓ Update complete!")
        
+        # Write exit code *before* the gateway restart attempt.
+        # When running as ``hermes update --gateway`` (spawned by the gateway's
+        # /update command), this process lives inside the gateway's systemd
+        # cgroup.  ``systemctl restart hermes-gateway`` kills everything in the
+        # cgroup (KillMode=mixed → SIGKILL to remaining processes), including
+        # us and the wrapping bash shell.  The shell never reaches its
+        # ``printf $status > .update_exit_code`` epilogue, so the exit-code
+        # marker file is never created.  The new gateway's update watcher then
+        # polls for 30 minutes and sends a spurious timeout message.
+        #
+        # Writing the marker here — after git pull + pip install succeed but
+        # before we attempt the restart — ensures the new gateway sees it
+        # regardless of how we die.
+        if gateway_mode:
+            _exit_code_path = get_hermes_home() / ".update_exit_code"
+            try:
+                _exit_code_path.write_text("0")
+            except OSError:
+                pass
+        
        # Auto-restart ALL gateways after update.
        # The code update (git pull) is shared across all profiles, so every
        # running gateway needs restarting to pick up the new code.
@@ -4063,7 +4143,7 @@ def _coalesce_session_name_args(argv: list) -> list:
        "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", "auth",
        "status", "cron", "doctor", "config", "pairing", "skills", "tools",
        "mcp", "sessions", "insights", "version", "update", "uninstall",
-        "profile",
+        "profile", "dashboard",
    }
    _SESSION_FLAGS = {"-c", "--continue", "-r", "--resume"}

@@ -4213,18 +4293,24 @@ def cmd_profile(args):
                            print(f'  Add to your shell config (~/.bashrc or ~/.zshrc):')
                            print(f'    export PATH="$HOME/.local/bin:$PATH"')

+            # Profile dir for display
+            try:
+                profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
+            except ValueError:
+                profile_dir_display = str(profile_dir)
+
            # Next steps
            print(f"\nNext steps:")
            print(f"  {name} setup              Configure API keys and model")
            print(f"  {name} chat               Start chatting")
            print(f"  {name} gateway start      Start the messaging gateway")
            if clone or clone_all:
-                try:
-                    profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
-                except ValueError:
-                    profile_dir_display = str(profile_dir)
                print(f"\n  Edit {profile_dir_display}/.env for different API keys")
                print(f"  Edit {profile_dir_display}/SOUL.md for different personality")
+            else:
+                print(f"\n  ⚠ This profile has no API keys yet. Run '{name} setup' first,")
+                print(f"    or it will inherit keys from your shell environment.")
+                print(f"  Edit {profile_dir_display}/SOUL.md to customize personality")
            print()

        except (ValueError, FileExistsError, FileNotFoundError) as e:
@@ -4335,6 +4421,27 @@ def cmd_profile(args):
            sys.exit(1)


+def cmd_dashboard(args):
+    """Launch the web dashboard: check deps, build the frontend, serve it."""
+    # The web dependencies must be importable before anything else runs.
+    try:
+        import fastapi  # noqa: F401
+        import uvicorn  # noqa: F401
+    except ImportError:
+        print("Web UI dependencies not installed.")
+        print("Install them with:  pip install hermes-agent[web]")
+        sys.exit(1)
+
+    # fatal=True: _build_web_ui prints its own guidance when it fails.
+    frontend_ok = _build_web_ui(PROJECT_ROOT / "web", fatal=True)
+    if not frontend_ok:
+        sys.exit(1)
+
+    # Imported late so the dependency check above runs before web_server loads.
+    from hermes_cli.web_server import start_server
+    start_server(host=args.host, port=args.port, open_browser=not args.no_open)
+
+
 def cmd_completion(args):
    """Print shell completion script."""
    from hermes_cli.profiles import generate_bash_completion, generate_zsh_completion
@@ -4400,6 +4507,7 @@ Examples:
    hermes logs -f                Follow agent.log in real time
    hermes logs errors            View errors.log
    hermes logs --since 1h        Lines from the last hour
+    hermes debug share             Upload debug report for support
    hermes update                 Update to latest version

 For more help on a command:
@@ -4929,6 +5037,43 @@ For more help on a command:
    )
    dump_parser.set_defaults(func=cmd_dump)

+    # =========================================================================
+    # debug command
+    # =========================================================================
+    debug_parser = subparsers.add_parser(
+        "debug",
+        help="Debug tools — upload logs and system info for support",
+        description="Debug utilities for Hermes Agent. Use 'hermes debug share' to "
+                    "upload a debug report (system info + recent logs) to a paste "
+                    "service and get a shareable URL.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""\
+Examples:
+    hermes debug share              Upload debug report and print URL
+    hermes debug share --lines 500  Include more log lines
+    hermes debug share --expire 30  Keep paste for 30 days
+    hermes debug share --local      Print report locally (no upload)
+""",
+    )
+    debug_sub = debug_parser.add_subparsers(dest="debug_command")
+    share_parser = debug_sub.add_parser(
+        "share",
+        help="Upload debug report to a paste service and print a shareable URL",
+    )
+    share_parser.add_argument(
+        "--lines", type=int, default=200,
+        help="Number of log lines to include per log file (default: 200)",
+    )
+    share_parser.add_argument(
+        "--expire", type=int, default=7,
+        help="Paste expiry in days (default: 7)",
+    )
+    share_parser.add_argument(
+        "--local", action="store_true",
+        help="Print the report locally instead of uploading",
+    )
+    debug_parser.set_defaults(func=cmd_debug)
+
    # =========================================================================
    # backup command
    # =========================================================================
@@ -5782,6 +5927,19 @@ For more help on a command:
    )
    completion_parser.set_defaults(func=cmd_completion)

+    # =========================================================================
+    # dashboard command
+    # =========================================================================
+    dashboard_parser = subparsers.add_parser(
+        "dashboard",
+        help="Start the web UI dashboard",
+        description="Launch the Hermes Agent web dashboard for managing config, API keys, and sessions",
+    )
+    dashboard_parser.add_argument("--port", type=int, default=9119, help="Port (default 9119)")
+    dashboard_parser.add_argument("--host", default="127.0.0.1", help="Host (default 127.0.0.1)")
+    dashboard_parser.add_argument("--no-open", action="store_true", help="Don't open browser automatically")
+    dashboard_parser.set_defaults(func=cmd_dashboard)
+
    # =========================================================================
    # logs command
    # =========================================================================
@@ -8,8 +8,9 @@ Different LLM providers expect model identifiers in different formats:
  hyphens: ``claude-sonnet-4-6``.
 - **Copilot** expects bare names *with* dots preserved:
  ``claude-sonnet-4.6``.
- **OpenCode Zen** follows the same dot-to-hyphen convention as
-  Anthropic: ``claude-sonnet-4-6``.
+- **OpenCode Zen** preserves dots for GPT/GLM/Gemini/Kimi/MiniMax-style
+  model IDs, but Claude still uses hyphenated native names like
+  ``claude-sonnet-4-6``.
 - **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
 - **DeepSeek** only accepts two model identifiers:
  ``deepseek-chat`` and ``deepseek-reasoner``.
@@ -67,7 +68,6 @@ _AGGREGATOR_PROVIDERS: frozenset[str] = frozenset({
 # Providers that want bare names with dots replaced by hyphens.
 _DOT_TO_HYPHEN_PROVIDERS: frozenset[str] = frozenset({
    "anthropic",
-    "opencode-zen",
 })

 # Providers that want bare names with dots preserved.
@@ -329,6 +329,9 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
        >>> normalize_model_for_provider("claude-sonnet-4.6", "opencode-zen")
        'claude-sonnet-4-6'

+        >>> normalize_model_for_provider("minimax-m2.5-free", "opencode-zen")
+        'minimax-m2.5-free'
+
        >>> normalize_model_for_provider("deepseek-v3", "deepseek")
        'deepseek-chat'

@@ -351,7 +354,16 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
    if provider in _AGGREGATOR_PROVIDERS:
        return _prepend_vendor(name)

-    # --- Anthropic / OpenCode: strip matching provider prefix, dots -> hyphens ---
+    # --- OpenCode Zen: Claude stays hyphenated; other models keep dots ---
+    if provider == "opencode-zen":
+        bare = _strip_matching_provider_prefix(name, provider)
+        if "/" in bare:
+            return bare
+        if bare.lower().startswith("claude-"):
+            return _dots_to_hyphens(bare)
+        return bare
+
+    # --- Anthropic: strip matching provider prefix, dots -> hyphens ---
    if provider in _DOT_TO_HYPHEN_PROVIDERS:
        bare = _strip_matching_provider_prefix(name, provider)
        if "/" in bare:
@@ -70,13 +70,13 @@ def _codex_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
+        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
        "openai/gpt-5.4-mini",
-        "xiaomi/mimo-v2-pro",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
@@ -130,6 +130,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gemma-4-26b-it",
    ],
    "zai": [
+        "glm-5.1",
        "glm-5",
        "glm-5-turbo",
        "glm-4.7",
@@ -546,6 +547,20 @@ _PROVIDER_ALIASES = {
 }


+def get_default_model_for_provider(provider: str) -> str:
+    """Look up the fallback model for ``provider`` ("" when there is none).
+
+    The default is simply the first entry of the provider's list in
+    ``_PROVIDER_MODELS`` — the model offered first in the ``hermes model``
+    picker.  It covers users who configured a provider but never picked a
+    model (e.g. ``hermes auth add openai-codex`` without ``hermes model``).
+
+    Unknown providers and providers with an empty list both yield "".
+    """
+    candidates = _PROVIDER_MODELS.get(provider) or [""]
+    return candidates[0]
+
+
 def _openrouter_model_is_free(pricing: Any) -> bool:
    """Return True when both prompt and completion pricing are zero."""
    if not isinstance(pricing, dict):
@@ -459,6 +459,16 @@ def create_profile(
                    dst.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(src, dst)

+    # Seed a default SOUL.md so the user has a file to customize immediately.
+    # Skipped when the profile already has one (from --clone / --clone-all).
+    soul_path = profile_dir / "SOUL.md"
+    if not soul_path.exists():
+        try:
+            from hermes_cli.default_soul import DEFAULT_SOUL_MD
+            soul_path.write_text(DEFAULT_SOUL_MD, encoding="utf-8")
+        except Exception:
+            pass  # best-effort — don't fail profile creation over this
+
    return profile_dir


@@ -104,7 +104,7 @@ _DEFAULT_PROVIDER_MODELS = {
        "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
        "gemma-4-31b-it", "gemma-4-26b-it",
    ],
-    "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
+    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
@@ -2232,6 +2232,7 @@ def setup_gateway(config: dict):
        from hermes_cli.gateway import (
            _is_service_installed,
            _is_service_running,
+            supports_systemd_services,
            has_conflicting_systemd_units,
            install_linux_gateway_from_setup,
            print_systemd_scope_conflict_warning,
@@ -2244,16 +2245,18 @@ def setup_gateway(config: dict):

        service_installed = _is_service_installed()
        service_running = _is_service_running()
+        supports_systemd = supports_systemd_services()
+        supports_service_manager = supports_systemd or _is_macos

        print()
-        if _is_linux and has_conflicting_systemd_units():
+        if supports_systemd and has_conflicting_systemd_units():
            print_systemd_scope_conflict_warning()
            print()

        if service_running:
            if prompt_yes_no("  Restart the gateway to pick up changes?", True):
                try:
-                    if _is_linux:
+                    if supports_systemd:
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
@@ -2262,14 +2265,14 @@ def setup_gateway(config: dict):
        elif service_installed:
            if prompt_yes_no("  Start the gateway service?", True):
                try:
-                    if _is_linux:
+                    if supports_systemd:
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
                except Exception as e:
                    print_error(f"  Start failed: {e}")
-        elif _is_linux or _is_macos:
-            svc_name = "systemd" if _is_linux else "launchd"
+        elif supports_service_manager:
+            svc_name = "systemd" if supports_systemd else "launchd"
            if prompt_yes_no(
                f"  Install the gateway as a {svc_name} service? (runs in background, starts on boot)",
                True,
@@ -2277,7 +2280,7 @@ def setup_gateway(config: dict):
                try:
                    installed_scope = None
                    did_install = False
-                    if _is_linux:
+                    if supports_systemd:
                        installed_scope, did_install = install_linux_gateway_from_setup(force=False)
                    else:
                        launchd_install(force=False)
@@ -2285,7 +2288,7 @@ def setup_gateway(config: dict):
                    print()
                    if did_install and prompt_yes_no("  Start the service now?", True):
                        try:
-                            if _is_linux:
+                            if supports_systemd:
                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
@@ -2296,12 +2299,21 @@ def setup_gateway(config: dict):
                    print_info("  You can try manually: hermes gateway install")
            else:
                print_info("  You can install later: hermes gateway install")
-                if _is_linux:
+                if supports_systemd:
                    print_info("  Or as a boot-time service: sudo hermes gateway install --system")
                print_info("  Or run in foreground:  hermes gateway")
        else:
-            print_info("Start the gateway to bring your bots online:")
-            print_info("   hermes gateway              # Run in foreground")
+            from hermes_constants import is_container
+            if is_container():
+                print_info("Start the gateway to bring your bots online:")
+                print_info("   hermes gateway run          # Run as container main process")
+                print_info("")
+                print_info("For automatic restarts, use a Docker restart policy:")
+                print_info("   docker run --restart unless-stopped ...")
+                print_info("   docker restart <container>  # Manual restart")
+            else:
+                print_info("Start the gateway to bring your bots online:")
+                print_info("   hermes gateway              # Run in foreground")

        print_info("━" * 50)

@@ -335,7 +335,23 @@ def do_install(identifier: str, category: str = "", force: bool = False,
    meta, bundle, _matched_source = _resolve_source_meta_and_bundle(identifier, sources)

    if not bundle:
-        c.print(f"[bold red]Error:[/] Could not fetch '{identifier}' from any source.\n")
+        # Check if any source hit GitHub API rate limit
+        rate_limited = any(
+            getattr(src, "is_rate_limited", False)
+            or getattr(getattr(src, "github", None), "is_rate_limited", False)
+            for src in sources
+        )
+        c.print(f"[bold red]Error:[/] Could not fetch '{identifier}' from any source.")
+        if rate_limited:
+            c.print(
+                "[yellow]Hint:[/] GitHub API rate limit exhausted "
+                "(unauthenticated: 60 requests/hour).\n"
+                "Set [bold]GITHUB_TOKEN[/] in your .env or install the "
+                "[bold]gh[/] CLI and run [bold]gh auth login[/] "
+                "to raise the limit to 5,000/hr.\n"
+            )
+        else:
+            c.print()
        return

    # Auto-detect category for official skills (e.g. "official/autonomous-ai-agents/blackbox")
@@ -346,23 +346,35 @@ def show_status(args):
            print("  Note:         Android may stop background jobs when Termux is suspended")

    elif sys.platform.startswith('linux'):
-        try:
-            from hermes_cli.gateway import get_service_name
-            _gw_svc = get_service_name()
-        except Exception:
-            _gw_svc = "hermes-gateway"
-        try:
-            result = subprocess.run(
-                ["systemctl", "--user", "is-active", _gw_svc],
-                capture_output=True,
-                text=True,
-                timeout=5
-            )
-            is_active = result.stdout.strip() == "active"
-        except (FileNotFoundError, subprocess.TimeoutExpired):
-            is_active = False
-        print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
-        print("  Manager:      systemd (user)")
+        from hermes_constants import is_container
+        if is_container():
+            # Docker/Podman: no systemd — check for running gateway processes
+            try:
+                from hermes_cli.gateway import find_gateway_pids
+                gateway_pids = find_gateway_pids()
+                is_active = len(gateway_pids) > 0
+            except Exception:
+                is_active = False
+            print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
+            print("  Manager:      docker (foreground)")
+        else:
+            try:
+                from hermes_cli.gateway import get_service_name
+                _gw_svc = get_service_name()
+            except Exception:
+                _gw_svc = "hermes-gateway"
+            try:
+                result = subprocess.run(
+                    ["systemctl", "--user", "is-active", _gw_svc],
+                    capture_output=True,
+                    text=True,
+                    timeout=5
+                )
+                is_active = result.stdout.strip() == "active"
+            except (FileNotFoundError, subprocess.TimeoutExpired):
+                is_active = False
+            print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
+            print("  Manager:      systemd (user)")
        
    elif sys.platform == 'darwin':
        from hermes_cli.gateway import get_launchd_label
@@ -0,0 +1,929 @@
+"""
+Hermes Agent — Web UI server.
+
+Provides a FastAPI backend serving the Vite/React frontend and REST API
+endpoints for managing configuration, environment variables, and sessions.
+
+Usage:
+    python -m hermes_cli.main dashboard          # Start on http://127.0.0.1:9119
+    python -m hermes_cli.main dashboard --port 8080
+"""
+
+import logging
+import os
+import secrets
+import sys
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from hermes_cli import __version__, __release_date__
+from hermes_cli.config import (
+    DEFAULT_CONFIG,
+    OPTIONAL_ENV_VARS,
+    get_config_path,
+    get_env_path,
+    get_hermes_home,
+    load_config,
+    load_env,
+    save_config,
+    save_env_value,
+    remove_env_value,
+    check_config_version,
+    redact_key,
+)
+from gateway.status import get_running_pid, read_runtime_status
+
+try:
+    from fastapi import FastAPI, HTTPException, Request
+    from fastapi.middleware.cors import CORSMiddleware
+    from fastapi.responses import FileResponse, JSONResponse
+    from fastapi.staticfiles import StaticFiles
+    from pydantic import BaseModel
+except ImportError:  # web dependencies not installed — exit with install guidance
+    raise SystemExit(
+        "Web UI requires fastapi and uvicorn.\n"
+        "Install them with:  pip install hermes-agent[web]"
+    )
+
+WEB_DIST = Path(__file__).parent / "web_dist"
+_log = logging.getLogger(__name__)
+
+app = FastAPI(title="Hermes Agent", version=__version__)
+
+# ---------------------------------------------------------------------------
+# Session token for protecting sensitive endpoints (reveal).
+# Generated fresh on every server start — dies when the process exits.
+# Injected into the SPA HTML so only the legitimate web UI can use it.
+# ---------------------------------------------------------------------------
+_SESSION_TOKEN = secrets.token_urlsafe(32)
+
+# Simple rate limiter for the reveal endpoint
+_reveal_timestamps: List[float] = []
+_REVEAL_MAX_PER_WINDOW = 5
+_REVEAL_WINDOW_SECONDS = 30
+
+# CORS: restrict to localhost origins only.  The web UI is intended to run
+# locally; binding to 0.0.0.0 with allow_origins=["*"] would let any website
+# read/modify config and secrets.
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origin_regex=r"^https?://(localhost|127\.0\.0\.1)(:\d+)?$",
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+# ---------------------------------------------------------------------------
+# Config schema — auto-generated from DEFAULT_CONFIG
+# ---------------------------------------------------------------------------
+
+# Manual overrides for fields that need select options or custom types
+_SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
+    "model": {
+        "type": "string",
+        "description": "Default model (e.g. anthropic/claude-sonnet-4.6)",
+        "category": "general",
+    },
+    "terminal.backend": {
+        "type": "select",
+        "description": "Terminal execution backend",
+        "options": ["local", "docker", "ssh", "modal", "daytona", "singularity"],
+    },
+    "terminal.modal_mode": {
+        "type": "select",
+        "description": "Modal sandbox mode",
+        "options": ["sandbox", "function"],
+    },
+    "tts.provider": {
+        "type": "select",
+        "description": "Text-to-speech provider",
+        "options": ["edge", "elevenlabs", "openai", "neutts"],
+    },
+    "stt.provider": {
+        "type": "select",
+        "description": "Speech-to-text provider",
+        "options": ["local", "openai", "mistral"],
+    },
+    "display.skin": {
+        "type": "select",
+        "description": "CLI visual theme",
+        "options": ["default", "ares", "mono", "slate"],
+    },
+    "display.resume_display": {
+        "type": "select",
+        "description": "How resumed sessions display history",
+        "options": ["minimal", "full", "off"],
+    },
+    "display.busy_input_mode": {
+        "type": "select",
+        "description": "Input behavior while agent is running",
+        "options": ["queue", "interrupt", "block"],
+    },
+    "memory.provider": {
+        "type": "select",
+        "description": "Memory provider plugin",
+        "options": ["builtin", "honcho"],
+    },
+    "approvals.mode": {
+        "type": "select",
+        "description": "Dangerous command approval mode",
+        "options": ["ask", "yolo", "deny"],
+    },
+    "context.engine": {
+        "type": "select",
+        "description": "Context management engine",
+        "options": ["default", "custom"],
+    },
+    "human_delay.mode": {
+        "type": "select",
+        "description": "Simulated typing delay mode",
+        "options": ["off", "typing", "fixed"],
+    },
+    "logging.level": {
+        "type": "select",
+        "description": "Log level for agent.log",
+        "options": ["DEBUG", "INFO", "WARNING", "ERROR"],
+    },
+    "agent.service_tier": {
+        "type": "select",
+        "description": "API service tier (OpenAI/Anthropic)",
+        "options": ["", "auto", "default", "flex"],
+    },
+    "delegation.reasoning_effort": {
+        "type": "select",
+        "description": "Reasoning effort for delegated subagents",
+        "options": ["", "low", "medium", "high"],
+    },
+}
+
+# Categories with few fields get merged into a larger related category to avoid tab sprawl.
+_CATEGORY_MERGE: Dict[str, str] = {
+    "privacy": "security",
+    "context": "agent",
+    "skills": "agent",
+    "cron": "agent",
+    "network": "agent",
+    "checkpoints": "agent",
+    "approvals": "security",
+    "human_delay": "display",
+    "smart_model_routing": "agent",
+}
+
+# Display order for tabs — unlisted categories sort alphabetically after these.
+_CATEGORY_ORDER = [
+    "general", "agent", "terminal", "display", "delegation",
+    "memory", "compression", "security", "browser", "voice",
+    "tts", "stt", "logging", "discord", "auxiliary",
+]
+
+
+def _infer_type(value: Any) -> str:
+    """Map a Python config value to the UI field type used by the schema."""
+    # bool first: it subclasses int and would otherwise look like a number.
+    if isinstance(value, bool):
+        return "boolean"
+    for py_types, ui_type in (
+        ((int, float), "number"),
+        (list, "list"),
+        (dict, "object"),
+    ):
+        if isinstance(value, py_types):
+            return ui_type
+    return "string"
+
+
+def _build_schema_from_config(
+    config: Dict[str, Any],
+    prefix: str = "",
+) -> Dict[str, Dict[str, Any]]:
+    """Flatten a nested config dict into a dot-path → field schema map.
+
+    ``prefix`` is the dot-path of ``config`` within the root config (empty
+    at the top level); recursion passes it down for nested sections.
+    """
+    flat: Dict[str, Dict[str, Any]] = {}
+    # Leaves under a prefix belong to the prefix's first component; top-level
+    # scalars have no section of their own and land in "general".
+    leaf_category = prefix.split(".")[0] if prefix else "general"
+
+    for key, value in config.items():
+        dotted = f"{prefix}.{key}" if prefix else key
+
+        # Internal / version keys are never exposed in the schema.
+        if dotted == "_config_version":
+            continue
+
+        # Nested sections contribute their own flattened entries.
+        if isinstance(value, dict):
+            flat.update(_build_schema_from_config(value, dotted))
+            continue
+
+        label = dotted.replace(".", " → ").replace("_", " ").title()
+        field: Dict[str, Any] = {
+            "type": _infer_type(value),
+            "description": label,
+            "category": leaf_category,
+        }
+        # Manual overrides win over inferred values (and may set "category").
+        field.update(_SCHEMA_OVERRIDES.get(dotted, {}))
+        # Fold small categories into their merge target.
+        field["category"] = _CATEGORY_MERGE.get(field["category"], field["category"])
+        flat[dotted] = field
+    return flat
+
+
+CONFIG_SCHEMA = _build_schema_from_config(DEFAULT_CONFIG)
+
+
+class ConfigUpdate(BaseModel):
+    config: dict
+
+
+class EnvVarUpdate(BaseModel):
+    key: str
+    value: str
+
+
+class EnvVarDelete(BaseModel):
+    key: str
+
+
+class EnvVarReveal(BaseModel):
+    key: str
+
+
+@app.get("/api/status")
+async def get_status():
+    """Return a snapshot of install, config-version, gateway and session state.
+
+    Every probe that can fail (gateway config, session DB) is wrapped so that a
+    failure degrades the corresponding fields instead of failing the request.
+
+    Returns:
+        Dict with version/path information, the current and latest config
+        version, gateway process/runtime fields, and ``active_sessions``.
+    """
+    current_ver, latest_ver = check_config_version()
+
+    # The gateway counts as running when a PID can be found for it.
+    gateway_pid = get_running_pid()
+    gateway_running = gateway_pid is not None
+
+    gateway_state = None
+    gateway_platforms: dict = {}
+    gateway_exit_reason = None
+    gateway_updated_at = None
+    # Names of the platforms reported as connected by the gateway config;
+    # None means "could not be determined", which disables filtering below.
+    configured_gateway_platforms: set[str] | None = None
+    try:
+        from gateway.config import load_gateway_config
+
+        gateway_config = load_gateway_config()
+        configured_gateway_platforms = {
+            platform.value for platform in gateway_config.get_connected_platforms()
+        }
+    except Exception:
+        configured_gateway_platforms = None
+
+    runtime = read_runtime_status()
+    if runtime:
+        gateway_state = runtime.get("gateway_state")
+        gateway_platforms = runtime.get("platforms") or {}
+        if configured_gateway_platforms is not None:
+            # Hide runtime entries for platforms that are not configured.
+            gateway_platforms = {
+                key: value
+                for key, value in gateway_platforms.items()
+                if key in configured_gateway_platforms
+            }
+        gateway_exit_reason = runtime.get("exit_reason")
+        gateway_updated_at = runtime.get("updated_at")
+        if not gateway_running:
+            # No live process: keep an explicit "stopped"/"startup_failed"
+            # state, coerce anything else to "stopped", and drop platform data.
+            gateway_state = gateway_state if gateway_state in ("stopped", "startup_failed") else "stopped"
+            gateway_platforms = {}
+
+    # Count sessions (among the 50 returned) that have not ended and were
+    # active within the last 300 seconds.  Any failure leaves the count at 0.
+    active_sessions = 0
+    try:
+        from hermes_state import SessionDB
+        db = SessionDB()
+        try:
+            sessions = db.list_sessions_rich(limit=50)
+            now = time.time()
+            active_sessions = sum(
+                1 for s in sessions
+                if s.get("ended_at") is None
+                and (now - s.get("last_active", s.get("started_at", 0))) < 300
+            )
+        finally:
+            db.close()
+    except Exception:
+        pass
+
+    return {
+        "version": __version__,
+        "release_date": __release_date__,
+        "hermes_home": str(get_hermes_home()),
+        "config_path": str(get_config_path()),
+        "env_path": str(get_env_path()),
+        "config_version": current_ver,
+        "latest_config_version": latest_ver,
+        "gateway_running": gateway_running,
+        "gateway_pid": gateway_pid,
+        "gateway_state": gateway_state,
+        "gateway_platforms": gateway_platforms,
+        "gateway_exit_reason": gateway_exit_reason,
+        "gateway_updated_at": gateway_updated_at,
+        "active_sessions": active_sessions,
+    }
+
+
+@app.get("/api/sessions")
+async def get_sessions():
+    """List the 20 sessions returned by the session DB, each tagged ``is_active``.
+
+    A session is active when it has no ``ended_at`` and its last activity
+    (falling back to ``started_at``) is less than 300 seconds old.
+
+    Raises:
+        HTTPException: 500 on any failure; details go to the log only.
+    """
+    try:
+        from hermes_state import SessionDB
+        store = SessionDB()
+        try:
+            rows = store.list_sessions_rich(limit=20)
+            now_ts = time.time()
+            for row in rows:
+                still_open = row.get("ended_at") is None
+                # Short-circuit: the age is only computed for open sessions.
+                row["is_active"] = still_open and (
+                    (now_ts - row.get("last_active", row.get("started_at", 0))) < 300
+                )
+            return rows
+        finally:
+            store.close()
+    except Exception:
+        _log.exception("GET /api/sessions failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+
+@app.get("/api/sessions/search")
+async def search_sessions(q: str = "", limit: int = 20):
+    """Full-text search across session message content using FTS5.
+
+    Bare words get a trailing ``*`` so partial words match ("nimb" matches
+    "nimby"); quoted phrases and tokens that already end in ``*`` are passed
+    through untouched.  Matches are collapsed to one result per session,
+    keeping the first match seen for each.
+
+    Returns:
+        ``{"results": [...]}`` — empty when ``q`` is blank.
+
+    Raises:
+        HTTPException: 500 when the search fails.
+    """
+    if not (q and q.strip()):
+        return {"results": []}
+    try:
+        from hermes_state import SessionDB
+        store = SessionDB()
+        try:
+            import re
+            # Tokenise into quoted phrases or whitespace-delimited words.
+            terms = [
+                tok if (tok.startswith('"') or tok.endswith("*")) else tok + "*"
+                for tok in re.findall(r'"[^"]*"|\S+', q.strip())
+            ]
+            hits = store.search_messages(query=" ".join(terms), limit=limit)
+
+            # One entry per session; dict order keeps first-seen ordering.
+            by_session: dict = {}
+            for hit in hits:
+                session_id = hit["session_id"]
+                if session_id in by_session:
+                    continue
+                by_session[session_id] = {
+                    "session_id": session_id,
+                    "snippet": hit.get("snippet", ""),
+                    "role": hit.get("role"),
+                    "source": hit.get("source"),
+                    "model": hit.get("model"),
+                    "session_started": hit.get("session_started"),
+                }
+            return {"results": list(by_session.values())}
+        finally:
+            store.close()
+    except Exception:
+        _log.exception("GET /api/sessions/search failed")
+        raise HTTPException(status_code=500, detail="Search failed")
+
+
+def _normalize_config_for_web(config: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a shallow copy of ``config`` with ``model`` collapsed to a string.
+
+    Hermes accepts ``model`` either as a bare string
+    (``"anthropic/claude-sonnet-4"``) or as a dict
+    (``{default: ..., provider: ..., base_url: ...}``).  The web schema is
+    built from DEFAULT_CONFIG, where ``model`` is a string, so the dict form
+    is reduced to its ``default`` entry, then ``name``, then ``""``.
+
+    The input mapping is not modified.
+    """
+    normalized = {**config}  # shallow copy
+    model_entry = normalized.get("model")
+    if not isinstance(model_entry, dict):
+        return normalized
+    normalized["model"] = model_entry.get("default", model_entry.get("name", ""))
+    return normalized
+
+
+@app.get("/api/config")
+async def get_config():
+    """Return the loaded config, normalized for the web UI, minus ``_``-prefixed keys."""
+    visible = {}
+    for key, val in _normalize_config_for_web(load_config()).items():
+        # Internal keys are neither shown to the frontend nor expected back.
+        if key.startswith("_"):
+            continue
+        visible[key] = val
+    return visible
+
+
+@app.get("/api/config/defaults")
+async def get_defaults():
+    """Return ``DEFAULT_CONFIG`` as-is."""
+    return DEFAULT_CONFIG
+
+
+@app.get("/api/config/schema")
+async def get_schema():
+    """Return the flat field schema together with the category display order."""
+    return {"fields": CONFIG_SCHEMA, "category_order": _CATEGORY_ORDER}
+
+
+def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
+    """Undo ``_normalize_config_for_web`` on a config submitted by the web UI.
+
+    The frontend only ever sees ``model`` as a flat string.  When the config
+    currently on disk stores ``model`` as a dict, that dict is reused with its
+    ``default`` replaced by the submitted string, so the other model subkeys
+    (provider, base_url, api_mode, ...) survive the round trip.
+
+    Returns:
+        A shallow copy of ``config``, with ``_model_meta`` removed and
+        ``model`` restored to dict form where applicable.
+    """
+    payload = dict(config)
+    # Defensive: drop _model_meta if it leaked into the submitted config.
+    payload.pop("_model_meta", None)
+
+    submitted_model = payload.get("model")
+    if not (isinstance(submitted_model, str) and submitted_model):
+        return payload
+
+    try:
+        on_disk_model = load_config().get("model")
+        if isinstance(on_disk_model, dict):
+            # Keep every stored subkey; only the default model changes.
+            on_disk_model["default"] = submitted_model
+            payload["model"] = on_disk_model
+    except Exception:
+        pass  # disk config unreadable — keep the plain string form
+    return payload
+
+
+@app.put("/api/config")
+async def update_config(body: ConfigUpdate):
+    """Save the config submitted by the web UI.
+
+    Raises:
+        HTTPException: 500 when denormalizing or saving fails; the cause is
+            logged, not returned to the client.
+    """
+    try:
+        save_config(_denormalize_config_from_web(body.config))
+    except Exception:
+        _log.exception("PUT /api/config failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+    return {"ok": True}
+
+
+@app.get("/api/auth/session-token")
+async def get_session_token():
+    """Return the ephemeral session token for this server instance.
+
+    The token protects sensitive endpoints (reveal).  It's served to the SPA
+    which stores it in memory — it's never persisted and dies when the server
+    process exits.  CORS already restricts this to localhost origins.
+
+    Returns:
+        ``{"token": <session token>}``.
+    """
+    # NOTE(review): this route performs no authentication of its own; the
+    # CORS restriction mentioned above is configured outside this function.
+    return {"token": _SESSION_TOKEN}
+
+
+@app.get("/api/env")
+async def get_env_vars():
+    """Describe every known optional env var without exposing its value.
+
+    Returns:
+        Mapping of variable name to metadata from ``OPTIONAL_ENV_VARS`` plus
+        ``is_set`` and a redacted rendering of the stored value (``None``
+        when unset or empty).
+    """
+    stored = load_env()
+    catalog = {}
+    for name, meta in OPTIONAL_ENV_VARS.items():
+        current = stored.get(name)
+        has_value = bool(current)
+        catalog[name] = {
+            "is_set": has_value,
+            "redacted_value": redact_key(current) if has_value else None,
+            "description": meta.get("description", ""),
+            "url": meta.get("url"),
+            "category": meta.get("category", ""),
+            "is_password": meta.get("password", False),
+            "tools": meta.get("tools", []),
+            "advanced": meta.get("advanced", False),
+        }
+    return catalog
+
+
+@app.put("/api/env")
+async def set_env_var(body: EnvVarUpdate):
+    """Store one env var value.
+
+    Raises:
+        HTTPException: 500 when saving fails; the cause is logged only.
+    """
+    try:
+        save_env_value(body.key, body.value)
+    except Exception:
+        _log.exception("PUT /api/env failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+    return {"ok": True, "key": body.key}
+
+
+@app.delete("/api/env")
+async def remove_env_var(body: EnvVarDelete):
+    """Remove one env var.
+
+    Raises:
+        HTTPException: 404 when the removal reports that nothing was removed;
+            500 on any other failure.
+    """
+    try:
+        was_removed = remove_env_value(body.key)
+    except HTTPException:
+        # Pass HTTP errors through untouched.
+        raise
+    except Exception:
+        _log.exception("DELETE /api/env failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+    if not was_removed:
+        raise HTTPException(status_code=404, detail=f"{body.key} not found in .env")
+    return {"ok": True, "key": body.key}
+
+
+@app.post("/api/env/reveal")
+async def reveal_env_var(body: EnvVarReveal, request: Request):
+    """Return the real (unredacted) value of a single env var.
+
+    Protected by:
+    - Ephemeral session token (generated per server start, injected into SPA)
+    - Rate limiting (max 5 reveals per 30s window)
+    - Audit logging
+
+    Raises:
+        HTTPException: 401 when the ``Authorization`` header is not
+            ``Bearer <session token>``; 429 when the rate limit is exceeded;
+            404 when the key is absent from the env file.
+    """
+    import hmac
+
+    # --- Token check ---
+    # Constant-time comparison so response timing does not leak how much of
+    # the token matched.  Compared as bytes because compare_digest() rejects
+    # str arguments containing non-ASCII characters, which a client-supplied
+    # header may contain.
+    auth = request.headers.get("authorization", "")
+    expected = f"Bearer {_SESSION_TOKEN}"
+    if not hmac.compare_digest(auth.encode("utf-8"), expected.encode("utf-8")):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+    # --- Rate limit ---
+    # Sliding window: drop timestamps older than the window, then count.
+    now = time.time()
+    cutoff = now - _REVEAL_WINDOW_SECONDS
+    _reveal_timestamps[:] = [t for t in _reveal_timestamps if t > cutoff]
+    if len(_reveal_timestamps) >= _REVEAL_MAX_PER_WINDOW:
+        raise HTTPException(status_code=429, detail="Too many reveal requests. Try again shortly.")
+    _reveal_timestamps.append(now)
+
+    # --- Reveal ---
+    env_on_disk = load_env()
+    value = env_on_disk.get(body.key)
+    if value is None:
+        raise HTTPException(status_code=404, detail=f"{body.key} not found in .env")
+
+    # Audit trail: log the key name only, never the value.
+    _log.info("env/reveal: %s", body.key)
+    return {"key": body.key, "value": value}
+
+
+# ---------------------------------------------------------------------------
+# Session detail endpoints
+# ---------------------------------------------------------------------------
+
+
+@app.get("/api/sessions/{session_id}")
+async def get_session_detail(session_id: str):
+    """Return one session record, resolving ``session_id`` through the DB first.
+
+    Raises:
+        HTTPException: 404 when the id cannot be resolved or no record exists.
+    """
+    from hermes_state import SessionDB
+    store = SessionDB()
+    try:
+        resolved_id = store.resolve_session_id(session_id)
+        record = store.get_session(resolved_id) if resolved_id else None
+        if record:
+            return record
+        raise HTTPException(status_code=404, detail="Session not found")
+    finally:
+        store.close()
+
+
+@app.get("/api/sessions/{session_id}/messages")
+async def get_session_messages(session_id: str):
+    """Return the messages of one session under its resolved id.
+
+    Raises:
+        HTTPException: 404 when the id cannot be resolved.
+    """
+    from hermes_state import SessionDB
+    store = SessionDB()
+    try:
+        resolved_id = store.resolve_session_id(session_id)
+        if resolved_id:
+            return {
+                "session_id": resolved_id,
+                "messages": store.get_messages(resolved_id),
+            }
+        raise HTTPException(status_code=404, detail="Session not found")
+    finally:
+        store.close()
+
+
+@app.delete("/api/sessions/{session_id}")
+async def delete_session_endpoint(session_id: str):
+    """Delete one session by the id given in the path.
+
+    Unlike the read endpoints, the id is passed to the DB as-is, without
+    going through ``resolve_session_id``.
+
+    Raises:
+        HTTPException: 404 when the DB reports nothing was deleted.
+    """
+    from hermes_state import SessionDB
+    store = SessionDB()
+    try:
+        was_deleted = store.delete_session(session_id)
+        if was_deleted:
+            return {"ok": True}
+        raise HTTPException(status_code=404, detail="Session not found")
+    finally:
+        store.close()
+
+
+# ---------------------------------------------------------------------------
+# Log viewer endpoint
+# ---------------------------------------------------------------------------
+
+
+@app.get("/api/logs")
+async def get_logs(
+    file: str = "agent",
+    lines: int = 100,
+    level: Optional[str] = None,
+    component: Optional[str] = None,
+):
+    """Return the tail of one of the known Hermes log files.
+
+    Args:
+        file: Key into ``LOG_FILES`` selecting which log to read.
+        lines: Number of trailing lines wanted; clamped to the range 1..500.
+        level: Optional minimum level, forwarded to the tail reader.
+        component: Optional component name; mapped to its logger prefixes via
+            ``COMPONENT_PREFIXES`` when that table is importable.
+
+    Returns:
+        ``{"file": file, "lines": [...]}`` — an empty list when the log file
+        does not exist yet.
+
+    Raises:
+        HTTPException: 400 when ``file`` is not a known log name.
+    """
+    from hermes_cli.logs import _read_tail, LOG_FILES
+
+    log_name = LOG_FILES.get(file)
+    if not log_name:
+        raise HTTPException(status_code=400, detail=f"Unknown log file: {file}")
+    log_path = get_hermes_home() / "logs" / log_name
+    if not log_path.exists():
+        return {"file": file, "lines": []}
+
+    try:
+        from hermes_logging import COMPONENT_PREFIXES
+    except ImportError:
+        # Component filtering is optional; without the table no prefixes apply.
+        COMPONENT_PREFIXES = {}
+
+    # Clamp both ends: the cap bounds the response size, and the floor keeps
+    # zero or negative query values from reaching the tail reader, which is
+    # only ever meant to receive a positive line count.
+    line_count = max(1, min(lines, 500))
+
+    has_filters = bool(level or component)
+    comp_prefixes = COMPONENT_PREFIXES.get(component, ()) if component else ()
+    result = _read_tail(
+        log_path, line_count,
+        has_filters=has_filters,
+        min_level=level,
+        component_prefixes=comp_prefixes,
+    )
+    return {"file": file, "lines": result}
+
+
+# ---------------------------------------------------------------------------
+# Cron job management endpoints
+# ---------------------------------------------------------------------------
+
+
+class CronJobCreate(BaseModel):
+    """Request body for ``POST /api/cron/jobs``.
+
+    All four fields are forwarded unchanged to ``cron.jobs.create_job``.
+    """
+
+    prompt: str
+    schedule: str
+    name: str = ""
+    deliver: str = "local"
+
+
+class CronJobUpdate(BaseModel):
+    """Request body for ``PUT /api/cron/jobs/{job_id}``."""
+
+    # Forwarded as-is to cron.jobs.update_job; not validated here.
+    updates: dict
+
+
+@app.get("/api/cron/jobs")
+async def list_cron_jobs():
+    """Return every cron job, disabled ones included."""
+    from cron.jobs import list_jobs
+    return list_jobs(include_disabled=True)
+
+
+@app.get("/api/cron/jobs/{job_id}")
+async def get_cron_job(job_id: str):
+    """Return one cron job, or 404 when the lookup yields nothing."""
+    from cron.jobs import get_job
+    found = get_job(job_id)
+    if found:
+        return found
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs")
+async def create_cron_job(body: CronJobCreate):
+    """Create a cron job from the request body and return it.
+
+    Raises:
+        HTTPException: 400 carrying the failure message when creation fails.
+    """
+    from cron.jobs import create_job
+    try:
+        created = create_job(
+            prompt=body.prompt,
+            schedule=body.schedule,
+            name=body.name,
+            deliver=body.deliver,
+        )
+    except Exception as e:
+        _log.exception("POST /api/cron/jobs failed")
+        raise HTTPException(status_code=400, detail=str(e))
+    return created
+
+
+@app.put("/api/cron/jobs/{job_id}")
+async def update_cron_job(job_id: str, body: CronJobUpdate):
+    """Apply ``body.updates`` to a cron job; 404 when the update yields nothing."""
+    from cron.jobs import update_job
+    updated = update_job(job_id, body.updates)
+    if updated:
+        return updated
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs/{job_id}/pause")
+async def pause_cron_job(job_id: str):
+    """Pause a cron job and return it; 404 when the call yields nothing."""
+    from cron.jobs import pause_job
+    paused = pause_job(job_id)
+    if paused:
+        return paused
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs/{job_id}/resume")
+async def resume_cron_job(job_id: str):
+    """Resume a cron job and return it; 404 when the call yields nothing."""
+    from cron.jobs import resume_job
+    resumed = resume_job(job_id)
+    if resumed:
+        return resumed
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs/{job_id}/trigger")
+async def trigger_cron_job(job_id: str):
+    """Trigger a cron job and return it; 404 when the call yields nothing."""
+    from cron.jobs import trigger_job
+    triggered = trigger_job(job_id)
+    if triggered:
+        return triggered
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.delete("/api/cron/jobs/{job_id}")
+async def delete_cron_job(job_id: str):
+    """Remove a cron job; 404 when the removal reports nothing was removed."""
+    from cron.jobs import remove_job
+    was_removed = remove_job(job_id)
+    if was_removed:
+        return {"ok": True}
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+# ---------------------------------------------------------------------------
+# Skills & Tools endpoints
+# ---------------------------------------------------------------------------
+
+
+class SkillToggle(BaseModel):
+    """Request body for ``PUT /api/skills/toggle``: a skill name and its target state."""
+
+    name: str
+    enabled: bool
+
+
+@app.get("/api/skills")
+async def get_skills():
+    """List discovered skills, each annotated with an ``enabled`` flag.
+
+    A skill is enabled when its name is not in the disabled set read from the
+    loaded config.
+    """
+    from tools.skills_tool import _find_all_skills
+    from hermes_cli.skills_config import get_disabled_skills
+    cfg = load_config()
+    disabled_names = get_disabled_skills(cfg)
+    found = _find_all_skills(skip_disabled=True)
+    for skill in found:
+        skill["enabled"] = skill["name"] not in disabled_names
+    return found
+
+
+@app.put("/api/skills/toggle")
+async def toggle_skill(body: SkillToggle):
+    """Enable or disable one skill by editing the disabled set in the config.
+
+    Returns:
+        ``{"ok": True, "name": ..., "enabled": ...}`` echoing the request.
+    """
+    from hermes_cli.skills_config import get_disabled_skills, save_disabled_skills
+    cfg = load_config()
+    disabled_names = get_disabled_skills(cfg)
+    if not body.enabled:
+        disabled_names.add(body.name)
+    else:
+        # discard(): enabling a skill that was never disabled is a no-op.
+        disabled_names.discard(body.name)
+    save_disabled_skills(cfg, disabled_names)
+    return {"ok": True, "name": body.name, "enabled": body.enabled}
+
+
+@app.get("/api/tools/toolsets")
+async def get_toolsets():
+    """Describe each configurable toolset as seen from the ``cli`` platform.
+
+    Returns:
+        A list of dicts with the toolset's name, label, description, whether
+        it is enabled (``available`` mirrors ``enabled``), whether its keys
+        are configured, and the sorted, de-duplicated tool names it resolves
+        to (empty when resolution fails).
+    """
+    from hermes_cli.tools_config import (
+        _get_effective_configurable_toolsets,
+        _get_platform_tools,
+        _toolset_has_keys,
+    )
+    from toolsets import resolve_toolset
+
+    cfg = load_config()
+    enabled_names = _get_platform_tools(
+        cfg,
+        "cli",
+        include_default_mcp_servers=False,
+    )
+
+    rows = []
+    for ts_name, ts_label, ts_desc in _get_effective_configurable_toolsets():
+        try:
+            members = sorted(set(resolve_toolset(ts_name)))
+        except Exception:
+            # An unresolvable toolset is still listed, just without tools.
+            members = []
+        active = ts_name in enabled_names
+        rows.append(dict(
+            name=ts_name,
+            label=ts_label,
+            description=ts_desc,
+            enabled=active,
+            available=active,
+            configured=_toolset_has_keys(ts_name, cfg),
+            tools=members,
+        ))
+    return rows
+
+
+# ---------------------------------------------------------------------------
+# Raw YAML config endpoint
+# ---------------------------------------------------------------------------
+
+
+class RawConfigUpdate(BaseModel):
+    """Request body for ``PUT /api/config/raw``."""
+
+    # Full YAML document as text; parsed with yaml.safe_load by the endpoint.
+    yaml_text: str
+
+
+@app.get("/api/config/raw")
+async def get_config_raw():
+    """Return the config file's text verbatim, or ``""`` when it does not exist."""
+    cfg_path = get_config_path()
+    text = cfg_path.read_text(encoding="utf-8") if cfg_path.exists() else ""
+    return {"yaml": text}
+
+
+@app.put("/api/config/raw")
+async def update_config_raw(body: RawConfigUpdate):
+    """Replace the config with the YAML document supplied by the client.
+
+    The text is parsed with ``yaml.safe_load`` and must produce a mapping.
+
+    Raises:
+        HTTPException: 400 when the text is not valid YAML or does not parse
+            to a mapping; 500 when saving fails (cause logged, not returned),
+            matching ``PUT /api/config``.
+    """
+    # Parse separately from saving, so only genuine parse errors are reported
+    # to the client as "Invalid YAML".
+    try:
+        parsed = yaml.safe_load(body.yaml_text)
+    except yaml.YAMLError as e:
+        raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}")
+
+    # An empty document parses to None and a bare scalar/list to non-dict.
+    if not isinstance(parsed, dict):
+        raise HTTPException(status_code=400, detail="YAML must be a mapping")
+
+    try:
+        save_config(parsed)
+    except Exception:
+        _log.exception("PUT /api/config/raw failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+    return {"ok": True}
+
+
+# ---------------------------------------------------------------------------
+# Token / cost analytics endpoint
+# ---------------------------------------------------------------------------
+
+
+@app.get("/api/analytics/usage")
+async def get_usage_analytics(days: int = 30):
+    """Aggregate token and cost usage over sessions started in the last ``days`` days.
+
+    Args:
+        days: Size of the look-back window in days.  Not range-checked here.
+
+    Returns:
+        Dict with ``daily`` (one row per calendar day), ``by_model`` (one row
+        per model, ordered by input + output tokens, descending), ``totals``
+        (a single aggregate row) and ``period_days`` (the ``days`` argument).
+    """
+    from hermes_state import SessionDB
+    db = SessionDB()
+    try:
+        # started_at is compared against a Unix timestamp in seconds.
+        cutoff = time.time() - (days * 86400)
+        # NOTE(review): the queries go through the private db._conn handle,
+        # and dict(row) assumes mapping-style rows — confirm both against
+        # SessionDB.
+        cur = db._conn.execute("""
+            SELECT date(started_at, 'unixepoch') as day,
+                   SUM(input_tokens) as input_tokens,
+                   SUM(output_tokens) as output_tokens,
+                   SUM(cache_read_tokens) as cache_read_tokens,
+                   SUM(reasoning_tokens) as reasoning_tokens,
+                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
+                   COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
+                   COUNT(*) as sessions
+            FROM sessions WHERE started_at > ?
+            GROUP BY day ORDER BY day
+        """, (cutoff,))
+        daily = [dict(r) for r in cur.fetchall()]
+
+        # Per-model breakdown; sessions without a model are left out.
+        cur2 = db._conn.execute("""
+            SELECT model,
+                   SUM(input_tokens) as input_tokens,
+                   SUM(output_tokens) as output_tokens,
+                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
+                   COUNT(*) as sessions
+            FROM sessions WHERE started_at > ? AND model IS NOT NULL
+            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
+        """, (cutoff,))
+        by_model = [dict(r) for r in cur2.fetchall()]
+
+        # Window totals.  Only the cost sums are COALESCEd to 0; the token
+        # sums are NULL (None) when no session falls inside the window.
+        cur3 = db._conn.execute("""
+            SELECT SUM(input_tokens) as total_input,
+                   SUM(output_tokens) as total_output,
+                   SUM(cache_read_tokens) as total_cache_read,
+                   SUM(reasoning_tokens) as total_reasoning,
+                   COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
+                   COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
+                   COUNT(*) as total_sessions
+            FROM sessions WHERE started_at > ?
+        """, (cutoff,))
+        totals = dict(cur3.fetchone())
+
+        return {"daily": daily, "by_model": by_model, "totals": totals, "period_days": days}
+    finally:
+        db.close()
+
+
+def mount_spa(application: FastAPI):
+    """Mount the built SPA. Falls back to index.html for client-side routing.
+
+    Args:
+        application: The FastAPI app to register the static mount and the
+            catch-all route on.
+
+    When ``WEB_DIST`` does not exist, only a catch-all route answering 404
+    with a build hint is registered.
+    """
+    if not WEB_DIST.exists():
+        @application.get("/{full_path:path}")
+        async def no_frontend(full_path: str):
+            return JSONResponse(
+                {"error": "Frontend not built. Run: cd web && npm run build"},
+                status_code=404,
+            )
+        return
+
+    application.mount("/assets", StaticFiles(directory=WEB_DIST / "assets"), name="assets")
+
+    @application.get("/{full_path:path}")
+    async def serve_spa(full_path: str):
+        file_path = WEB_DIST / full_path
+        # Prevent path traversal via url-encoded sequences (%2e%2e/)
+        # A real file is served only for a non-empty path whose resolved
+        # location stays inside WEB_DIST and is a regular file.
+        if (
+            full_path
+            and file_path.resolve().is_relative_to(WEB_DIST.resolve())
+            and file_path.exists()
+            and file_path.is_file()
+        ):
+            return FileResponse(file_path)
+        # Everything else gets the SPA shell, sent with caching disabled.
+        return FileResponse(
+            WEB_DIST / "index.html",
+            headers={"Cache-Control": "no-store, no-cache, must-revalidate"},
+        )
+
+
+# Registered at this point in the module, after the /api routes above, because
+# the SPA handler is a catch-all path route.
+mount_spa(app)
+
+
+def start_server(host: str = "127.0.0.1", port: int = 9119, open_browser: bool = True):
+    """Start the web UI server.
+
+    Args:
+        host: Interface to bind.  Anything other than a loopback name
+            triggers a warning, since the UI exposes config and API keys.
+        port: TCP port to listen on.
+        open_browser: When true, open the UI in the default browser about one
+            second after startup begins, from a daemon thread.
+    """
+    import uvicorn
+
+    if host not in ("127.0.0.1", "localhost", "::1"):
+        import logging
+        logging.warning(
+            "Binding to %s — the web UI exposes config and API keys. "
+            "Only bind to non-localhost if you trust all users on the network.", host,
+        )
+
+    # An IPv6 literal must be wrapped in brackets inside a URL authority
+    # (RFC 3986 §3.2.2); "http://::1:9119" is not a valid URL.
+    url_host = f"[{host}]" if ":" in host and not host.startswith("[") else host
+    url = f"http://{url_host}:{port}"
+
+    if open_browser:
+        import threading
+        import webbrowser
+
+        def _open():
+            import time as _t
+            # Give the server a moment to start listening first.
+            _t.sleep(1.0)
+            webbrowser.open(url)
+
+        threading.Thread(target=_open, daemon=True).start()
+
+    print(f"  Hermes Web UI → {url}")
+    # uvicorn receives the bare host; only the displayed URL is bracketed.
+    uvicorn.run(app, host=host, port=port, log_level="warning")
@@ -189,6 +189,37 @@ def is_wsl() -> bool:
    return _wsl_detected


+_container_detected: bool | None = None
+
+
+def is_container() -> bool:
+    """Report whether the process appears to run inside a container.
+
+    Probes, in order: ``/.dockerenv`` (Docker), ``/run/.containerenv``
+    (Podman), then ``/proc/1/cgroup`` for the markers ``docker``, ``podman``
+    or ``/lxc/``.  An unreadable cgroup file counts as "not a container".
+    The answer is computed once and cached in ``_container_detected`` for the
+    rest of the process lifetime.
+    """
+    global _container_detected
+    if _container_detected is not None:
+        return _container_detected
+
+    # Marker files are the cheapest signal, so they are checked first.
+    detected = any(
+        os.path.exists(marker) for marker in ("/.dockerenv", "/run/.containerenv")
+    )
+    if not detected:
+        try:
+            with open("/proc/1/cgroup", "r") as fh:
+                cgroup_text = fh.read()
+            detected = any(
+                token in cgroup_text for token in ("docker", "podman", "/lxc/")
+            )
+        except OSError:
+            pass
+
+    _container_detected = detected
+    return detected
+
+
 # ─── Well-Known Paths ─────────────────────────────────────────────────────────


@@ -19,6 +19,9 @@
    "agent-browser": "^0.13.0",
    "@askjo/camoufox-browser": "^1.0.0"
  },
+  "overrides": {
+    "lodash": "4.18.1"
+  },
  "engines": {
    "node": ">=18.0.0"
  }
@@ -76,6 +76,7 @@ termux = [
 ]
 dingtalk = ["dingtalk-stream>=0.1.0,<1"]
 feishu = ["lark-oapi>=1.5.3,<2"]
+web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git",
@@ -107,6 +108,7 @@ all = [
  "hermes-agent[dingtalk]",
  "hermes-agent[feishu]",
  "hermes-agent[mistral]",
+  "hermes-agent[web]",
 ]

 [project.scripts]
@@ -117,6 +119,9 @@ hermes-acp = "acp_adapter.entry:main"
 [tool.setuptools]
 py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]

+[tool.setuptools.package-data]
+hermes_cli = ["web_dist/**/*"]
+
 [tool.setuptools.packages.find]
 include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

@@ -94,7 +94,7 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@@ -1307,6 +1307,7 @@ class AIAgent:
                api_key=getattr(self, "api_key", ""),
                config_context_length=_config_context_length,
                provider=self.provider,
+                api_mode=self.api_mode,
            )
        self.compression_enabled = compression_enabled

@@ -1563,6 +1564,7 @@ class AIAgent:
                base_url=self.base_url,
                api_key=getattr(self, "api_key", ""),
                provider=self.provider,
+                api_mode=self.api_mode,
            )

        # ── Invalidate cached system prompt so it rebuilds next turn ──
@@ -1696,6 +1698,16 @@ class AIAgent:
            except Exception:
                logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _current_main_runtime(self) -> Dict[str, str]:
+        """Return the live main runtime for session-scoped auxiliary routing.
+
+        Each value is read from the attribute of the same name on ``self``;
+        a missing or falsy attribute is reported as an empty string.
+        """
+        runtime_fields = ("model", "provider", "base_url", "api_key", "api_mode")
+        return {
+            field: getattr(self, field, "") or ""
+            for field in runtime_fields
+        }
+
    def _check_compression_model_feasibility(self) -> None:
        """Warn at session start if the auxiliary compression model's context
        window is smaller than the main model's compression threshold.
@@ -1716,7 +1728,10 @@ class AIAgent:
            from agent.auxiliary_client import get_text_auxiliary_client
            from agent.model_metadata import get_model_context_length

-            client, aux_model = get_text_auxiliary_client("compression")
+            client, aux_model = get_text_auxiliary_client(
+                "compression",
+                main_runtime=self._current_main_runtime(),
+            )
            if client is None or not aux_model:
                msg = (
                    "⚠ No auxiliary LLM provider configured — context "
@@ -1733,10 +1748,25 @@ class AIAgent:

            aux_base_url = str(getattr(client, "base_url", ""))
            aux_api_key = str(getattr(client, "api_key", ""))
+
+            # Read user-configured context_length for the compression model.
+            # Custom endpoints often don't support /models API queries so
+            # get_model_context_length() falls through to the 128K default,
+            # ignoring the explicit config value.  Pass it as the highest-
+            # priority hint so the configured value is always respected.
+            _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {})
+            _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None
+            if _aux_context_config is not None:
+                try:
+                    _aux_context_config = int(_aux_context_config)
+                except (TypeError, ValueError):
+                    _aux_context_config = None
+
            aux_context = get_model_context_length(
                aux_model,
                base_url=aux_base_url,
                api_key=aux_api_key,
+                config_context_length=_aux_context_config,
            )

            threshold = self.context_compressor.threshold_tokens
@@ -1857,12 +1887,13 @@ class AIAgent:
        if not content:
            return ""
        # Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
-        # <reasoning>, <REASONING_SCRATCHPAD>
+        # <reasoning>, <REASONING_SCRATCHPAD>, <thought> (Gemma 4)
        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
-        content = re.sub(r'</?(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
+        content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
+        content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
        return content

    def _looks_like_codex_intermediate_ack(
@@ -3178,6 +3209,12 @@ class AIAgent:
                f"not on any model name returned by the API."
            )

+        # Environment hints (WSL, Termux, etc.) — tell the agent about the
+        # execution environment so it can translate paths and adapt behavior.
+        _env_hints = build_environment_hints()
+        if _env_hints:
+            prompt_parts.append(_env_hints)
+
        platform_key = (self.platform or "").lower().strip()
        if platform_key in PLATFORM_HINTS:
            prompt_parts.append(PLATFORM_HINTS[platform_key])
@@ -5354,13 +5391,22 @@ class AIAgent:
                # a new API call, creating a duplicate message.  Return a
                # partial "stop" response instead so the outer loop treats this
                # turn as complete (no retry, no fallback).
+                # Recover whatever content was already streamed to the user.
+                # _current_streamed_assistant_text accumulates text fired
+                # through _fire_stream_delta, so it has exactly what the
+                # user saw before the connection died.
+                _partial_text = (
+                    getattr(self, "_current_streamed_assistant_text", "") or ""
+                ).strip() or None
                logger.warning(
                    "Partial stream delivered before error; returning stub "
-                    "response to prevent duplicate messages: %s",
+                    "response with %s chars of recovered content to prevent "
+                    "duplicate messages: %s",
+                    len(_partial_text or ""),
                    result["error"],
                )
                _stub_msg = SimpleNamespace(
-                    role="assistant", content=None, tool_calls=None,
+                    role="assistant", content=_partial_text, tool_calls=None,
                    reasoning_content=None,
                )
                return SimpleNamespace(
@@ -5819,11 +5865,12 @@ class AIAgent:
        """True when using an anthropic-compatible endpoint that preserves dots in model names.
        Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
        MiniMax keeps dots (e.g. MiniMax-M2.7).
-        OpenCode Go keeps dots (e.g. minimax-m2.7)."""
-        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go"}:
+        OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free).
+        ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1)."""
+        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}:
            return True
        base = (getattr(self, "base_url", "") or "").lower()
-        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/go" in base
+        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base

    def _is_qwen_portal(self) -> bool:
        """Return True when the base URL targets Qwen Portal."""
@@ -8203,7 +8250,8 @@ class AIAgent:
                        if self.thinking_callback:
                            self.thinking_callback("")
                        
-                        # This is often rate limiting or provider returning malformed response
+                        # Invalid response — could be rate limiting, provider timeout,
+                        # upstream server error, or malformed response.
                        retry_count += 1
                        
                        # Eager fallback: empty/malformed responses are a common
@@ -8239,11 +8287,44 @@ class AIAgent:
                            if self.verbose_logging:
                                logging.debug(f"Response attributes for invalid response: {resp_attrs}")
                        
+                        # Extract error code from response for contextual diagnostics
+                        _resp_error_code = None
+                        if response and hasattr(response, 'error') and response.error:
+                            _code_raw = getattr(response.error, 'code', None)
+                            if _code_raw is None and isinstance(response.error, dict):
+                                _code_raw = response.error.get('code')
+                            if _code_raw is not None:
+                                try:
+                                    _resp_error_code = int(_code_raw)
+                                except (TypeError, ValueError):
+                                    pass
+
+                        # Build a human-readable failure hint from the error code
+                        # and response time, instead of always assuming rate limiting.
+                        if _resp_error_code == 524:
+                            _failure_hint = f"upstream provider timed out (Cloudflare 524, {api_duration:.0f}s)"
+                        elif _resp_error_code == 504:
+                            _failure_hint = f"upstream gateway timeout (504, {api_duration:.0f}s)"
+                        elif _resp_error_code == 429:
+                            _failure_hint = f"rate limited by upstream provider (429)"
+                        elif _resp_error_code in (500, 502):
+                            _failure_hint = f"upstream server error ({_resp_error_code}, {api_duration:.0f}s)"
+                        elif _resp_error_code in (503, 529):
+                            _failure_hint = f"upstream provider overloaded ({_resp_error_code})"
+                        elif _resp_error_code is not None:
+                            _failure_hint = f"upstream error (code {_resp_error_code}, {api_duration:.0f}s)"
+                        elif api_duration < 10:
+                            _failure_hint = f"fast response ({api_duration:.1f}s) — likely rate limited"
+                        elif api_duration > 60:
+                            _failure_hint = f"slow response ({api_duration:.0f}s) — likely upstream timeout"
+                        else:
+                            _failure_hint = f"response time {api_duration:.1f}s"
+
                        self._vprint(f"{self.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True)
                        self._vprint(f"{self.log_prefix}   🏢 Provider: {provider_name}", force=True)
                        cleaned_provider_error = self._clean_error_message(error_msg)
                        self._vprint(f"{self.log_prefix}   📝 Provider message: {cleaned_provider_error}", force=True)
-                        self._vprint(f"{self.log_prefix}   ⏱️  Response time: {api_duration:.2f}s (fast response often indicates rate limiting)", force=True)
+                        self._vprint(f"{self.log_prefix}   ⏱️  {_failure_hint}", force=True)
                        
                        if retry_count >= max_retries:
                            # Try fallback before giving up
@@ -8260,14 +8341,13 @@ class AIAgent:
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
-                                "error": "Invalid API response shape. Likely rate limited or malformed provider response.",
+                                "error": f"Invalid API response after {max_retries} retries: {_failure_hint}",
                                "failed": True  # Mark as failure for filtering
                            }
                        
-                        # Longer backoff for rate limiting (likely cause of None choices)
-                        # Jittered exponential: 5s base, 120s cap + random jitter
+                        # Backoff before retry — jittered exponential: 5s base, 120s cap
                        wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
-                        self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...", force=True)
+                        self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
                        logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
                        
                        # Sleep in small increments to stay responsive to interrupts
@@ -8278,7 +8358,7 @@ class AIAgent:
                                self._persist_session(messages, conversation_history)
                                self.clear_interrupt()
                                return {
-                                    "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).",
+                                    "final_response": f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries}).",
                                    "messages": messages,
                                    "api_calls": api_call_count,
                                    "completed": False,
@@ -9681,12 +9761,25 @@ class AIAgent:
                    
                    # Pop thinking-only prefill message(s) before appending
                    # (tool-call path — same rationale as the final-response path).
+                    _had_prefill = False
                    while (
                        messages
                        and isinstance(messages[-1], dict)
                        and messages[-1].get("_thinking_prefill")
                    ):
                        messages.pop()
+                        _had_prefill = True
+
+                    # Reset prefill counter when tool calls follow a prefill
+                    # recovery.  Without this, the counter accumulates across
+                    # the whole conversation — a model that intermittently
+                    # empties (empty → prefill → tools → empty → prefill →
+                    # tools) burns both prefill attempts and the third empty
+                    # gets zero recovery.  Resetting here treats each tool-
+                    # call success as a fresh start.
+                    if _had_prefill:
+                        self._thinking_prefill_retries = 0
+                        self._empty_content_retries = 0

                    messages.append(assistant_msg)
                    self._emit_interim_assistant_message(assistant_msg)
@@ -9805,6 +9898,30 @@ class AIAgent:
                    
                    # Check if response only has think block with no actual content after it
                    if not self._has_content_after_think_block(final_response):
+                        # ── Partial stream recovery ─────────────────────
+                        # If content was already streamed to the user before
+                        # the connection died, use it as the final response
+                        # instead of falling through to prior-turn fallback
+                        # or wasting API calls on retries.
+                        _partial_streamed = (
+                            getattr(self, "_current_streamed_assistant_text", "") or ""
+                        )
+                        if self._has_content_after_think_block(_partial_streamed):
+                            _turn_exit_reason = "partial_stream_recovery"
+                            _recovered = self._strip_think_blocks(_partial_streamed).strip()
+                            logger.info(
+                                "Partial stream content delivered (%d chars) "
+                                "— using as final response",
+                                len(_recovered),
+                            )
+                            self._emit_status(
+                                "↻ Stream interrupted — using delivered content "
+                                "as final response"
+                            )
+                            final_response = _recovered
+                            self._response_was_previewed = True
+                            break
+
                        # If the previous turn already delivered real content alongside
                        # tool calls (e.g. "You're welcome!" + memory save), the model
                        # has nothing more to say. Use the earlier content immediately
@@ -9862,16 +9979,23 @@ class AIAgent:
                            self._save_session_log(messages)
                            continue

-                        # ── Empty response retry (no reasoning) ──────
-                        # Model returned nothing — no content, no
-                        # structured reasoning, no tool calls.  Common
-                        # with open models (transient provider issues,
-                        # rate limits, sampling flukes).  Retry up to 3
-                        # times before attempting fallback.  Skip when
-                        # content has inline <think> tags (model chose
-                        # to reason, just no visible text).
-                        _truly_empty = not final_response.strip()
-                        if _truly_empty and not _has_structured and self._empty_content_retries < 3:
+                        # ── Empty response retry ──────────────────────
+                        # Model returned nothing usable.  Retry up to 3
+                        # times before attempting fallback.  This covers
+                        # both truly empty responses (no content, no
+                        # reasoning) AND reasoning-only responses after
+                        # prefill exhaustion — models like mimo-v2-pro
+                        # always populate reasoning fields via OpenRouter,
+                        # so the old `not _has_structured` guard blocked
+                        # retries for every reasoning model after prefill.
+                        _truly_empty = not self._strip_think_blocks(
+                            final_response
+                        ).strip()
+                        _prefill_exhausted = (
+                            _has_structured
+                            and self._thinking_prefill_retries >= 2
+                        )
+                        if _truly_empty and (not _has_structured or _prefill_exhausted) and self._empty_content_retries < 3:
                            self._empty_content_retries += 1
                            logger.warning(
                                "Empty response (no content or reasoning) — "
@@ -0,0 +1,325 @@
+#!/usr/bin/env python3
+"""Build the Hermes Skills Index — a centralized JSON catalog of all skills.
+
+This script crawls every skill source (skills.sh, GitHub taps, official,
+well-known, clawhub, lobehub, claude-marketplace) and writes a JSON index with resolved
+GitHub paths. The index is served as a static file on the docs site so that
+`hermes skills search/install` can use it without hitting the GitHub API.
+
+Usage:
+    # Local (uses gh CLI or GITHUB_TOKEN for auth)
+    python scripts/build_skills_index.py
+
+    # CI (set GITHUB_TOKEN as secret)
+    GITHUB_TOKEN=ghp_... python scripts/build_skills_index.py
+
+Output: website/static/api/skills-index.json
+"""
+
+import json
+import os
+import sys
+import time
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timezone
+
+# Allow importing from repo root
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, REPO_ROOT)
+
+# Ensure HERMES_HOME is set (needed by tools/skills_hub.py imports)
+os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
+
+from tools.skills_hub import (
+    GitHubAuth,
+    GitHubSource,
+    SkillsShSource,
+    OptionalSkillSource,
+    WellKnownSkillSource,
+    ClawHubSource,
+    ClaudeMarketplaceSource,
+    LobeHubSource,
+    SkillMeta,
+)
+import httpx
+
+OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
+INDEX_VERSION = 1
+
+
+def _meta_to_dict(meta: SkillMeta) -> dict:
+    """Convert a SkillMeta to a serializable dict."""
+    return {
+        "name": meta.name,
+        "description": meta.description,
+        "source": meta.source,
+        "identifier": meta.identifier,
+        "trust_level": meta.trust_level,
+        "repo": meta.repo or "",
+        "path": meta.path or "",
+        "tags": meta.tags or [],
+        "extra": meta.extra or {},
+    }
+
+
+def crawl_source(source, source_name: str, limit: int) -> list:
+    """Crawl a single source and return skill dicts."""
+    print(f"  Crawling {source_name}...", flush=True)
+    start = time.time()
+    try:
+        results = source.search("", limit=limit)
+    except Exception as e:
+        print(f"  Error crawling {source_name}: {e}", file=sys.stderr)
+        return []
+    skills = [_meta_to_dict(m) for m in results]
+    elapsed = time.time() - start
+    print(f"  {source_name}: {len(skills)} skills ({elapsed:.1f}s)", flush=True)
+    return skills
+
+
+def crawl_skills_sh(source: SkillsShSource) -> list:
+    """Crawl skills.sh using popular queries for broad coverage."""
+    print("  Crawling skills.sh (popular queries)...", flush=True)
+    start = time.time()
+
+    queries = [
+        "",  # featured
+        "react", "python", "web", "api", "database", "docker",
+        "testing", "scraping", "design", "typescript", "git",
+        "aws", "security", "data", "ml", "ai", "devops",
+        "frontend", "backend", "mobile", "cli", "documentation",
+        "kubernetes", "terraform", "rust", "go", "java",
+    ]
+
+    all_skills: dict[str, dict] = {}
+    for query in queries:
+        try:
+            results = source.search(query, limit=50)
+            for meta in results:
+                entry = _meta_to_dict(meta)
+                if entry["identifier"] not in all_skills:
+                    all_skills[entry["identifier"]] = entry
+        except Exception as e:
+            print(f"    Warning: skills.sh search '{query}' failed: {e}",
+                  file=sys.stderr)
+
+    elapsed = time.time() - start
+    print(f"  skills.sh: {len(all_skills)} unique skills ({elapsed:.1f}s)",
+          flush=True)
+    return list(all_skills.values())
+
+
+def _fetch_repo_tree(repo: str, auth: GitHubAuth) -> list:
+    """Fetch the recursive tree for a repo. Returns list of tree entries."""
+    headers = auth.get_headers()
+    try:
+        resp = httpx.get(
+            f"https://api.github.com/repos/{repo}",
+            headers=headers, timeout=15, follow_redirects=True,
+        )
+        if resp.status_code != 200:
+            return []
+        branch = resp.json().get("default_branch", "main")
+
+        resp = httpx.get(
+            f"https://api.github.com/repos/{repo}/git/trees/{branch}",
+            params={"recursive": "1"},
+            headers=headers, timeout=30, follow_redirects=True,
+        )
+        if resp.status_code != 200:
+            return []
+        data = resp.json()
+        if data.get("truncated"):
+            return []
+        return data.get("tree", [])
+    except Exception:
+        return []
+
+
+def batch_resolve_paths(skills: list, auth: GitHubAuth) -> list:
+    """Resolve GitHub paths for skills.sh entries using batch tree lookups.
+
+    Instead of resolving each skill individually (N×M API calls), we:
+    1. Group skills by repo
+    2. Fetch one tree per repo (2 API calls per repo)
+    3. Find all SKILL.md files in the tree
+    4. Match skills to their resolved paths
+    """
+    # Only skills.sh entries are resolved; if there are none the input list is returned as-is
+    skills_sh = [s for s in skills if s["source"] in ("skills.sh", "skills-sh")]
+    if not skills_sh:
+        return skills
+
+    print(f"  Resolving paths for {len(skills_sh)} skills.sh entries...",
+          flush=True)
+    start = time.time()
+
+    # Group by repo so each repository's tree is fetched once; entries without a repo are skipped
+    by_repo: dict[str, list] = defaultdict(list)
+    for s in skills_sh:
+        repo = s.get("repo", "")
+        if repo:
+            by_repo[repo].append(s)
+
+    print(f"    {len(by_repo)} unique repos to scan", flush=True)
+
+    resolved_count = 0
+
+    # Per-repo worker (submitted to the thread pool below); returns how many entries it resolved
+    def _resolve_repo(repo: str, entries: list):
+        tree = _fetch_repo_tree(repo, auth)
+        if not tree:
+            return 0
+
+        # Find all SKILL.md paths in this repo
+        skill_paths = {}  # lowercased skill directory name -> "<repo>/<skill_dir>"
+        for item in tree:
+            if item.get("type") != "blob":
+                continue
+            path = item.get("path", "")
+            if path.endswith("/SKILL.md"):
+                skill_dir = path[: -len("/SKILL.md")]
+                dir_name = skill_dir.split("/")[-1]
+                skill_paths[dir_name.lower()] = f"{repo}/{skill_dir}"
+
+                # NOTE: only the directory name is indexed; SKILL.md frontmatter names are not
+                # read, and directories sharing a name overwrite each other (last one wins)
+            elif path == "SKILL.md":
+                # Root-level SKILL.md: stored under "_root_"; no lookup below targets that key explicitly
+                skill_paths["_root_"] = f"{repo}"
+
+        count = 0
+        for entry in entries:
+            # Try to match the skill's name/path to a tree entry
+            skill_name = entry.get("name", "").lower()
+            skill_path = entry.get("path", "").lower()
+            identifier = entry.get("identifier", "")
+
+            # Extract the skill token from the identifier
+            # e.g. "skills-sh/d4vinci/scrapling/scrapling-official" -> "scrapling-official"
+            parts = identifier.replace("skills-sh/", "").replace("skills.sh/", "")
+            skill_token = parts.split("/")[-1].lower() if "/" in parts else ""
+
+            # Exact lookup first: identifier token, then name, then path (first hit wins)
+            for candidate in [skill_token, skill_name, skill_path]:
+                if not candidate:
+                    continue
+                matched = skill_paths.get(candidate)
+                if matched:
+                    entry["resolved_github_id"] = matched
+                    count += 1
+                    break
+            else:
+                # No exact hit (for/else): hyphen-insensitive or substring match on the token; first tree entry wins
+                for tree_name, tree_path in skill_paths.items():
+                    if (skill_token and (
+                        tree_name.replace("-", "") == skill_token.replace("-", "")
+                        or skill_token in tree_name
+                        or tree_name in skill_token
+                    )):
+                        entry["resolved_github_id"] = tree_path
+                        count += 1
+                        break
+
+        return count
+
+    with ThreadPoolExecutor(max_workers=6) as pool:
+        futures = {
+            pool.submit(_resolve_repo, repo, entries): repo
+            for repo, entries in by_repo.items()
+        }
+        for future in as_completed(futures):
+            try:
+                resolved_count += future.result()
+            except Exception as e:
+                repo = futures[future]
+                print(f"    Warning: {repo}: {e}", file=sys.stderr)
+
+    elapsed = time.time() - start
+    print(f"  Resolved {resolved_count}/{len(skills_sh)} paths ({elapsed:.1f}s)",
+          flush=True)
+    return skills
+
+
+def main():
+    print("Building Hermes Skills Index...", flush=True)
+    overall_start = time.time()
+
+    auth = GitHubAuth()
+    print(f"GitHub auth: {auth.auth_method()}")
+    if auth.auth_method() == "anonymous":
+        print("WARNING: No GitHub authentication — rate limit is 60/hr. "
+              "Set GITHUB_TOKEN for better results.", file=sys.stderr)
+
+    skills_sh_source = SkillsShSource(auth=auth)
+    sources = {
+        "official": OptionalSkillSource(),
+        "well-known": WellKnownSkillSource(),
+        "github": GitHubSource(auth=auth),
+        "clawhub": ClawHubSource(),
+        "claude-marketplace": ClaudeMarketplaceSource(auth=auth),
+        "lobehub": LobeHubSource(),
+    }
+
+    all_skills: list[dict] = []
+
+    # Crawl skills.sh
+    all_skills.extend(crawl_skills_sh(skills_sh_source))
+
+    # Crawl other sources in parallel
+    with ThreadPoolExecutor(max_workers=4) as pool:
+        futures = {}
+        for name, source in sources.items():
+            futures[pool.submit(crawl_source, source, name, 500)] = name
+        for future in as_completed(futures):
+            try:
+                all_skills.extend(future.result())
+            except Exception as e:
+                print(f"  Error: {e}", file=sys.stderr)
+
+    # Batch resolve GitHub paths for skills.sh entries
+    all_skills = batch_resolve_paths(all_skills, auth)
+
+    # Deduplicate by identifier
+    seen: dict[str, dict] = {}
+    for skill in all_skills:
+        key = skill["identifier"]
+        if key not in seen:
+            seen[key] = skill
+    deduped = list(seen.values())
+
+    # Sort
+    source_order = {"official": 0, "skills-sh": 1, "skills.sh": 1,
+                    "github": 2, "well-known": 3, "clawhub": 4,
+                    "claude-marketplace": 5, "lobehub": 6}
+    deduped.sort(key=lambda s: (source_order.get(s["source"], 99), s["name"]))
+
+    # Build index
+    index = {
+        "version": INDEX_VERSION,
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "skill_count": len(deduped),
+        "skills": deduped,
+    }
+
+    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
+    with open(OUTPUT_PATH, "w") as f:
+        json.dump(index, f, separators=(",", ":"), ensure_ascii=False)
+
+    elapsed = time.time() - overall_start
+    file_size = os.path.getsize(OUTPUT_PATH)
+    print(f"\nDone! {len(deduped)} skills indexed in {elapsed:.0f}s")
+    print(f"Output: {OUTPUT_PATH} ({file_size / 1024:.0f} KB)")
+
+    from collections import Counter
+    by_source = Counter(s["source"] for s in deduped)
+    for src, count in sorted(by_source.items(), key=lambda x: -x[1]):
+        resolved = sum(1 for s in deduped
+                       if s["source"] == src and s.get("resolved_github_id"))
+        extra = f" ({resolved} resolved)" if resolved else ""
+        print(f"  {src}: {count}{extra}")
+
+
+if __name__ == "__main__":
+    main()
@@ -15,9 +15,9 @@
      }
    },
    "node_modules/@borewit/text-codec": {
-      "version": "0.2.1",
-      "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.1.tgz",
-      "integrity": "sha512-k7vvKPbf7J2fZ5klGRD9AeKfUvojuZIQ3BT5u7Jfv+puwXkUBUT5PVyMDfJZpy30CBDXGMgw7fguK/lpOMBvgw==",
+      "version": "0.2.2",
+      "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.2.tgz",
+      "integrity": "sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==",
      "license": "MIT",
      "funding": {
        "type": "github",
@@ -1088,9 +1088,9 @@
      }
    },
    "node_modules/file-type": {
-      "version": "21.3.0",
-      "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.0.tgz",
-      "integrity": "sha512-8kPJMIGz1Yt/aPEwOsrR97ZyZaD1Iqm8PClb1nYFclUCkBi0Ma5IsYNQzvSFS9ib51lWyIw5mIT9rWzI/xjpzA==",
+      "version": "21.3.4",
+      "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz",
+      "integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==",
      "license": "MIT",
      "dependencies": {
        "@tokenizer/inflate": "^0.4.1",
@@ -1456,9 +1456,9 @@
      "license": "MIT"
    },
    "node_modules/music-metadata": {
-      "version": "11.12.1",
-      "resolved": "https://registry.npmjs.org/music-metadata/-/music-metadata-11.12.1.tgz",
-      "integrity": "sha512-j++ltLxHDb5VCXET9FzQ8bnueiLHwQKgCO7vcbkRH/3F7fRjPkv6qncGEJ47yFhmemcYtgvsOAlcQ1dRBTkDjg==",
+      "version": "11.12.3",
+      "resolved": "https://registry.npmjs.org/music-metadata/-/music-metadata-11.12.3.tgz",
+      "integrity": "sha512-n6hSTZkuD59qWgHh6IP5dtDlDZQXoxk/bcA85Jywg8Z1iFrlNgl2+GTFgjZyn52W5UgQpV42V4XqrQZZAMbZTQ==",
      "funding": [
        {
          "type": "github",
@@ -1471,11 +1471,11 @@
      ],
      "license": "MIT",
      "dependencies": {
-        "@borewit/text-codec": "^0.2.1",
+        "@borewit/text-codec": "^0.2.2",
        "@tokenizer/token": "^0.3.0",
        "content-type": "^1.0.5",
        "debug": "^4.4.3",
-        "file-type": "^21.3.0",
+        "file-type": "^21.3.1",
        "media-typer": "^1.1.0",
        "strtok3": "^10.3.4",
        "token-types": "^6.1.2",
@@ -1589,9 +1589,9 @@
      }
    },
    "node_modules/path-to-regexp": {
-      "version": "0.1.12",
-      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
-      "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==",
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.13.tgz",
+      "integrity": "sha512-A/AGNMFN3c8bOlvV9RreMdrv7jsmF9XIfDeCd87+I8RNg6s78BhJxMu69NEMHBSJFxKidViTEdruRwEk/WIKqA==",
      "license": "MIT"
    },
    "node_modules/pino": {
@@ -2002,9 +2002,9 @@
      }
    },
    "node_modules/strtok3": {
-      "version": "10.3.4",
-      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.4.tgz",
-      "integrity": "sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==",
+      "version": "10.3.5",
+      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.5.tgz",
+      "integrity": "sha512-ki4hZQfh5rX0QDLLkOCj+h+CVNkqmp/CMf8v8kZpkNVK6jGQooMytqzLZYUVYIZcFZ6yDB70EfD8POcFXiF5oA==",
      "license": "MIT",
      "dependencies": {
        "@tokenizer/token": "^0.3.0"
@@ -19,7 +19,7 @@ What makes Hermes different:

 - **Self-improving through skills** — Hermes learns from experience by saving reusable procedures as skills. When it solves a complex problem, discovers a workflow, or gets corrected, it can persist that knowledge as a skill document that loads into future sessions. Skills accumulate over time, making the agent better at your specific tasks and environment.
 - **Persistent memory across sessions** — remembers who you are, your preferences, environment details, and lessons learned. Pluggable memory backends (built-in, Honcho, Mem0, and more) let you choose how memory works.
- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 8+ other platforms with full tool access, not just chat.
+- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 10+ other platforms with full tool access, not just chat.
 - **Provider-agnostic** — swap models and providers mid-workflow without changing anything else. Credential pools rotate across multiple API keys automatically.
 - **Profiles** — run multiple independent Hermes instances with isolated configs, sessions, skills, and memory.
 - **Extensible** — plugins, MCP servers, custom tools, webhook triggers, cron scheduling, and the full Python ecosystem.
@@ -148,7 +148,7 @@ hermes gateway status       Check status
 hermes gateway setup        Configure platforms
 ```

-Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, API Server, Webhooks, Open WebUI.
+Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, BlueBubbles (iMessage), Weixin (WeChat), API Server, Webhooks. Open WebUI connects via the API Server adapter.

 Platform docs: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/

@@ -215,7 +215,7 @@ hermes insights [--days N]  Usage analytics
 hermes update               Update to latest version
 hermes pairing list/approve/revoke  DM authorization
 hermes plugins list/install/remove  Plugin management
-hermes honcho setup/status  Honcho memory integration
+hermes honcho setup/status  Honcho memory integration (requires honcho plugin)
 hermes memory setup/status/off  Memory provider config
 hermes completion bash|zsh  Shell completions
 hermes acp                  ACP server (IDE integration)
@@ -269,6 +269,28 @@ Type these during an interactive chat session.
 /plugins             List plugins (CLI)
 ```

+### Gateway
+```
+/approve             Approve a pending command (gateway)
+/deny                Deny a pending command (gateway)
+/restart             Restart gateway (gateway)
+/sethome             Set current chat as home channel (gateway)
+/update              Update Hermes to latest (gateway)
+/platforms (/gateway) Show platform connection status (gateway)
+```
+
+### Utility
+```
+/branch (/fork)      Branch the current session
+/btw                 Ephemeral side question (doesn't interrupt main task)
+/fast                Toggle priority/fast processing
+/browser             Open CDP browser connection
+/history             Show conversation history (CLI)
+/save                Save conversation to file (CLI)
+/paste               Attach clipboard image (CLI)
+/image               Attach local image file (CLI)
+```
+
 ### Info
 ```
 /help                Show commands
@@ -311,11 +333,11 @@ Edit with `hermes config edit` or `hermes config set section.key value`.
 | `terminal` | `backend` (local/docker/ssh/modal), `cwd`, `timeout` (180) |
 | `compression` | `enabled`, `threshold` (0.50), `target_ratio` (0.20) |
 | `display` | `skin`, `tool_progress`, `show_reasoning`, `show_cost` |
-| `stt` | `enabled`, `provider` (local/groq/openai) |
-| `tts` | `provider` (edge/elevenlabs/openai/kokoro/fish) |
+| `stt` | `enabled`, `provider` (local/groq/openai/mistral) |
+| `tts` | `provider` (edge/elevenlabs/openai/minimax/mistral/neutts) |
 | `memory` | `memory_enabled`, `user_profile_enabled`, `provider` |
 | `security` | `tirith_enabled`, `website_blocklist` |
-| `delegation` | `model`, `provider`, `max_iterations` (50) |
+| `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` |
 | `smart_model_routing` | `enabled`, `cheap_model` |
 | `checkpoints` | `enabled`, `max_snapshots` (50) |

@@ -323,7 +345,7 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con

 ### Providers

-18 providers supported. Set via `hermes model` or `hermes setup`.
+20+ providers supported. Set via `hermes model` or `hermes setup`.

 | Provider | Auth | Key env var |
 |----------|------|-------------|
@@ -332,16 +354,23 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con
 | Nous Portal | OAuth | `hermes login --provider nous` |
 | OpenAI Codex | OAuth | `hermes login --provider openai-codex` |
 | GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` |
+| Google Gemini | API key | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
 | DeepSeek | API key | `DEEPSEEK_API_KEY` |
+| xAI / Grok | API key | `XAI_API_KEY` |
 | Hugging Face | Token | `HF_TOKEN` |
 | Z.AI / GLM | API key | `GLM_API_KEY` |
 | MiniMax | API key | `MINIMAX_API_KEY` |
+| MiniMax CN | API key | `MINIMAX_CN_API_KEY` |
 | Kimi / Moonshot | API key | `KIMI_API_KEY` |
 | Alibaba / DashScope | API key | `DASHSCOPE_API_KEY` |
+| Xiaomi MiMo | API key | `XIAOMI_API_KEY` |
 | Kilo Code | API key | `KILOCODE_API_KEY` |
+| AI Gateway (Vercel) | API key | `AI_GATEWAY_API_KEY` |
+| OpenCode Zen | API key | `OPENCODE_ZEN_API_KEY` |
+| OpenCode Go | API key | `OPENCODE_GO_API_KEY` |
+| Qwen OAuth | OAuth | `hermes login --provider qwen-oauth` |
 | Custom endpoint | Config | `model.base_url` + `model.api_key` in config.yaml |
-
-Plus: AI Gateway, OpenCode Zen, OpenCode Go, MiniMax CN, GitHub Copilot ACP.
+| GitHub Copilot ACP | External | `COPILOT_CLI_PATH` or Copilot CLI |

 Full provider docs: https://hermes-agent.nousresearch.com/docs/integrations/providers

@@ -365,6 +394,10 @@ Enable/disable via `hermes tools` (interactive) or `hermes tools enable/disable
 | `delegation` | Subagent task delegation |
 | `cronjob` | Scheduled task management |
 | `clarify` | Ask user clarifying questions |
+| `messaging` | Cross-platform message sending |
+| `search` | Web search only (subset of `web`) |
+| `todo` | In-session task planning and tracking |
+| `rl` | Reinforcement learning tools (off by default) |
 | `moa` | Mixture of Agents (off by default) |
 | `homeassistant` | Smart home control (off by default) |

@@ -382,12 +415,13 @@ Provider priority (auto-detected):
 1. **Local faster-whisper** — free, no API key: `pip install faster-whisper`
 2. **Groq Whisper** — free tier: set `GROQ_API_KEY`
 3. **OpenAI Whisper** — paid: set `VOICE_TOOLS_OPENAI_KEY`
+4. **Mistral Voxtral** — set `MISTRAL_API_KEY`

 Config:
 ```yaml
 stt:
  enabled: true
-  provider: local        # local, groq, openai
+  provider: local        # local, groq, openai, mistral
  local:
    model: base          # tiny, base, small, medium, large-v3
 ```
@@ -399,8 +433,9 @@ stt:
 | Edge TTS | None | Yes (default) |
 | ElevenLabs | `ELEVENLABS_API_KEY` | Free tier |
 | OpenAI | `VOICE_TOOLS_OPENAI_KEY` | Paid |
-| Kokoro (local) | None | Free |
-| Fish Audio | `FISH_AUDIO_API_KEY` | Free tier |
+| MiniMax | `MINIMAX_API_KEY` | Paid |
+| Mistral (Voxtral) | `MISTRAL_API_KEY` | Paid |
+| NeuTTS (local) | None (`pip install neutts[all]` + `espeak-ng`) | Free |

 Voice commands: `/voice on` (voice-to-voice), `/voice tts` (always voice), `/voice off`.

@@ -492,7 +527,7 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 ### Voice not working
 1. Check `stt.enabled: true` in config.yaml
 2. Verify provider: `pip install faster-whisper` or set API key
-3. Restart gateway: `/restart`
+3. In gateway: `/restart`. In CLI: exit and relaunch.

 ### Tool not available
 1. `hermes tools` — check if toolset is enabled for your platform
@@ -503,10 +538,11 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 1. `hermes doctor` — check config and dependencies
 2. `hermes login` — re-authenticate OAuth providers
 3. Check `.env` has the right API key
+4. **Copilot 403**: `gh auth login` tokens do NOT work for Copilot API. You must use the Copilot-specific OAuth device code flow via `hermes model` → GitHub Copilot.

 ### Changes not taking effect
 - **Tools/skills:** `/reset` starts a new session with updated toolset
-- **Config changes:** `/restart` reloads gateway config
+- **Config changes:** In gateway: `/restart`. In CLI: exit and relaunch.
 - **Code changes:** Restart the CLI or gateway process

 ### Skills not showing
@@ -520,6 +556,23 @@ Check logs first:
 grep -i "failed to send\|error" ~/.hermes/logs/gateway.log | tail -20
 ```

+Common gateway problems:
+- **Gateway dies on SSH logout**: Enable linger: `sudo loginctl enable-linger $USER`
+- **Gateway dies on WSL2 close**: WSL2 requires `systemd=true` under `[boot]` in `/etc/wsl.conf` for systemd services to work. Without it, the gateway falls back to `nohup` and dies when the session closes.
+- **Gateway crash loop**: Reset the failed state: `systemctl --user reset-failed hermes-gateway`
+
+### Platform-specific issues
+- **Discord bot silent**: Must enable **Message Content Intent** in Bot → Privileged Gateway Intents.
+- **Slack bot only works in DMs**: Must subscribe to `message.channels` event. Without it, the bot ignores public channels.
+- **Windows HTTP 400 "No models provided"**: Config file encoding issue (BOM). Ensure `config.yaml` is saved as UTF-8 without BOM.
+
+### Auxiliary models not working
+If `auxiliary` tasks (vision, compression, session_search) fail silently, the `auto` provider most likely can't find a backend. Either set `OPENROUTER_API_KEY` or `GOOGLE_API_KEY`, or explicitly configure each auxiliary task's provider and model (shown here for `vision`):
+```bash
+hermes config set auxiliary.vision.provider <your_provider>
+hermes config set auxiliary.vision.model <model_name>
+```
+
 ---

 ## Where to Find Things
@@ -557,7 +610,7 @@ hermes-agent/
 ├── toolsets.py           # Toolset definitions
 ├── cli.py                # Interactive CLI (HermesCLI)
 ├── hermes_state.py       # SQLite session store
-├── agent/                # Prompt builder, compression, display, adapters
+├── agent/                # Prompt builder, context compression, memory, model routing, credential pooling, skill dispatch
 ├── hermes_cli/           # CLI subcommands, config, setup, commands
 │   ├── commands.py       # Slash command registry (CommandDef)
 │   ├── config.py         # DEFAULT_CONFIG, env var definitions
@@ -626,7 +679,6 @@ run_conversation():
 ### Testing

 ```bash
-source venv/bin/activate  # or .venv/bin/activate
 python -m pytest tests/ -o 'addopts=' -q   # Full suite
 python -m pytest tests/tools/ -q            # Specific area
 ```
@@ -820,6 +820,24 @@ Every successful ML paper centers on what Neel Nanda calls "the narrative": a sh

 **If you cannot state your contribution in one sentence, you don't yet have a paper.**

+### The Sources Behind This Guidance
+
+This skill synthesizes writing philosophy from researchers who have published extensively at top venues. The writing philosophy layer was originally compiled by [Orchestra Research](https://github.com/orchestra-research) as the `ml-paper-writing` skill.
+
+| Source | Key Contribution | Link |
+|--------|-----------------|------|
+| **Neel Nanda** (Google DeepMind) | The Narrative Principle, What/Why/So What framework | [How to Write ML Papers](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) |
+| **Sebastian Farquhar** (DeepMind) | 5-sentence abstract formula | [How to Write ML Papers](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) |
+| **Gopen & Swan** | 7 principles of reader expectations | [Science of Scientific Writing](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) |
+| **Zachary Lipton** | Word choice, eliminating hedging | [Heuristics for Scientific Writing](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) |
+| **Jacob Steinhardt** (UC Berkeley) | Precision, consistent terminology | [Writing Tips](https://bounded-regret.ghost.io/) |
+| **Ethan Perez** (Anthropic) | Micro-level clarity tips | [Easy Paper Writing Tips](https://ethanperez.net/easy-paper-writing-tips/) |
+| **Andrej Karpathy** | Single contribution focus | Various lectures |
+
+**For deeper dives into any of these, see:**
+- [references/writing-guide.md](references/writing-guide.md) — Full explanations with examples
+- [references/sources.md](references/sources.md) — Complete bibliography
+
 ### Time Allocation

 Spend approximately **equal time** on each of:
@@ -4,6 +4,12 @@ This document lists all authoritative sources used to build this skill, organize

 ---

+## Origin & Attribution
+
+The writing philosophy, citation verification workflow, and conference reference materials in this skill were originally compiled by **[Orchestra Research](https://github.com/orchestra-research)** as the `ml-paper-writing` skill (January 2026), drawing on Neel Nanda's blog post and other researcher guides listed below. The skill was integrated into hermes-agent by teknium (January 2026), then expanded into the current `research-paper-writing` pipeline by SHL0MS (April 2026, PR #4654), which added experiment design, execution monitoring, iterative refinement, and submission phases while preserving the original writing philosophy and reference files.
+
+---
+
 ## Writing Philosophy & Guides

 ### Primary Sources (Must-Read)
@@ -971,6 +971,74 @@ class TestTaskSpecificOverrides:
            client, model = get_text_auxiliary_client("compression")
        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter

+    def test_resolve_auto_prefers_live_main_runtime_over_persisted_config(self, monkeypatch, tmp_path):
+        """Session-only live model switches should override persisted config for auto routing."""
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        (hermes_home / "config.yaml").write_text(
+            """model:
+  default: glm-5.1
+  provider: opencode-go
+compression:
+  summary_provider: auto
+"""
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        calls = []
+
+        def _fake_resolve(provider, model=None, *args, **kwargs):
+            calls.append((provider, model, kwargs))
+            return MagicMock(), model or "resolved-model"
+
+        with patch("agent.auxiliary_client.resolve_provider_client", side_effect=_fake_resolve):
+            client, model = _resolve_auto(
+                main_runtime={
+                    "provider": "openai-codex",
+                    "model": "gpt-5.4",
+                    "api_mode": "codex_responses",
+                }
+            )
+
+        assert client is not None
+        assert model == "gpt-5.4"
+        assert calls[0][0] == "openai-codex"
+        assert calls[0][1] == "gpt-5.4"
+        assert calls[0][2]["api_mode"] == "codex_responses"
+
+    def test_explicit_compression_pin_still_wins_over_live_main_runtime(self, monkeypatch, tmp_path):
+        """Task-level compression config should beat a live session override."""
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        (hermes_home / "config.yaml").write_text(
+            """auxiliary:
+  compression:
+    provider: openrouter
+    model: google/gemini-3-flash-preview
+model:
+  default: glm-5.1
+  provider: opencode-go
+"""
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(MagicMock(), "google/gemini-3-flash-preview")) as mock_resolve:
+            client, model = get_text_auxiliary_client(
+                "compression",
+                main_runtime={
+                    "provider": "openai-codex",
+                    "model": "gpt-5.4",
+                },
+            )
+
+        assert client is not None
+        assert model == "google/gemini-3-flash-preview"
+        assert mock_resolve.call_args.args[0] == "openrouter"
+        assert mock_resolve.call_args.kwargs["main_runtime"] == {
+            "provider": "openai-codex",
+            "model": "gpt-5.4",
+        }
+
    def test_compression_summary_base_url_from_config(self, monkeypatch, tmp_path):
        """compression.summary_base_url should produce a custom-endpoint client."""
        hermes_home = tmp_path / "hermes"
@@ -1560,3 +1628,74 @@ class TestStaleBaseUrlWarning:

        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Warning should not fire a second time"
+
+
+# ---------------------------------------------------------------------------
+# Anthropic-compatible image block conversion
+# ---------------------------------------------------------------------------
+
+class TestAnthropicCompatImageConversion:
+    """Tests for _is_anthropic_compat_endpoint and _convert_openai_images_to_anthropic."""
+
+    def test_known_providers_detected(self):
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        assert _is_anthropic_compat_endpoint("minimax", "")
+        assert _is_anthropic_compat_endpoint("minimax-cn", "")
+
+    def test_openrouter_not_detected(self):
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        assert not _is_anthropic_compat_endpoint("openrouter", "")
+        assert not _is_anthropic_compat_endpoint("anthropic", "")
+
+    def test_url_based_detection(self):
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        assert _is_anthropic_compat_endpoint("custom", "https://api.minimax.io/anthropic")
+        assert _is_anthropic_compat_endpoint("custom", "https://example.com/anthropic/v1")
+        assert not _is_anthropic_compat_endpoint("custom", "https://api.openai.com/v1")
+
+    def test_base64_image_converted(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "describe"},
+                {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR="}}
+            ]
+        }]
+        result = _convert_openai_images_to_anthropic(messages)
+        img_block = result[0]["content"][1]
+        assert img_block["type"] == "image"
+        assert img_block["source"]["type"] == "base64"
+        assert img_block["source"]["media_type"] == "image/png"
+        assert img_block["source"]["data"] == "iVBOR="
+
+    def test_url_image_converted(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}}
+            ]
+        }]
+        result = _convert_openai_images_to_anthropic(messages)
+        img_block = result[0]["content"][0]
+        assert img_block["type"] == "image"
+        assert img_block["source"]["type"] == "url"
+        assert img_block["source"]["url"] == "https://example.com/img.jpg"
+
+    def test_text_only_messages_unchanged(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{"role": "user", "content": "Hello"}]
+        result = _convert_openai_images_to_anthropic(messages)
+        assert result[0] is messages[0]  # same object, not copied
+
+    def test_jpeg_media_type_parsed(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/="}}
+            ]
+        }]
+        result = _convert_openai_images_to_anthropic(messages)
+        assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg"
@@ -191,6 +191,37 @@ class TestNonStringContent:
        kwargs = mock_call.call_args.kwargs
        assert "temperature" not in kwargs

+    def test_summary_call_passes_live_main_runtime(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "ok"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(
+                model="gpt-5.4",
+                provider="openai-codex",
+                base_url="https://chatgpt.com/backend-api/codex",
+                api_key="codex-token",
+                api_mode="codex_responses",
+                quiet_mode=True,
+            )
+
+        messages = [
+            {"role": "user", "content": "do something"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response) as mock_call:
+            c._generate_summary(messages)
+
+        assert mock_call.call_args.kwargs["main_runtime"] == {
+            "model": "gpt-5.4",
+            "provider": "openai-codex",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-token",
+            "api_mode": "codex_responses",
+        }
+

 class TestSummaryFailureCooldown:
    def test_summary_failure_enters_cooldown_and_skips_retry(self):
@@ -308,6 +308,34 @@ class TestMinimaxPreserveDots:
        from run_agent import AIAgent
        assert AIAgent._anthropic_preserve_dots(agent) is False

+    def test_opencode_zen_provider_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="opencode-zen", base_url="")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_opencode_zen_url_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="custom", base_url="https://opencode.ai/zen/v1")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_zai_provider_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="zai", base_url="")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_bigmodel_cn_url_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="custom", base_url="https://open.bigmodel.cn/api/paas/v4")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_normalize_preserves_m25_free_dot(self):
+        from agent.anthropic_adapter import normalize_model_name
+        assert normalize_model_name("minimax-m2.5-free", preserve_dots=True) == "minimax-m2.5-free"
+
    def test_normalize_preserves_m27_dot(self):
        from agent.anthropic_adapter import normalize_model_name
        assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7"
@@ -87,7 +87,10 @@ class TestProviderMapping:

    def test_unmapped_provider_not_in_dict(self):
        assert "nous" not in PROVIDER_TO_MODELS_DEV
-        assert "openai-codex" not in PROVIDER_TO_MODELS_DEV
+
+    def test_openai_codex_mapped_to_openai(self):
+        assert PROVIDER_TO_MODELS_DEV["openai"] == "openai"
+        assert PROVIDER_TO_MODELS_DEV["openai-codex"] == "openai"


 class TestExtractContext:
@@ -18,6 +18,7 @@ from agent.prompt_builder import (
    build_skills_system_prompt,
    build_nous_subscription_prompt,
    build_context_files_prompt,
+    build_environment_hints,
    CONTEXT_FILE_MAX_CHARS,
    DEFAULT_AGENT_IDENTITY,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
@@ -26,6 +27,7 @@ from agent.prompt_builder import (
    MEMORY_GUIDANCE,
    SESSION_SEARCH_GUIDANCE,
    PLATFORM_HINTS,
+    WSL_ENVIRONMENT_HINT,
 )
 from hermes_cli.nous_subscription import NousFeatureState, NousSubscriptionFeatures

@@ -770,6 +772,29 @@ class TestPromptBuilderConstants:
        assert "cli" in PLATFORM_HINTS


+# =========================================================================
+# Environment hints
+# =========================================================================
+
+class TestEnvironmentHints:
+    def test_wsl_hint_constant_mentions_mnt(self):
+        assert "/mnt/c/" in WSL_ENVIRONMENT_HINT
+        assert "WSL" in WSL_ENVIRONMENT_HINT
+
+    def test_build_environment_hints_on_wsl(self, monkeypatch):
+        import agent.prompt_builder as _pb
+        monkeypatch.setattr(_pb, "is_wsl", lambda: True)
+        result = _pb.build_environment_hints()
+        assert "/mnt/" in result
+        assert "WSL" in result
+
+    def test_build_environment_hints_not_wsl(self, monkeypatch):
+        import agent.prompt_builder as _pb
+        monkeypatch.setattr(_pb, "is_wsl", lambda: False)
+        result = _pb.build_environment_hints()
+        assert result == ""
+
+
 # =========================================================================
 # Conditional skill activation
 # =========================================================================
@@ -180,33 +180,71 @@ class TestDisplayResumedHistory:
        assert 200 <= a_count <= 310  # roughly 300 chars (±panel padding)

    def test_long_assistant_message_truncated(self):
+        """Non-last assistant messages are still truncated."""
        cli = _make_cli()
        long_text = "B" * 400
        cli.conversation_history = [
            {"role": "user", "content": "Tell me a lot."},
            {"role": "assistant", "content": long_text},
+            {"role": "user", "content": "And more?"},
+            {"role": "assistant", "content": "Short final reply."},
        ]
        output = self._capture_display(cli)

-        assert "..." in output
+        # The non-last assistant message should be truncated
        assert "B" * 400 not in output
+        # The last assistant message shown in full
+        assert "Short final reply." in output

    def test_multiline_assistant_truncated(self):
+        """Non-last multiline assistant messages are truncated to 3 lines."""
        cli = _make_cli()
        multi = "\n".join([f"Line {i}" for i in range(20)])
        cli.conversation_history = [
            {"role": "user", "content": "Show me lines."},
            {"role": "assistant", "content": multi},
+            {"role": "user", "content": "What else?"},
+            {"role": "assistant", "content": "Done."},
        ]
        output = self._capture_display(cli)

-        # First 3 lines should be there
+        # First 3 lines of non-last assistant should be there
        assert "Line 0" in output
        assert "Line 1" in output
        assert "Line 2" in output
-        # Line 19 should NOT be there (truncated after 3 lines)
+        # Line 19 should NOT be in the truncated message
        assert "Line 19" not in output

+    def test_last_assistant_response_shown_in_full(self):
+        """The last assistant response is shown un-truncated so the user
+        knows where they left off without wasting tokens re-asking."""
+        cli = _make_cli()
+        long_text = "X" * 500
+        cli.conversation_history = [
+            {"role": "user", "content": "Tell me everything."},
+            {"role": "assistant", "content": long_text},
+        ]
+        output = self._capture_display(cli)
+
+        # Full 500-char text should be present (may be line-wrapped by Rich)
+        x_count = output.count("X")
+        assert x_count >= 490  # allow small Rich formatting variance
+
+    def test_last_assistant_multiline_shown_in_full(self):
+        """The last assistant response shows all lines, not just 3."""
+        cli = _make_cli()
+        multi = "\n".join([f"Line {i}" for i in range(20)])
+        cli.conversation_history = [
+            {"role": "user", "content": "Show me everything."},
+            {"role": "assistant", "content": multi},
+        ]
+        output = self._capture_display(cli)
+
+        # All 20 lines should be present since it's the last response
+        assert "Line 0" in output
+        assert "Line 10" in output
+        assert "Line 19" in output
+
    def test_large_history_shows_truncation_indicator(self):
        cli = _make_cli()
        cli.conversation_history = _large_history(n_exchanges=15)
@@ -0,0 +1,87 @@
+"""Tests for _normalize_chat_content in the API server adapter."""
+
+from gateway.platforms.api_server import _normalize_chat_content
+
+
+class TestNormalizeChatContent:
+    """Content normalization converts array-based content parts to plain text."""
+
+    def test_none_returns_empty_string(self):
+        assert _normalize_chat_content(None) == ""
+
+    def test_plain_string_returned_as_is(self):
+        assert _normalize_chat_content("hello world") == "hello world"
+
+    def test_empty_string_returned_as_is(self):
+        assert _normalize_chat_content("") == ""
+
+    def test_text_content_part(self):
+        content = [{"type": "text", "text": "hello"}]
+        assert _normalize_chat_content(content) == "hello"
+
+    def test_input_text_content_part(self):
+        content = [{"type": "input_text", "text": "user input"}]
+        assert _normalize_chat_content(content) == "user input"
+
+    def test_output_text_content_part(self):
+        content = [{"type": "output_text", "text": "assistant output"}]
+        assert _normalize_chat_content(content) == "assistant output"
+
+    def test_multiple_text_parts_joined_with_newline(self):
+        content = [
+            {"type": "text", "text": "first"},
+            {"type": "text", "text": "second"},
+        ]
+        assert _normalize_chat_content(content) == "first\nsecond"
+
+    def test_mixed_string_and_dict_parts(self):
+        content = ["plain string", {"type": "text", "text": "dict part"}]
+        assert _normalize_chat_content(content) == "plain string\ndict part"
+
+    def test_image_url_parts_silently_skipped(self):
+        content = [
+            {"type": "text", "text": "check this:"},
+            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
+        ]
+        assert _normalize_chat_content(content) == "check this:"
+
+    def test_integer_content_converted(self):
+        assert _normalize_chat_content(42) == "42"
+
+    def test_boolean_content_converted(self):
+        assert _normalize_chat_content(True) == "True"
+
+    def test_deeply_nested_list_respects_depth_limit(self):
+        """Nesting beyond max_depth must not crash; the result is still a string."""
+        content = [[[[[[[[[[[["deep"]]]]]]]]]]]]
+        result = _normalize_chat_content(content)
+        # The deep nesting should be truncated, not crash
+        assert isinstance(result, str)
+
+    def test_large_list_capped(self):
+        """Lists beyond MAX_CONTENT_LIST_SIZE are truncated."""
+        content = [{"type": "text", "text": f"item{i}"} for i in range(2000)]
+        result = _normalize_chat_content(content)
+        # Should not contain all 2000 items
+        assert result.count("item") <= 1000
+
+    def test_oversized_string_truncated(self):
+        """Strings beyond 64KB are truncated."""
+        huge = "x" * 100_000
+        result = _normalize_chat_content(huge)
+        assert len(result) == 65_536
+
+    def test_empty_text_parts_filtered(self):
+        content = [
+            {"type": "text", "text": ""},
+            {"type": "text", "text": "actual"},
+            {"type": "text", "text": ""},
+        ]
+        assert _normalize_chat_content(content) == "actual"
+
+    def test_dict_without_type_skipped(self):
+        content = [{"foo": "bar"}, {"type": "text", "text": "real"}]
+        assert _normalize_chat_content(content) == "real"
+
+    def test_empty_list_returns_empty(self):
+        assert _normalize_chat_content([]) == ""
@@ -0,0 +1,226 @@
+"""Tests for the clean shutdown marker that prevents unwanted session auto-resets.
+
+When the gateway shuts down gracefully (hermes update, gateway restart, /restart),
+it writes a .clean_shutdown marker.  On the next startup, if the marker exists,
+suspend_recently_active() is skipped so users don't lose their sessions.
+
+After a crash (no marker), suspension still fires as a safety net for stuck sessions.
+"""
+
+import os
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig, SessionResetPolicy
+from gateway.session import SessionEntry, SessionSource, SessionStore
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_source(platform=Platform.TELEGRAM, chat_id="123", user_id="u1"):
+    """Build a SessionSource for tests, defaulting to a Telegram chat."""
+    fields = {"platform": platform, "chat_id": chat_id, "user_id": user_id}
+    return SessionSource(**fields)
+
+
+def _make_store(tmp_path, policy=None):
+    """Create a SessionStore rooted at tmp_path, optionally overriding the default reset policy."""
+    gateway_config = GatewayConfig()
+    if policy:
+        gateway_config.default_reset_policy = policy
+    session_store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
+    return session_store
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.suspend_recently_active
+# ---------------------------------------------------------------------------
+
+class TestSuspendRecentlyActive:
+    """Exercise SessionStore.suspend_recently_active against fresh and stale sessions."""
+
+    def test_suspends_recently_active_sessions(self, tmp_path):
+        """A just-created session is counted, and the next access reports an auto-reset."""
+        session_store = _make_store(tmp_path)
+        src = _make_source()
+        created = session_store.get_or_create_session(src)
+        assert not created.suspended
+
+        suspended_total = session_store.suspend_recently_active()
+        assert suspended_total == 1
+
+        # Fetching again after the suspension pass should flag an automatic reset.
+        assert session_store.get_or_create_session(src).was_auto_reset
+
+    def test_does_not_suspend_old_sessions(self, tmp_path):
+        """A session last updated 300s ago falls outside a 120s window and is left alone."""
+        session_store = _make_store(tmp_path)
+        stale = session_store.get_or_create_session(_make_source())
+
+        # Push updated_at beyond the cutoff and persist the change.
+        with session_store._lock:
+            stale.updated_at = datetime.now() - timedelta(seconds=300)
+            session_store._save()
+
+        assert session_store.suspend_recently_active(max_age_seconds=120) == 0
+
+    def test_already_suspended_not_double_counted(self, tmp_path):
+        """Each suspension pass reports exactly one session."""
+        session_store = _make_store(tmp_path)
+        src = _make_source()
+        session_store.get_or_create_session(src)
+
+        # First pass suspends the only session.
+        first_pass = session_store.suspend_recently_active()
+        assert first_pass == 1
+
+        # Access the source again (the old session is expected to be reset here).
+        session_store.get_or_create_session(src)
+
+        # Second pass: the session is recent and not yet suspended, so it counts once.
+        second_pass = session_store.suspend_recently_active()
+        assert second_pass == 1
+
+
+# ---------------------------------------------------------------------------
+# Clean shutdown marker integration
+# ---------------------------------------------------------------------------
+
+class TestCleanShutdownMarker:
+    """Test that the marker file controls session suspension on startup."""
+
+    @staticmethod
+    def _make_bare_runner():
+        """Build a GatewayRunner without running __init__.
+
+        Only the attributes assigned below are populated; the instance is meant
+        for driving stop() while its heavy dependencies are patched out.
+        """
+        from gateway.run import GatewayRunner
+
+        runner = object.__new__(GatewayRunner)
+        runner._restart_requested = False
+        runner._restart_detached = False
+        runner._restart_via_service = False
+        runner._restart_task_started = False
+        runner._running = True
+        runner._draining = False
+        runner._stop_task = None
+        runner._running_agents = {}
+        runner._pending_messages = {}
+        runner._pending_approvals = {}
+        runner._background_tasks = set()
+        runner._shutdown_event = MagicMock()
+        runner._restart_drain_timeout = 5
+        runner._exit_code = None
+        runner._exit_reason = None
+        runner.adapters = {}
+        runner.config = GatewayConfig()
+        return runner
+
+    @staticmethod
+    def _run_stop(runner, **stop_kwargs):
+        """Drive ``runner.stop(**stop_kwargs)`` to completion with heavy dependencies mocked."""
+        import asyncio
+
+        with patch("gateway.run.GatewayRunner._drain_active_agents", new_callable=AsyncMock, return_value=([], False)), \
+             patch("gateway.run.GatewayRunner._finalize_shutdown_agents"), \
+             patch("gateway.run.GatewayRunner._update_runtime_status"), \
+             patch("gateway.status.remove_pid_file"), \
+             patch("tools.process_registry.process_registry") as mock_proc_reg, \
+             patch("tools.terminal_tool.cleanup_all_environments"), \
+             patch("tools.browser_tool.cleanup_all_browsers"):
+            mock_proc_reg.kill_all = MagicMock()
+
+            # Use a private event loop instead of asyncio.get_event_loop():
+            # the latter is deprecated when no loop is running and raises
+            # RuntimeError once another test has cleared the current loop,
+            # which made these tests depend on execution order.
+            loop = asyncio.new_event_loop()
+            try:
+                loop.run_until_complete(runner.stop(**stop_kwargs))
+            finally:
+                loop.close()
+
+    @staticmethod
+    def _simulate_startup(marker, store):
+        """Consume the marker if present, otherwise suspend recently active sessions.
+
+        NOTE(review): this mirrors what the tests assume start() does rather
+        than calling start() itself — confirm it stays in sync with gateway.run.
+        """
+        if marker.exists():
+            # Clean shutdown: drop the marker and skip suspension.
+            marker.unlink()
+        else:
+            store.suspend_recently_active()
+
+    def test_marker_written_on_graceful_stop(self, tmp_path, monkeypatch):
+        """stop() should write .clean_shutdown marker."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        marker = tmp_path / ".clean_shutdown"
+        assert not marker.exists()
+
+        # Create a minimal runner and call the shutdown logic directly
+        runner = self._make_bare_runner()
+        self._run_stop(runner)
+
+        assert marker.exists(), ".clean_shutdown marker should exist after graceful stop"
+
+    def test_marker_skips_suspension_on_startup(self, tmp_path, monkeypatch):
+        """If .clean_shutdown exists, suspend_recently_active should NOT be called."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+
+        # Create the marker
+        marker = tmp_path / ".clean_shutdown"
+        marker.touch()
+
+        # Create a store with a recently active session
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        assert not entry.suspended
+
+        self._simulate_startup(marker, store)
+
+        # Session should NOT be suspended
+        with store._lock:
+            store._ensure_loaded_locked()
+            for e in store._entries.values():
+                assert not e.suspended, "Session should NOT be suspended after clean shutdown"
+
+        assert not marker.exists(), "Marker should be cleaned up"
+
+    def test_no_marker_triggers_suspension(self, tmp_path, monkeypatch):
+        """Without .clean_shutdown marker (crash), suspension should fire."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+
+        marker = tmp_path / ".clean_shutdown"
+        assert not marker.exists()
+
+        # Create a store with a recently active session
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        assert not entry.suspended
+
+        self._simulate_startup(marker, store)
+
+        # Session SHOULD be suspended (crash recovery)
+        with store._lock:
+            store._ensure_loaded_locked()
+            suspended_count = sum(1 for e in store._entries.values() if e.suspended)
+        assert suspended_count == 1, "Session should be suspended after crash (no marker)"
+
+    def test_marker_written_on_restart_stop(self, tmp_path, monkeypatch):
+        """stop(restart=True) should also write the marker."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        marker = tmp_path / ".clean_shutdown"
+
+        runner = self._make_bare_runner()
+        self._run_stop(runner, restart=True)
+
+        assert marker.exists(), ".clean_shutdown marker should exist after restart-stop too"
@@ -124,7 +124,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_off_mode_no_reply_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("off")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -137,7 +137,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_first_mode_only_first_chunk_references(self):
        adapter, channel, ref_msg = _make_discord_adapter("first")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -152,7 +152,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_all_mode_all_chunks_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("all")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -165,7 +165,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_no_reply_to_param_no_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("all")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]

        await adapter.send("12345", "test content", reply_to=None)

@@ -176,7 +176,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_single_chunk_respects_first_mode(self):
        adapter, channel, ref_msg = _make_discord_adapter("first")
-        adapter.truncate_message = lambda content, max_len: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -187,7 +187,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_single_chunk_off_mode(self):
        adapter, channel, ref_msg = _make_discord_adapter("off")
-        adapter.truncate_message = lambda content, max_len: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -200,7 +200,7 @@ class TestSendWithReplyToMode:
    async def test_invalid_mode_falls_back_to_first_behavior(self):
        """Invalid mode behaves like 'first' — only first chunk gets reference."""
        adapter, channel, ref_msg = _make_discord_adapter("banana")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]

        await adapter.send("12345", "test", reply_to="999")

@@ -189,14 +189,14 @@ class TestPlatformDefaults:
        """Slack, Mattermost, Matrix default to 'new' tool progress."""
        from gateway.display_config import resolve_display_setting

-        for plat in ("slack", "mattermost", "matrix", "feishu"):
+        for plat in ("slack", "mattermost", "matrix", "feishu", "whatsapp"):
            assert resolve_display_setting({}, plat, "tool_progress") == "new", plat

    def test_low_tier_platforms(self):
-        """Signal, WhatsApp, etc. default to 'off' tool progress."""
+        """Signal, BlueBubbles, etc. default to 'off' tool progress."""
        from gateway.display_config import resolve_display_setting

-        for plat in ("signal", "whatsapp", "bluebubbles", "weixin", "wecom", "dingtalk"):
+        for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk"):
            assert resolve_display_setting({}, plat, "tool_progress") == "off", plat

    def test_minimal_tier_platforms(self):
@@ -0,0 +1,438 @@
+"""Tests for gateway.platforms.feishu — Feishu scan-to-create registration."""
+
+import json
+from unittest.mock import patch, MagicMock
+import pytest
+
+
+def _mock_urlopen(response_data, status=200):
+    """Build a context-manager-capable stand-in for a urlopen() response carrying JSON."""
+    payload = json.dumps(response_data).encode("utf-8")
+    response = MagicMock()
+    response.status = status
+    response.read.return_value = payload
+    # Entering the context yields the response itself; __exit__ returns False.
+    response.__enter__ = lambda this: this
+    response.__exit__ = MagicMock(return_value=False)
+    return response
+
+
+class TestPostRegistration:
+    """Covers _post_registration, the raw HTTP POST helper."""
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_post_registration_returns_parsed_json(self, mock_urlopen_fn):
+        """The JSON body served by urlopen comes back parsed and indexable by key."""
+        from gateway.platforms.feishu import _post_registration
+
+        server_reply = {"nonce": "abc", "supported_auth_methods": ["client_secret"]}
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+        parsed = _post_registration("https://accounts.feishu.cn", {"action": "init"})
+        assert parsed["nonce"] == "abc"
+        assert "client_secret" in parsed["supported_auth_methods"]
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_post_registration_sends_form_encoded_body(self, mock_urlopen_fn):
+        """Payload fields travel as key=value pairs with a form-urlencoded content type."""
+        from gateway.platforms.feishu import _post_registration
+
+        mock_urlopen_fn.return_value = _mock_urlopen({})
+        _post_registration("https://accounts.feishu.cn", {"action": "init", "key": "val"})
+        positional, _ = mock_urlopen_fn.call_args
+        sent_request = positional[0]
+        encoded = sent_request.data.decode("utf-8")
+        for pair in ("action=init", "key=val"):
+            assert pair in encoded
+        assert sent_request.get_header("Content-type") == "application/x-www-form-urlencoded"
+
+
+class TestInitRegistration:
+    """Covers _init_registration, the first step of registration."""
+
+    @staticmethod
+    def _init_reply(auth_methods):
+        """Return a mocked urlopen response advertising the given auth methods."""
+        return _mock_urlopen({"nonce": "abc", "supported_auth_methods": auth_methods})
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_init_succeeds_when_client_secret_supported(self, mock_urlopen_fn):
+        """No exception is raised when the server lists client_secret."""
+        from gateway.platforms.feishu import _init_registration
+
+        mock_urlopen_fn.return_value = self._init_reply(["client_secret"])
+        _init_registration("feishu")
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_init_raises_when_client_secret_not_supported(self, mock_urlopen_fn):
+        """A RuntimeError mentioning client_secret is raised when it is not offered."""
+        from gateway.platforms.feishu import _init_registration
+
+        mock_urlopen_fn.return_value = self._init_reply(["other_method"])
+        with pytest.raises(RuntimeError, match="client_secret"):
+            _init_registration("feishu")
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_init_uses_lark_url_for_lark_domain(self, mock_urlopen_fn):
+        """The "lark" domain sends its request to a larksuite.com URL."""
+        from gateway.platforms.feishu import _init_registration
+
+        mock_urlopen_fn.return_value = self._init_reply(["client_secret"])
+        _init_registration("lark")
+        sent_request = mock_urlopen_fn.call_args[0][0]
+        assert "larksuite.com" in sent_request.full_url
+
+
+class TestBeginRegistration:
+    """Covers _begin_registration, the step that yields a device code and QR URL."""
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_begin_returns_device_code_and_qr_url(self, mock_urlopen_fn):
+        """The server's fields are surfaced, with the verification URI exposed as qr_url."""
+        from gateway.platforms.feishu import _begin_registration
+
+        server_reply = {
+            "device_code": "dc_123",
+            "verification_uri_complete": "https://accounts.feishu.cn/qr/abc",
+            "user_code": "ABCD-1234",
+            "interval": 5,
+            "expire_in": 600,
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        begun = _begin_registration("feishu")
+        assert begun["device_code"] == "dc_123"
+        assert "qr_url" in begun
+        assert "accounts.feishu.cn" in begun["qr_url"]
+        passthrough = {"user_code": "ABCD-1234", "interval": 5, "expire_in": 600}
+        for field, expected in passthrough.items():
+            assert begun[field] == expected
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_begin_sends_correct_archetype(self, mock_urlopen_fn):
+        """The request body names the PersonalAgent archetype and client_secret auth."""
+        from gateway.platforms.feishu import _begin_registration
+
+        server_reply = {
+            "device_code": "dc_123",
+            "verification_uri_complete": "https://example.com/qr",
+            "user_code": "X",
+            "interval": 5,
+            "expire_in": 600,
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        _begin_registration("feishu")
+        sent_request = mock_urlopen_fn.call_args[0][0]
+        encoded = sent_request.data.decode("utf-8")
+        for pair in ("archetype=PersonalAgent", "auth_method=client_secret"):
+            assert pair in encoded
+
+
+class TestPollRegistration:
+    """Covers _poll_registration with the clock and the HTTP layer mocked."""
+
+    @staticmethod
+    def _script_clock(mock_time, ticks):
+        """Queue `ticks` as successive time.time() results and replace sleep with a mock."""
+        mock_time.time.side_effect = ticks
+        mock_time.sleep = MagicMock()
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_returns_credentials_on_success(self, mock_urlopen_fn, mock_time):
+        """A credential-bearing response maps to app_id, app_secret, domain and open_id."""
+        from gateway.platforms.feishu import _poll_registration
+
+        self._script_clock(mock_time, [0, 1])
+        granted = {
+            "client_id": "cli_app123",
+            "client_secret": "secret456",
+            "user_info": {"open_id": "ou_owner", "tenant_brand": "feishu"},
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(granted)
+
+        creds = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
+        )
+        assert creds is not None
+        assert creds["app_id"] == "cli_app123"
+        assert creds["app_secret"] == "secret456"
+        assert creds["domain"] == "feishu"
+        assert creds["open_id"] == "ou_owner"
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_switches_domain_on_lark_tenant_brand(self, mock_urlopen_fn, mock_time):
+        """A pending reply tagged "lark" followed by success yields domain "lark"."""
+        from gateway.platforms.feishu import _poll_registration
+
+        self._script_clock(mock_time, [0, 1, 2])
+
+        still_pending = _mock_urlopen({
+            "error": "authorization_pending",
+            "user_info": {"tenant_brand": "lark"},
+        })
+        granted = _mock_urlopen({
+            "client_id": "cli_lark",
+            "client_secret": "secret_lark",
+            "user_info": {"open_id": "ou_lark", "tenant_brand": "lark"},
+        })
+        mock_urlopen_fn.side_effect = [still_pending, granted]
+
+        creds = _poll_registration(
+            device_code="dc_123", interval=0, expire_in=60, domain="feishu"
+        )
+        assert creds is not None
+        assert creds["domain"] == "lark"
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_success_with_lark_brand_in_same_response(self, mock_urlopen_fn, mock_time):
+        """Credentials and lark tenant_brand in one response must not be discarded."""
+        from gateway.platforms.feishu import _poll_registration
+
+        self._script_clock(mock_time, [0, 1])
+        granted = {
+            "client_id": "cli_lark_direct",
+            "client_secret": "secret_lark_direct",
+            "user_info": {"open_id": "ou_lark_direct", "tenant_brand": "lark"},
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(granted)
+
+        creds = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
+        )
+        assert creds is not None
+        assert creds["app_id"] == "cli_lark_direct"
+        assert creds["domain"] == "lark"
+        assert creds["open_id"] == "ou_lark_direct"
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_returns_none_on_access_denied(self, mock_urlopen_fn, mock_time):
+        """An access_denied error ends polling with None."""
+        from gateway.platforms.feishu import _poll_registration
+
+        self._script_clock(mock_time, [0, 1])
+        mock_urlopen_fn.return_value = _mock_urlopen({"error": "access_denied"})
+
+        outcome = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
+        )
+        assert outcome is None
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_returns_none_on_timeout(self, mock_urlopen_fn, mock_time):
+        """With the clock jumping from 0 to 999 and expire_in=1, polling gives up with None."""
+        from gateway.platforms.feishu import _poll_registration
+
+        self._script_clock(mock_time, [0, 999])
+        mock_urlopen_fn.return_value = _mock_urlopen({"error": "authorization_pending"})
+
+        outcome = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=1, domain="feishu"
+        )
+        assert outcome is None
+
+
+class TestRenderQr:
+    """Covers _render_qr with the qrcode module mocked or absent."""
+
+    @patch("gateway.platforms.feishu._qrcode_mod", create=True)
+    def test_render_qr_returns_true_on_success(self, mock_qrcode_mod):
+        """With a qrcode module present, the URL is added, fitted, printed, and True returned."""
+        from gateway.platforms.feishu import _render_qr
+
+        url = "https://example.com/qr"
+        qr_instance = MagicMock()
+        mock_qrcode_mod.QRCode.return_value = qr_instance
+
+        rendered = _render_qr(url)
+        assert rendered is True
+        qr_instance.add_data.assert_called_once_with(url)
+        qr_instance.make.assert_called_once_with(fit=True)
+        qr_instance.print_ascii.assert_called_once()
+
+    def test_render_qr_returns_false_when_qrcode_missing(self):
+        """When _qrcode_mod is None, rendering reports False."""
+        from gateway.platforms.feishu import _render_qr
+
+        with patch("gateway.platforms.feishu._qrcode_mod", None):
+            rendered = _render_qr("https://example.com/qr")
+        assert rendered is False
+
+
+class TestProbeBot:
+    """Covers probe_bot through both the SDK path and the raw-HTTP fallback."""
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True)
+    def test_probe_returns_bot_info_on_success(self):
+        """With the SDK flagged available, the SDK probe's dict is returned."""
+        from gateway.platforms.feishu import probe_bot
+
+        sdk_info = {"bot_name": "TestBot", "bot_open_id": "ou_bot123"}
+        with patch("gateway.platforms.feishu._probe_bot_sdk") as mock_sdk:
+            mock_sdk.return_value = sdk_info
+            probed = probe_bot("cli_app", "secret", "feishu")
+
+        assert probed is not None
+        assert probed["bot_name"] == "TestBot"
+        assert probed["bot_open_id"] == "ou_bot123"
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True)
+    def test_probe_returns_none_on_failure(self):
+        """A None result from the SDK probe is passed through as None."""
+        from gateway.platforms.feishu import probe_bot
+
+        with patch("gateway.platforms.feishu._probe_bot_sdk") as mock_sdk:
+            mock_sdk.return_value = None
+            probed = probe_bot("bad_id", "bad_secret", "feishu")
+
+        assert probed is None
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", False)
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_http_fallback_when_sdk_unavailable(self, mock_urlopen_fn):
+        """Without lark_oapi, probe falls back to raw HTTP."""
+        from gateway.platforms.feishu import probe_bot
+
+        # Two HTTP replies are queued in order: the token reply, then the bot-info reply.
+        mock_urlopen_fn.side_effect = [
+            _mock_urlopen({"code": 0, "tenant_access_token": "t-123"}),
+            _mock_urlopen({"code": 0, "bot": {"bot_name": "HttpBot", "open_id": "ou_http"}}),
+        ]
+
+        probed = probe_bot("cli_app", "secret", "feishu")
+        assert probed is not None
+        assert probed["bot_name"] == "HttpBot"
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", False)
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_http_fallback_returns_none_on_network_error(self, mock_urlopen_fn):
+        """A URLError raised by urlopen on the HTTP path results in None."""
+        from urllib.error import URLError
+        from gateway.platforms.feishu import probe_bot
+
+        mock_urlopen_fn.side_effect = URLError("connection refused")
+        probed = probe_bot("cli_app", "secret", "feishu")
+        assert probed is None
+
+
+class TestQrRegister:
+    """Covers qr_register, the public entry point, with every step mocked."""
+
+    @staticmethod
+    def _begin_ok():
+        """Return a fresh begin-step payload as the mocked _begin_registration result."""
+        return {
+            "device_code": "dc_123",
+            "qr_url": "https://example.com/qr",
+            "user_code": "ABCD",
+            "interval": 1,
+            "expire_in": 60,
+        }
+
+    @staticmethod
+    def _poll_ok():
+        """Return a fresh credentials payload as the mocked _poll_registration result."""
+        return {
+            "app_id": "cli_app",
+            "app_secret": "secret",
+            "domain": "feishu",
+            "open_id": "ou_owner",
+        }
+
+    @patch("gateway.platforms.feishu.probe_bot")
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._poll_registration")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_success_flow(
+        self, mock_init, mock_begin, mock_poll, mock_render, mock_probe
+    ):
+        """All steps succeeding yields credentials plus the probed bot name."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.return_value = self._begin_ok()
+        mock_poll.return_value = self._poll_ok()
+        mock_probe.return_value = {"bot_name": "MyBot", "bot_open_id": "ou_bot"}
+
+        registered = qr_register()
+        assert registered is not None
+        assert registered["app_id"] == "cli_app"
+        assert registered["app_secret"] == "secret"
+        assert registered["bot_name"] == "MyBot"
+        mock_init.assert_called_once()
+        mock_render.assert_called_once()
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_init_failure(self, mock_init):
+        """A RuntimeError from the init step is reported as None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = RuntimeError("not supported")
+        assert qr_register() is None
+
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._poll_registration")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_poll_failure(
+        self, mock_init, mock_begin, mock_poll, mock_render
+    ):
+        """A None result from polling makes the whole registration return None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.return_value = self._begin_ok()
+        mock_poll.return_value = None
+
+        assert qr_register() is None
+
+    # -- Contract: expected errors → None, unexpected errors → propagate --
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_network_error(self, mock_init):
+        """URLError (network down) is an expected failure → None."""
+        from urllib.error import URLError
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = URLError("DNS resolution failed")
+        assert qr_register() is None
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_json_error(self, mock_init):
+        """Malformed server response is an expected failure → None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = json.JSONDecodeError("bad json", "", 0)
+        assert qr_register() is None
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_propagates_unexpected_errors(self, mock_init):
+        """Bugs (e.g. AttributeError) must not be swallowed — they propagate."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = AttributeError("some internal bug")
+        with pytest.raises(AttributeError, match="some internal bug"):
+            qr_register()
+
+    # -- Negative paths: partial/malformed server responses --
+
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_when_begin_missing_device_code(
+        self, mock_init, mock_begin, mock_render
+    ):
+        """Server returns begin response without device_code → RuntimeError → None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.side_effect = RuntimeError("Feishu registration did not return a device_code")
+        assert qr_register() is None
+
+    @patch("gateway.platforms.feishu.probe_bot")
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._poll_registration")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_succeeds_even_when_probe_fails(
+        self, mock_init, mock_begin, mock_poll, mock_render, mock_probe
+    ):
+        """Registration succeeds but probe fails → result with bot_name=None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.return_value = self._begin_ok()
+        mock_poll.return_value = self._poll_ok()
+        mock_probe.return_value = None  # probe failed
+
+        registered = qr_register()
+        assert registered is not None
+        assert registered["app_id"] == "cli_app"
+        assert registered["bot_name"] is None
+        assert registered["bot_open_id"] is None
@@ -48,6 +48,7 @@ def _make_event(
    room_id="!room1:example.org",
    formatted_body=None,
    thread_id=None,
+    mention_user_ids=None,
 ):
    """Create a fake room message event.

@@ -60,6 +61,9 @@ def _make_event(
        content["formatted_body"] = formatted_body
        content["format"] = "org.matrix.custom.html"

+    if mention_user_ids is not None:
+        content["m.mentions"] = {"user_ids": mention_user_ids}
+
    relates_to = {}
    if thread_id:
        relates_to["rel_type"] = "m.thread"
@@ -108,6 +112,44 @@ class TestIsBotMentioned:
        # "hermesbot" should not match word-boundary check for "hermes"
        assert not self.adapter._is_bot_mentioned("hermesbot is here")

+    # m.mentions.user_ids — MSC3952 / Matrix v1.7 authoritative mentions
+    # Ported from openclaw/openclaw#64796
+
+    def test_m_mentions_user_ids_authoritative(self):
+        """mention_user_ids naming @hermes:example.org is enough — the body need not mention the bot."""
+        assert self.adapter._is_bot_mentioned(
+            "please reply",  # no @hermes anywhere in body
+            mention_user_ids=["@hermes:example.org"],
+        )
+
+    def test_m_mentions_user_ids_with_body_mention(self):
+        """Same user ID present in both mention_user_ids and the body text — still reported as mentioned."""
+        assert self.adapter._is_bot_mentioned(
+            "hey @hermes:example.org help",
+            mention_user_ids=["@hermes:example.org"],
+        )
+
+    def test_m_mentions_user_ids_other_user_only(self):
+        """mention_user_ids naming only @alice, body without the bot name — bot is NOT mentioned."""
+        assert not self.adapter._is_bot_mentioned(
+            "hello",
+            mention_user_ids=["@alice:example.org"],
+        )
+
+    def test_m_mentions_user_ids_empty_list(self):
+        """Empty mention_user_ids and no bot name in the body — result is not-mentioned."""
+        assert not self.adapter._is_bot_mentioned(
+            "hello everyone",
+            mention_user_ids=[],
+        )
+
+    def test_m_mentions_user_ids_none(self):
+        """mention_user_ids=None and no bot name in the body — result is not-mentioned."""
+        assert not self.adapter._is_bot_mentioned(
+            "hello everyone",
+            mention_user_ids=None,
+        )
+

 class TestStripMention:
    def setup_method(self):
@@ -176,6 +218,44 @@ async def test_require_mention_html_pill(monkeypatch):
    adapter.handle_message.assert_awaited_once()


+@pytest.mark.asyncio
+async def test_require_mention_m_mentions_user_ids(monkeypatch):
+    """A room message whose m.mentions.user_ids names the bot is handled without a body mention (MSC3952).
+
+    Ported from openclaw/openclaw#64796.
+    """
+    monkeypatch.delenv("MATRIX_REQUIRE_MENTION", raising=False)  # clear any ambient setting
+    monkeypatch.delenv("MATRIX_FREE_RESPONSE_ROOMS", raising=False)  # clear any ambient setting
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    # Body has NO mention, but m.mentions.user_ids includes the bot.
+    event = _make_event(
+        "please reply",
+        mention_user_ids=["@hermes:example.org"],
+    )
+
+    await adapter._on_room_message(event)
+    adapter.handle_message.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_require_mention_m_mentions_other_user_ignored(monkeypatch):
+    """A room message whose m.mentions.user_ids names only @alice is never passed to handle_message."""
+    monkeypatch.delenv("MATRIX_REQUIRE_MENTION", raising=False)  # clear any ambient setting
+    monkeypatch.delenv("MATRIX_FREE_RESPONSE_ROOMS", raising=False)  # clear any ambient setting
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    event = _make_event(
+        "hey alice check this",
+        mention_user_ids=["@alice:example.org"],
+    )
+
+    await adapter._on_room_message(event)
+    adapter.handle_message.assert_not_awaited()
+
+
@pytest.mark.asyncio
 async def test_require_mention_dm_always_responds(monkeypatch):
    """DMs always respond regardless of mention setting."""
@@ -9,6 +9,8 @@ from gateway.platforms.base import (
    MessageEvent,
    MessageType,
    safe_url_for_log,
+    utf16_len,
+    _prefix_within_utf16_limit,
 )


@@ -448,3 +450,135 @@ class TestGetHumanDelay:
        with patch.dict(os.environ, env):
            delay = BasePlatformAdapter._get_human_delay()
            assert 0.1 <= delay <= 0.2
+
+
+# ---------------------------------------------------------------------------
+# utf16_len / _prefix_within_utf16_limit / truncate_message with len_fn
+# ---------------------------------------------------------------------------
+# Ported from nearai/ironclaw#2304 — Telegram counts message length in UTF-16
+# code units, not Unicode code-points.  Astral-plane characters (emoji, CJK
+# Extension B) are surrogate pairs: 1 Python char but 2 UTF-16 units.
+
+
+class TestUtf16Len:
+    """utf16_len counts UTF-16 code units: 1 per BMP character, 2 per astral-plane character."""
+
+    def test_ascii(self):
+        assert utf16_len("hello") == 5  # ASCII: one unit per character
+
+    def test_bmp_cjk(self):
+        # CJK ideographs in the BMP are 1 code unit each
+        assert utf16_len("你好") == 2
+
+    def test_emoji_surrogate_pair(self):
+        # 😀 (U+1F600) is outside BMP → 2 UTF-16 code units
+        assert utf16_len("😀") == 2
+
+    def test_mixed(self):
+        # "hi😀" = 2 (two ASCII chars) + 2 (one surrogate pair) = 4 UTF-16 units
+        assert utf16_len("hi😀") == 4
+
+    def test_musical_symbol(self):
+        # 𝄞 (U+1D11E) — Musical Symbol G Clef, surrogate pair
+        assert utf16_len("𝄞") == 2
+
+    def test_empty(self):
+        assert utf16_len("") == 0  # empty string → zero units
+
+
+class TestPrefixWithinUtf16Limit:
+    """_prefix_within_utf16_limit(text, limit) returns a prefix within `limit` UTF-16 units, never half a surrogate pair."""
+
+    def test_fits_entirely(self):
+        assert _prefix_within_utf16_limit("hello", 10) == "hello"  # under the limit → returned unchanged
+
+    def test_ascii_truncation(self):
+        result = _prefix_within_utf16_limit("hello world", 5)
+        assert result == "hello"
+        assert utf16_len(result) <= 5
+
+    def test_does_not_split_surrogate_pair(self):
+        # "a😀b" = 1 + 2 + 1 = 4 UTF-16 units; at limit 2 the emoji would end at unit 3, so only "a" fits
+        result = _prefix_within_utf16_limit("a😀b", 2)
+        assert result == "a"
+        assert utf16_len(result) <= 2
+
+    def test_emoji_at_limit(self):
+        # "😀" = 2 UTF-16 units; limit 2 should include it
+        result = _prefix_within_utf16_limit("😀x", 2)
+        assert result == "😀"
+
+    def test_all_emoji(self):
+        msg = "😀" * 10  # 20 UTF-16 units
+        result = _prefix_within_utf16_limit(msg, 6)
+        assert result == "😀😀😀"  # 3 emoji × 2 units = exactly 6
+        assert utf16_len(result) == 6
+
+    def test_empty(self):
+        assert _prefix_within_utf16_limit("", 5) == ""
+
+
+class TestTruncateMessageUtf16:
+    """Verify truncate_message respects UTF-16 lengths when len_fn=utf16_len."""
+
+    def test_short_emoji_message_no_split(self):
+        """A short message under the UTF-16 limit should not be split."""
+        msg = "Hello 😀 world"
+        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
+        assert len(chunks) == 1
+        assert chunks[0] == msg
+
+    def test_emoji_near_limit_triggers_split(self):
+        """A message under 4096 codepoints (2049) but over 4096 UTF-16 units (4098) must split."""
+        # 2049 emoji = 2049 codepoints but 4098 UTF-16 units → exceeds 4096
+        msg = "😀" * 2049
+        assert len(msg) == 2049  # Python len sees 2049 chars
+        assert utf16_len(msg) == 4098  # but it's 4098 UTF-16 units
+
+        # Without UTF-16 awareness, this would NOT split (2049 < 4096)
+        chunks_naive = BasePlatformAdapter.truncate_message(msg, 4096)
+        assert len(chunks_naive) == 1, "Without len_fn, no split expected"
+
+        # With UTF-16 awareness, it MUST split
+        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
+        assert len(chunks) > 1, "With utf16_len, message should be split"
+
+        # Each chunk must fit within the UTF-16 limit
+        for i, chunk in enumerate(chunks):
+            assert utf16_len(chunk) <= 4096, (
+                f"Chunk {i} exceeds 4096 UTF-16 units: {utf16_len(chunk)}"
+            )
+
+    def test_each_utf16_chunk_within_limit(self):
+        """All chunks produced with utf16_len must stay within max_len plus 20 UTF-16 units of slack."""
+        # Mix of BMP and astral-plane characters
+        msg = ("Hello 😀 world 🎵 test 𝄞 " * 200).strip()
+        max_len = 200
+        chunks = BasePlatformAdapter.truncate_message(msg, max_len, len_fn=utf16_len)
+        for i, chunk in enumerate(chunks):
+            u16_len = utf16_len(chunk)  # NOTE(review): the +20 below presumably allows for text truncate_message appends — confirm
+            assert u16_len <= max_len + 20, (
+                f"Chunk {i} UTF-16 length {u16_len} exceeds {max_len}"
+            )
+
+    def test_all_content_preserved(self):
+        """Every input word must still appear somewhere in the space-joined chunks."""
+        words = ["emoji😀", "music🎵", "cjk你好", "plain"] * 100
+        msg = " ".join(words)
+        chunks = BasePlatformAdapter.truncate_message(msg, 200, len_fn=utf16_len)
+        reassembled = " ".join(chunks)
+        for word in words:
+            assert word in reassembled, f"Word '{word}' lost during UTF-16 split"
+
+    def test_code_blocks_preserved_with_utf16(self):
+        """Code block fence handling should work with utf16_len too."""
+        msg = "Before\n```python\n" + "x = '😀'\n" * 200 + "```\nAfter"
+        chunks = BasePlatformAdapter.truncate_message(msg, 300, len_fn=utf16_len)
+        assert len(chunks) > 1
+        # Each chunk should have balanced fences (an even number of ``` markers)
+        for i, chunk in enumerate(chunks):
+            fence_count = chunk.count("```")
+            assert fence_count % 2 == 0, (
+                f"Chunk {i} has unbalanced fences ({fence_count})"
+            )
+
@@ -0,0 +1,215 @@
+"""Tests for /restart notification — the gateway notifies the requester on comeback."""
+
+import asyncio
+import json
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+import gateway.run as gateway_run
+from gateway.config import Platform
+from gateway.platforms.base import MessageEvent, MessageType
+from gateway.session import build_session_key
+from tests.gateway.restart_test_helpers import (
+    make_restart_runner,
+    make_restart_source,
+)
+
+
+# ── _handle_restart_command writes .restart_notify.json ──────────────────
+
+
+@pytest.mark.asyncio
+async def test_restart_command_writes_notify_file(tmp_path, monkeypatch):
+    """/restart writes .restart_notify.json under _hermes_home with the requester's platform and chat_id."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)  # keep the notify file inside the temp dir
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)  # stub out the actual restart
+
+    source = make_restart_source(chat_id="42")
+    event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id="m1",
+    )
+
+    result = await runner._handle_restart_command(event)
+    assert "Restarting" in result
+
+    notify_path = tmp_path / ".restart_notify.json"
+    assert notify_path.exists()
+    data = json.loads(notify_path.read_text())
+    assert data["platform"] == "telegram"
+    assert data["chat_id"] == "42"
+    assert "thread_id" not in data  # no thread → omitted
+
+
+@pytest.mark.asyncio
+async def test_restart_command_uses_service_restart_under_systemd(tmp_path, monkeypatch):
+    """With INVOCATION_ID set (systemd), /restart calls request_restart(detached=False, via_service=True)."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setenv("INVOCATION_ID", "abc123")  # simulate running as a systemd unit
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)  # stub out the actual restart
+
+    source = make_restart_source(chat_id="42")
+    event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id="m1",
+    )
+
+    await runner._handle_restart_command(event)
+    runner.request_restart.assert_called_once_with(detached=False, via_service=True)
+
+
+@pytest.mark.asyncio
+async def test_restart_command_uses_detached_without_systemd(tmp_path, monkeypatch):
+    """With INVOCATION_ID unset, /restart calls request_restart(detached=True, via_service=False)."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.delenv("INVOCATION_ID", raising=False)  # make sure an ambient systemd env does not leak in
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)  # stub out the actual restart
+
+    source = make_restart_source(chat_id="42")
+    event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id="m1",
+    )
+
+    await runner._handle_restart_command(event)
+    runner.request_restart.assert_called_once_with(detached=True, via_service=False)
+
+
+@pytest.mark.asyncio
+async def test_restart_command_preserves_thread_id(tmp_path, monkeypatch):
+    """A source with thread_id set gets that thread_id written to .restart_notify.json."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    runner, _adapter = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)  # stub out the actual restart
+
+    source = make_restart_source(chat_id="99")
+    source.thread_id = "topic_7"
+
+    event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=source,
+        message_id="m2",
+    )
+
+    await runner._handle_restart_command(event)
+
+    data = json.loads((tmp_path / ".restart_notify.json").read_text())
+    assert data["thread_id"] == "topic_7"
+
+
+# ── _send_restart_notification ───────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkeypatch):
+    """A pending notify file triggers exactly one adapter.send to its chat_id, then the file is removed."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    notify_path.write_text(json.dumps({
+        "platform": "telegram",
+        "chat_id": "42",
+    }))
+
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock()
+
+    await runner._send_restart_notification()
+
+    adapter.send.assert_called_once()
+    call_args = adapter.send.call_args
+    assert call_args[0][0] == "42"  # chat_id
+    assert "restarted" in call_args[0][1].lower()  # message text
+    assert call_args[1].get("metadata") is None  # no thread
+    assert not notify_path.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_with_thread(tmp_path, monkeypatch):
+    """thread_id from the notify file is passed to adapter.send as the metadata keyword argument."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    notify_path.write_text(json.dumps({
+        "platform": "telegram",
+        "chat_id": "99",
+        "thread_id": "topic_7",
+    }))
+
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock()
+
+    await runner._send_restart_notification()
+
+    call_args = adapter.send.call_args
+    assert call_args[1]["metadata"] == {"thread_id": "topic_7"}  # call_args[1] holds the keyword arguments
+    assert not notify_path.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_noop_when_no_file(tmp_path, monkeypatch):
+    """With no .restart_notify.json in _hermes_home, adapter.send is never called."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)  # fresh temp dir: no notify file exists
+
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock()
+
+    await runner._send_restart_notification()
+
+    adapter.send.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_skips_when_adapter_missing(tmp_path, monkeypatch):
+    """A notify file naming a platform with no adapter: nothing raises and the file is still removed."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    notify_path.write_text(json.dumps({
+        "platform": "discord",  # runner only has telegram adapter
+        "chat_id": "42",
+    }))
+
+    runner, _adapter = make_restart_runner()
+
+    await runner._send_restart_notification()
+
+    # File cleaned up even though we couldn't send
+    assert not notify_path.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_cleans_up_on_send_failure(
+    tmp_path, monkeypatch
+):
+    """adapter.send() raising RuntimeError does not propagate, and the notify file is still removed."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    notify_path.write_text(json.dumps({
+        "platform": "telegram",
+        "chat_id": "42",
+    }))
+
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock(side_effect=RuntimeError("network down"))
+
+    await runner._send_restart_notification()  # must not raise
+
+    assert not notify_path.exists()  # cleaned up despite error
@@ -396,6 +396,27 @@ class QueuedCommentaryAgent:
        }


+class VerboseAgent:
+    """Stub agent: reports one execute_code "tool.started" event with a 300-char code arg, then returns "done"."""
+    LONG_CODE = "x" * 300  # longer than both the old 200-char cap and the explicit 50-char cap used in the tests
+
+    def __init__(self, **kwargs):
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")  # None if the caller passes no callback
+        self.tools = []
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        self.tool_progress_callback(
+            "tool.started", "execute_code", None,
+            {"code": self.LONG_CODE},
+        )
+        time.sleep(0.35)  # NOTE(review): presumably gives the progress sender time to deliver before returning — confirm
+        return {
+            "final_response": "done",
+            "messages": [],
+            "api_calls": 1,
+        }
+
+
 async def _run_with_agent(
    monkeypatch,
    tmp_path,
@@ -575,3 +596,45 @@ async def test_run_agent_queued_message_does_not_treat_commentary_as_final(monke
    assert result["final_response"] == "final response 2"
    assert "I'll inspect the repo first." in sent_texts
    assert "final response 1" in sent_texts
+
+
+@pytest.mark.asyncio
+async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path):
+    """Verbose mode with default tool_preview_length (0) should NOT truncate args.
+
+    Previously, verbose mode capped args at 200 chars when tool_preview_length
+    was 0 (default).  The user explicitly opted into verbose — show full detail.
+    """
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        VerboseAgent,
+        session_id="sess-verbose-no-truncate",
+        config_data={"display": {"tool_progress": "verbose", "tool_preview_length": 0}},
+    )
+
+    assert result["final_response"] == "done"
+    # Full 300-char 'x' string must be present (not cut to 200); space-separate so no match spans two messages
+    all_content = " ".join(call["content"] for call in adapter.sent)
+    all_content += " " + " ".join(call["content"] for call in adapter.edits)
+    assert VerboseAgent.LONG_CODE in all_content
+
+
+@pytest.mark.asyncio
+async def test_verbose_mode_respects_explicit_tool_preview_length(monkeypatch, tmp_path):
+    """When tool_preview_length is set to a positive value, verbose truncates to that."""
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        VerboseAgent,
+        session_id="sess-verbose-explicit-cap",
+        config_data={"display": {"tool_progress": "verbose", "tool_preview_length": 50}},
+    )
+
+    assert result["final_response"] == "done"
+    all_content = " ".join(call["content"] for call in adapter.sent)
+    all_content += " " + " ".join(call["content"] for call in adapter.edits)  # separator: no match may span two messages
+    # Should be truncated — full 300-char string NOT present
+    assert VerboseAgent.LONG_CODE not in all_content
+    # But should still contain the truncated portion with "..."
+    assert "..." in all_content
@@ -0,0 +1,279 @@
+"""Tests for _setup_feishu() in hermes_cli/gateway.py.
+
+Verifies that the interactive setup writes env vars that correctly drive the
+Feishu adapter: credentials, connection mode, DM policy, and group policy.
+"""
+
+import os
+from unittest.mock import patch
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _run_setup_feishu(
+    *,
+    qr_result=None,
+    prompt_yes_no_responses=None,
+    prompt_choice_responses=None,
+    prompt_responses=None,
+    existing_env=None,
+):
+    """Run _setup_feishu() with prompts, printing, env access and qr_register all mocked.
+
+    Each prompt_* list is consumed one item per call; returns {env_var_name: value} from save_env_value calls.
+    """
+    existing_env = existing_env or {}
+    prompt_yes_no_responses = list(prompt_yes_no_responses or [True])
+    # QR path: method(0), dm(0), group(0) — 3 choices (no connection mode)
+    # Manual path: method(1), domain(0), connection(0), dm(0), group(0) — 5 choices
+    prompt_choice_responses = list(prompt_choice_responses or [0, 0, 0])
+    prompt_responses = list(prompt_responses or [""])
+
+    saved_env = {}
+
+    def mock_save(name, value):
+        saved_env[name] = value  # a later save of the same variable overwrites the earlier one
+
+    def mock_get(name):
+        return existing_env.get(name, "")  # variables not in existing_env read as ""
+
+    with patch("hermes_cli.gateway.save_env_value", side_effect=mock_save), \
+         patch("hermes_cli.gateway.get_env_value", side_effect=mock_get), \
+         patch("hermes_cli.gateway.prompt_yes_no", side_effect=prompt_yes_no_responses), \
+         patch("hermes_cli.gateway.prompt_choice", side_effect=prompt_choice_responses), \
+         patch("hermes_cli.gateway.prompt", side_effect=prompt_responses), \
+         patch("hermes_cli.gateway.print_info"), \
+         patch("hermes_cli.gateway.print_success"), \
+         patch("hermes_cli.gateway.print_warning"), \
+         patch("hermes_cli.gateway.print_error"), \
+         patch("hermes_cli.gateway.color", side_effect=lambda t, c: t), \
+         patch("gateway.platforms.feishu.qr_register", return_value=qr_result):
+
+        from hermes_cli.gateway import _setup_feishu
+        _setup_feishu()
+
+    return saved_env
+
+
+# ---------------------------------------------------------------------------
+# QR scan-to-create path
+# ---------------------------------------------------------------------------
+
+class TestSetupFeishuQrPath:
+    """QR scan-to-create happy path: app credentials and domain are saved, bot identity is not."""
+
+    def test_qr_success_saves_core_credentials(self):
+        env = _run_setup_feishu(
+            qr_result={
+                "app_id": "cli_test",
+                "app_secret": "secret_test",
+                "domain": "feishu",
+                "open_id": "ou_owner",
+                "bot_name": "TestBot",
+                "bot_open_id": "ou_bot",
+            },
+            prompt_yes_no_responses=[True],        # Start QR
+            prompt_choice_responses=[0, 0, 0],  # method=QR, dm=pairing, group=open
+            prompt_responses=[""],                  # home channel: skip
+        )
+        assert env["FEISHU_APP_ID"] == "cli_test"
+        assert env["FEISHU_APP_SECRET"] == "secret_test"
+        assert env["FEISHU_DOMAIN"] == "feishu"
+
+    def test_qr_success_does_not_persist_bot_identity(self):
+        """Bot identity is discovered at runtime by _hydrate_bot_identity — not persisted
+        in env, so it stays fresh if the user renames the bot later."""
+        env = _run_setup_feishu(
+            qr_result={
+                "app_id": "cli_test",
+                "app_secret": "secret_test",
+                "domain": "feishu",
+                "open_id": "ou_owner",
+                "bot_name": "TestBot",
+                "bot_open_id": "ou_bot",
+            },
+            prompt_yes_no_responses=[True],
+            prompt_choice_responses=[0, 0, 0],
+            prompt_responses=[""],
+        )
+        assert "FEISHU_BOT_OPEN_ID" not in env  # present in qr_result, but must not be saved
+        assert "FEISHU_BOT_NAME" not in env  # present in qr_result, but must not be saved
+
+
+# ---------------------------------------------------------------------------
+# Connection mode
+# ---------------------------------------------------------------------------
+
+class TestSetupFeishuConnectionMode:
+    """Connection mode: QR path saves "websocket"; manual path saves the chosen mode (0=websocket, 1=webhook)."""
+
+    def test_qr_path_defaults_to_websocket(self):
+        env = _run_setup_feishu(
+            qr_result={
+                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
+                "open_id": None, "bot_name": None, "bot_open_id": None,
+            },
+            prompt_choice_responses=[0, 0, 0],  # method=QR, dm=pairing, group=open
+            prompt_responses=[""],
+        )
+        assert env["FEISHU_CONNECTION_MODE"] == "websocket"  # saved although no connection choice was prompted
+
+    @patch("gateway.platforms.feishu.probe_bot", return_value=None)
+    def test_manual_path_websocket(self, _mock_probe):
+        env = _run_setup_feishu(
+            qr_result=None,
+            prompt_choice_responses=[1, 0, 0, 0, 0],  # method=manual, domain=feishu, connection=ws, dm=pairing, group=open
+            prompt_responses=["cli_manual", "secret_manual", ""],  # app_id, app_secret, home_channel
+        )
+        assert env["FEISHU_CONNECTION_MODE"] == "websocket"
+
+    @patch("gateway.platforms.feishu.probe_bot", return_value=None)
+    def test_manual_path_webhook(self, _mock_probe):
+        env = _run_setup_feishu(
+            qr_result=None,
+            prompt_choice_responses=[1, 0, 1, 0, 0],  # method=manual, domain=feishu, connection=webhook, dm=pairing, group=open
+            prompt_responses=["cli_manual", "secret_manual", ""],  # app_id, app_secret, home_channel
+        )
+        assert env["FEISHU_CONNECTION_MODE"] == "webhook"
+
+
+# ---------------------------------------------------------------------------
+# DM security policy
+# ---------------------------------------------------------------------------
+
+class TestSetupFeishuDmPolicy:
+    """DM policy must use platform-scoped FEISHU_ALLOW_ALL_USERS, not the global flag."""
+
+    def _run_with_dm_choice(self, dm_choice_idx, prompt_responses=None):
+        return _run_setup_feishu(
+            qr_result={
+                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
+                "open_id": "ou_owner", "bot_name": None, "bot_open_id": None,
+            },
+            prompt_yes_no_responses=[True],
+            prompt_choice_responses=[0, dm_choice_idx, 0],  # method=QR, dm=<choice>, group=open
+            prompt_responses=prompt_responses or [""],
+        )
+
+    def test_pairing_sets_feishu_allow_all_false(self):
+        env = self._run_with_dm_choice(0)  # dm choice 0 = pairing
+        assert env["FEISHU_ALLOW_ALL_USERS"] == "false"
+        assert env["FEISHU_ALLOWED_USERS"] == ""
+        assert "GATEWAY_ALLOW_ALL_USERS" not in env
+
+    def test_allow_all_sets_feishu_allow_all_true(self):
+        env = self._run_with_dm_choice(1)  # dm choice 1 = allow all
+        assert env["FEISHU_ALLOW_ALL_USERS"] == "true"
+        assert env["FEISHU_ALLOWED_USERS"] == ""
+        assert "GATEWAY_ALLOW_ALL_USERS" not in env
+
+    def test_allowlist_sets_feishu_allow_all_false_with_list(self):
+        env = self._run_with_dm_choice(2, prompt_responses=["ou_user1,ou_user2", ""])  # dm choice 2 = allowlist
+        assert env["FEISHU_ALLOW_ALL_USERS"] == "false"
+        assert env["FEISHU_ALLOWED_USERS"] == "ou_user1,ou_user2"
+        assert "GATEWAY_ALLOW_ALL_USERS" not in env
+
+    def test_allowlist_prepopulates_with_scan_owner_open_id(self):
+        """Allowlist answer "ou_owner" is saved. NOTE(review): the default offered by the prompt is not asserted here."""
+        # We return the owner's open_id from prompt (+ empty home channel).
+        env = self._run_with_dm_choice(2, prompt_responses=["ou_owner", ""])
+        assert env["FEISHU_ALLOWED_USERS"] == "ou_owner"
+
+
+
+# ---------------------------------------------------------------------------
+# Group policy
+# ---------------------------------------------------------------------------
+
+class TestSetupFeishuGroupPolicy:  # QR path, third prompt_choice answer: 0 → "open", 1 → "disabled"
+
+    def test_open_with_mention(self):
+        env = _run_setup_feishu(
+            qr_result={
+                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
+                "open_id": None, "bot_name": None, "bot_open_id": None,
+            },
+            prompt_yes_no_responses=[True],
+            prompt_choice_responses=[0, 0, 0],  # method=QR, dm=pairing, group=open
+            prompt_responses=[""],
+        )
+        assert env["FEISHU_GROUP_POLICY"] == "open"  # only the policy value is checked, not mention handling
+
+    def test_disabled(self):
+        env = _run_setup_feishu(
+            qr_result={
+                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
+                "open_id": None, "bot_name": None, "bot_open_id": None,
+            },
+            prompt_yes_no_responses=[True],
+            prompt_choice_responses=[0, 0, 1],  # method=QR, dm=pairing, group=disabled
+            prompt_responses=[""],
+        )
+        assert env["FEISHU_GROUP_POLICY"] == "disabled"
+
+
+# ---------------------------------------------------------------------------
+# Adapter integration: env vars → FeishuAdapterSettings
+# ---------------------------------------------------------------------------
+
+class TestSetupFeishuAdapterIntegration:
+    """Verify that env vars written by _setup_feishu() produce a valid adapter config.
+
+    This bridges the gap between 'setup wrote the right env vars' and
+    'the adapter will actually initialize correctly from those vars'.
+    """
+
+    def _make_env_from_setup(self, dm_idx=0, group_idx=0):
+        """Run _setup_feishu via QR path and return the env vars it would write."""
+        return _run_setup_feishu(
+            qr_result={
+                "app_id": "cli_test_app",
+                "app_secret": "test_secret_value",
+                "domain": "feishu",
+                "open_id": "ou_owner",
+                "bot_name": "IntegrationBot",
+                "bot_open_id": "ou_bot_integration",
+            },
+            prompt_yes_no_responses=[True],
+            prompt_choice_responses=[0, dm_idx, group_idx],  # method=QR, dm, group
+            prompt_responses=[""],
+        )
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_qr_env_produces_valid_adapter_settings(self):
+        """QR setup → adapter picks up app id, secret, domain and websocket mode from the saved env."""
+        env = self._make_env_from_setup()
+
+        with patch.dict(os.environ, env, clear=True):  # environment holds only what setup saved
+            from gateway.config import PlatformConfig
+            from gateway.platforms.feishu import FeishuAdapter
+            adapter = FeishuAdapter(PlatformConfig())
+            assert adapter._app_id == "cli_test_app"
+            assert adapter._app_secret == "test_secret_value"
+            assert adapter._domain_name == "feishu"
+            assert adapter._connection_mode == "websocket"
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_open_dm_env_sets_correct_adapter_state(self):
+        """Setup with 'allow all DMs' → adapter constructs and FEISHU_ALLOW_ALL_USERS is "true" in the env."""
+        env = self._make_env_from_setup(dm_idx=1)
+
+        with patch.dict(os.environ, env, clear=True):  # environment holds only what setup saved
+            from gateway.platforms.feishu import FeishuAdapter
+            from gateway.config import PlatformConfig
+            # Verify adapter initializes without error and env var is correct (no adapter attribute is inspected).
+            FeishuAdapter(PlatformConfig())
+            assert os.getenv("FEISHU_ALLOW_ALL_USERS") == "true"
+
+    @patch.dict(os.environ, {}, clear=True)
+    def test_group_open_env_sets_adapter_group_policy(self):
+        """Setup with 'open groups' → adapter group_policy is 'open'."""
+        env = self._make_env_from_setup(group_idx=0)
+
+        with patch.dict(os.environ, env, clear=True):  # environment holds only what setup saved
+            from gateway.config import PlatformConfig
+            from gateway.platforms.feishu import FeishuAdapter
+            adapter = FeishuAdapter(PlatformConfig())
+            assert adapter._group_policy == "open"
@@ -121,7 +121,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="off")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -133,7 +133,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="first")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -148,7 +148,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="all")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -162,7 +162,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="all")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]

        await adapter.send("12345", "test content", reply_to=None)

@@ -175,7 +175,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="first")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -403,6 +403,56 @@ class TestWatchUpdateProgress:

        # Should not crash; legacy notification handles this case

+    @pytest.mark.asyncio
+    async def test_prompt_forwarded_only_once(self, tmp_path):
+        """Regression: prompt must not be re-sent on every poll cycle.
+
+        Before the fix, the watcher never deleted .update_prompt.json after
+        forwarding, causing the same prompt to be sent every poll_interval.
+        """
+        runner = _make_runner()
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir()
+
+        pending = {"platform": "telegram", "chat_id": "111", "user_id": "222",
+                   "session_key": "agent:main:telegram:dm:111"}
+        (hermes_home / ".update_pending.json").write_text(json.dumps(pending))
+        (hermes_home / ".update_output.txt").write_text("")
+
+        mock_adapter = AsyncMock()
+        runner.adapters = {Platform.TELEGRAM: mock_adapter}
+
+        # Write the prompt file up front (before the watcher starts).
+        # The watcher should forward it exactly once, then delete it.
+        prompt = {"prompt": "Would you like to configure new options now? Y/n",
+                  "default": "n", "id": "dup-test"}
+        (hermes_home / ".update_prompt.json").write_text(json.dumps(prompt))
+
+        async def finish_after_polls():
+            # Wait long enough for multiple poll cycles to occur, then
+            # simulate a response + completion.
+            await asyncio.sleep(1.0)
+            (hermes_home / ".update_response").write_text("n")
+            await asyncio.sleep(0.3)
+            (hermes_home / ".update_exit_code").write_text("0")
+
+        with patch("gateway.run._hermes_home", hermes_home):
+            task = asyncio.create_task(finish_after_polls())
+            await runner._watch_update_progress(
+                poll_interval=0.1,
+                stream_interval=0.2,
+                timeout=10.0,
+            )
+            await task
+
+        # Count how many times the prompt text was sent
+        all_sent = [str(c) for c in mock_adapter.send.call_args_list]
+        prompt_sends = [s for s in all_sent if "configure new options" in s]
+        assert len(prompt_sends) == 1, (
+            f"Prompt was sent {len(prompt_sends)} times (expected 1). "
+            f"All sends: {all_sent}"
+        )
+

 # ---------------------------------------------------------------------------
 # Message interception for update prompts
@@ -0,0 +1,141 @@
+"""Tests for gateway weak credential rejection at startup.
+
+Ported from openclaw/openclaw#64586: rejects known-weak placeholder
+tokens at gateway startup instead of letting them silently fail
+against platform APIs.
+"""
+
+import logging
+
+import pytest
+
+from gateway.config import PlatformConfig, Platform, _validate_gateway_config
+
+
+# ---------------------------------------------------------------------------
+# Helper: create a minimal GatewayConfig with one enabled platform
+# ---------------------------------------------------------------------------
+
+
+def _make_gateway_config(platform, token, enabled=True, **extra_kwargs):
+    """Create a minimal GatewayConfig holding a single platform entry for validation testing."""
+    from gateway.config import GatewayConfig
+
+    config = GatewayConfig(platforms={})
+    pconfig = PlatformConfig(enabled=enabled, token=token, **extra_kwargs)
+    config.platforms[platform] = pconfig
+    return config
+
+
+def _validate_and_return(config):
+    """Call _validate_gateway_config and return the config (mutated in place)."""
+    _validate_gateway_config(config)
+    return config
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: platform token placeholder rejection
+# ---------------------------------------------------------------------------
+
+
+class TestPlatformTokenPlaceholderGuard:
+    """Verify that _validate_gateway_config disables platforms with placeholder tokens."""
+
+    def test_rejects_triple_asterisk(self, caplog):
+        """'***' is the .env.example placeholder — should be rejected."""
+        config = _make_gateway_config(Platform.TELEGRAM, "***")
+        with caplog.at_level(logging.ERROR):
+            _validate_and_return(config)
+        assert config.platforms[Platform.TELEGRAM].enabled is False
+        assert "placeholder" in caplog.text.lower()
+
+    def test_rejects_changeme(self, caplog):
+        config = _make_gateway_config(Platform.DISCORD, "changeme")
+        with caplog.at_level(logging.ERROR):
+            _validate_and_return(config)
+        assert config.platforms[Platform.DISCORD].enabled is False
+
+    def test_rejects_your_api_key(self, caplog):
+        config = _make_gateway_config(Platform.SLACK, "your_api_key")
+        with caplog.at_level(logging.ERROR):
+            _validate_and_return(config)
+        assert config.platforms[Platform.SLACK].enabled is False
+
+    def test_rejects_placeholder(self, caplog):
+        config = _make_gateway_config(Platform.MATRIX, "placeholder")
+        with caplog.at_level(logging.ERROR):
+            _validate_and_return(config)
+        assert config.platforms[Platform.MATRIX].enabled is False
+
+    def test_accepts_real_token(self, caplog):
+        """A real-looking bot token should pass validation."""
+        config = _make_gateway_config(
+            Platform.TELEGRAM, "7123456789:AAHdqTcvCH1vGWJxfSeOfSAs0K5PALDsaw"
+        )
+        with caplog.at_level(logging.ERROR):
+            _validate_and_return(config)
+        assert config.platforms[Platform.TELEGRAM].enabled is True
+        assert "placeholder" not in caplog.text.lower()
+
+    def test_accepts_empty_token_without_error(self, caplog):
+        """Empty tokens get a warning (existing behavior), not a placeholder error."""
+        config = _make_gateway_config(Platform.TELEGRAM, "")
+        with caplog.at_level(logging.WARNING):
+            _validate_and_return(config)
+        # Empty token doesn't trigger placeholder rejection — enabled stays True
+        # (the existing empty-token warning is separate)
+        assert config.platforms[Platform.TELEGRAM].enabled is True
+
+    def test_disabled_platform_not_checked(self, caplog):
+        """Disabled platforms should not be validated."""
+        config = _make_gateway_config(Platform.TELEGRAM, "***", enabled=False)
+        with caplog.at_level(logging.ERROR):
+            _validate_and_return(config)
+        assert "placeholder" not in caplog.text.lower()
+
+    def test_rejects_whitespace_padded_placeholder(self, caplog):
+        """Whitespace-padded placeholders should still be caught."""
+        config = _make_gateway_config(Platform.TELEGRAM, "  ***  ")
+        with caplog.at_level(logging.ERROR):
+            _validate_and_return(config)
+        assert config.platforms[Platform.TELEGRAM].enabled is False
+
+
+# ---------------------------------------------------------------------------
+# Integration test: API server placeholder key on network-accessible host
+# ---------------------------------------------------------------------------
+
+
+class TestAPIServerPlaceholderKeyGuard:
+    """Verify that the API server rejects placeholder keys on network hosts."""
+
+    @pytest.mark.asyncio
+    async def test_refuses_wildcard_with_placeholder_key(self):
+        from gateway.platforms.api_server import APIServerAdapter
+
+        adapter = APIServerAdapter(
+            PlatformConfig(enabled=True, extra={"host": "0.0.0.0", "key": "changeme"})
+        )
+        result = await adapter.connect()
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_refuses_wildcard_with_asterisk_key(self):
+        from gateway.platforms.api_server import APIServerAdapter
+
+        adapter = APIServerAdapter(
+            PlatformConfig(enabled=True, extra={"host": "0.0.0.0", "key": "***"})
+        )
+        result = await adapter.connect()
+        assert result is False
+
+    def test_allows_loopback_with_placeholder_key(self):
+        """Loopback with a placeholder key is fine — not network-exposed."""
+        from gateway.platforms.api_server import APIServerAdapter
+        from gateway.platforms.base import is_network_accessible
+
+        adapter = APIServerAdapter(
+            PlatformConfig(enabled=True, extra={"host": "127.0.0.1", "key": "changeme"})
+        )
+        # On loopback the placeholder guard doesn't fire
+        assert is_network_accessible(adapter._host) is False
@@ -30,7 +30,7 @@ class TestWeixinFormatting:

        assert (
            adapter.format_message(content)
-            == "【Title】\n\n**Plan**\n\nUse **bold** and [docs](https://example.com)."
+            == "【Title】\n\n**Plan**\n\nUse **bold** and docs (https://example.com)."
        )

    def test_format_message_rewrites_markdown_tables(self):
@@ -374,3 +374,149 @@ class TestWeixinRemoteMediaSafety:
                assert "Blocked unsafe URL" in str(exc)
            else:
                raise AssertionError("expected ValueError for unsafe URL")
+
+
+class TestWeixinMarkdownLinks:
+    """Markdown links should be converted to plaintext since WeChat can't render them."""
+
+    def test_format_message_converts_markdown_links_to_plain_text(self):
+        adapter = _make_adapter()
+
+        content = "Check [the docs](https://example.com) and [GitHub](https://github.com) for details"
+        assert (
+            adapter.format_message(content)
+            == "Check the docs (https://example.com) and GitHub (https://github.com) for details"
+        )
+
+    def test_format_message_preserves_links_inside_code_blocks(self):
+        adapter = _make_adapter()
+
+        content = "See below:\n\n```\n[link](https://example.com)\n```\n\nDone."
+        result = adapter.format_message(content)
+        assert "[link](https://example.com)" in result
+
+
+class TestWeixinBlankMessagePrevention:
+    """Regression tests for the blank-bubble bugs.
+
+    Three separate guards now prevent a blank WeChat message from ever being
+    dispatched:
+
+    1. ``_split_text_for_weixin_delivery("")`` returns ``[]`` — not ``[""]``.
+    2. ``send()`` filters out empty/whitespace-only chunks before calling
+       ``_send_text_chunk``.
+    3. ``_send_message()`` raises ``ValueError`` for empty text as a last-resort
+       safety net.
+    """
+
+    def test_split_text_returns_empty_list_for_empty_string(self):
+        adapter = _make_adapter()
+        assert adapter._split_text("") == []
+
+    def test_split_text_returns_empty_list_for_empty_string_split_per_line(self):
+        adapter = WeixinAdapter(
+            PlatformConfig(
+                enabled=True,
+                extra={
+                    "account_id": "acct",
+                    "token": "test-tok",
+                    "split_multiline_messages": True,
+                },
+            )
+        )
+        assert adapter._split_text("") == []
+
+    @patch("gateway.platforms.weixin._send_message", new_callable=AsyncMock)
+    def test_send_empty_content_does_not_call_send_message(self, send_message_mock):
+        adapter = _make_adapter()
+        adapter._session = object()
+        adapter._token = "test-token"
+        adapter._base_url = "https://weixin.example.com"
+        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
+
+        result = asyncio.run(adapter.send("wxid_test123", ""))
+        # Empty content → no chunks → no _send_message calls
+        assert result.success is True
+        send_message_mock.assert_not_awaited()
+
+    def test_send_message_rejects_empty_text(self):
+        """_send_message raises ValueError for empty/whitespace text."""
+        import pytest
+        with pytest.raises(ValueError, match="text must not be empty"):
+            asyncio.run(
+                weixin._send_message(
+                    AsyncMock(),
+                    base_url="https://example.com",
+                    token="tok",
+                    to="wxid_test",
+                    text="",
+                    context_token=None,
+                    client_id="cid",
+                )
+            )
+
+
+class TestWeixinStreamingCursorSuppression:
+    """WeChat doesn't support message editing — cursor must be suppressed."""
+
+    def test_supports_message_editing_is_false(self):
+        adapter = _make_adapter()
+        assert adapter.SUPPORTS_MESSAGE_EDITING is False
+
+
+class TestWeixinMediaBuilder:
+    """Media builder uses base64(hex_key), not base64(raw_bytes) for aes_key."""
+
+    def test_image_builder_aes_key_is_base64_of_hex(self):
+        import base64
+        adapter = _make_adapter()
+        media_type, builder = adapter._outbound_media_builder("photo.jpg")
+        assert media_type == weixin.MEDIA_IMAGE
+
+        fake_hex_key = "0123456789abcdef0123456789abcdef"
+        expected_aes = base64.b64encode(fake_hex_key.encode("ascii")).decode("ascii")
+        item = builder(
+            encrypt_query_param="eq",
+            aes_key_for_api=expected_aes,
+            ciphertext_size=1024,
+            plaintext_size=1000,
+            filename="photo.jpg",
+            rawfilemd5="abc123",
+        )
+        assert item["image_item"]["media"]["aes_key"] == expected_aes
+
+    def test_video_builder_includes_md5(self):
+        adapter = _make_adapter()
+        media_type, builder = adapter._outbound_media_builder("clip.mp4")
+        assert media_type == weixin.MEDIA_VIDEO
+
+        item = builder(
+            encrypt_query_param="eq",
+            aes_key_for_api="fakekey",
+            ciphertext_size=2048,
+            plaintext_size=2000,
+            filename="clip.mp4",
+            rawfilemd5="deadbeef",
+        )
+        assert item["video_item"]["video_md5"] == "deadbeef"
+
+    def test_voice_builder_for_audio_files(self):
+        adapter = _make_adapter()
+        media_type, builder = adapter._outbound_media_builder("note.mp3")
+        assert media_type == weixin.MEDIA_VOICE
+
+        item = builder(
+            encrypt_query_param="eq",
+            aes_key_for_api="fakekey",
+            ciphertext_size=512,
+            plaintext_size=500,
+            filename="note.mp3",
+            rawfilemd5="abc",
+        )
+        assert item["type"] == weixin.ITEM_VOICE
+        assert "voice_item" in item
+
+    def test_voice_builder_for_silk_files(self):
+        adapter = _make_adapter()
+        media_type, builder = adapter._outbound_media_builder("recording.silk")
+        assert media_type == weixin.MEDIA_VOICE
@@ -0,0 +1,271 @@
+"""Tests for WhatsApp message formatting and chunking.
+
+Covers:
+- format_message(): markdown → WhatsApp syntax conversion
+- send(): message chunking for long responses
+- MAX_MESSAGE_LENGTH: practical UX limit
+"""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_adapter():
+    """Create a WhatsAppAdapter with test attributes (bypass __init__)."""
+    from gateway.platforms.whatsapp import WhatsAppAdapter
+
+    adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
+    adapter.platform = Platform.WHATSAPP
+    adapter.config = MagicMock()
+    adapter.config.extra = {}
+    adapter._bridge_port = 3000
+    adapter._bridge_script = "/tmp/test-bridge.js"
+    adapter._session_path = MagicMock()
+    adapter._bridge_log_fh = None
+    adapter._bridge_log = None
+    adapter._bridge_process = None
+    adapter._reply_prefix = None
+    adapter._running = True
+    adapter._message_handler = None
+    adapter._fatal_error_code = None
+    adapter._fatal_error_message = None
+    adapter._fatal_error_retryable = True
+    adapter._fatal_error_handler = None
+    adapter._active_sessions = {}
+    adapter._pending_messages = {}
+    adapter._background_tasks = set()
+    adapter._auto_tts_disabled_chats = set()
+    adapter._message_queue = asyncio.Queue()
+    adapter._http_session = MagicMock()
+    adapter._mention_patterns = []
+    return adapter
+
+
+class _AsyncCM:
+    """Minimal async context manager returning a fixed value."""
+
+    def __init__(self, value):
+        self.value = value
+
+    async def __aenter__(self):
+        return self.value
+
+    async def __aexit__(self, *exc):
+        return False
+
+
+# ---------------------------------------------------------------------------
+# format_message tests
+# ---------------------------------------------------------------------------
+
+class TestFormatMessage:
+    """WhatsApp markdown conversion."""
+
+    def test_bold_double_asterisk(self):
+        adapter = _make_adapter()
+        assert adapter.format_message("**hello**") == "*hello*"
+
+    def test_bold_double_underscore(self):
+        adapter = _make_adapter()
+        assert adapter.format_message("__hello__") == "*hello*"
+
+    def test_strikethrough(self):
+        adapter = _make_adapter()
+        assert adapter.format_message("~~deleted~~") == "~deleted~"
+
+    def test_headers_converted_to_bold(self):
+        adapter = _make_adapter()
+        assert adapter.format_message("# Title") == "*Title*"
+        assert adapter.format_message("## Subtitle") == "*Subtitle*"
+        assert adapter.format_message("### Deep") == "*Deep*"
+
+    def test_links_converted(self):
+        adapter = _make_adapter()
+        result = adapter.format_message("[click here](https://example.com)")
+        assert result == "click here (https://example.com)"
+
+    def test_code_blocks_protected(self):
+        """Code blocks should not have their content reformatted."""
+        adapter = _make_adapter()
+        content = "before **bold** ```python\n**not bold**\n``` after **bold**"
+        result = adapter.format_message(content)
+        assert "```python\n**not bold**\n```" in result
+        assert result.startswith("before *bold*")
+        assert result.endswith("after *bold*")
+
+    def test_inline_code_protected(self):
+        """Inline code should not have its content reformatted."""
+        adapter = _make_adapter()
+        content = "use `**raw**` here"
+        result = adapter.format_message(content)
+        assert "`**raw**`" in result
+        assert result.startswith("use ")
+
+    def test_empty_content(self):
+        adapter = _make_adapter()
+        assert adapter.format_message("") == ""
+        assert adapter.format_message(None) is None
+
+    def test_plain_text_unchanged(self):
+        adapter = _make_adapter()
+        assert adapter.format_message("hello world") == "hello world"
+
+    def test_already_whatsapp_italic(self):
+        """Single *italic* should pass through unchanged."""
+        adapter = _make_adapter()
+        # Single-asterisk text is left untouched; note WhatsApp renders *text* as bold (_text_ is italic)
+        assert adapter.format_message("*italic*") == "*italic*"
+
+    def test_multiline_mixed(self):
+        adapter = _make_adapter()
+        content = "# Header\n\n**Bold text** and ~~strike~~\n\n```\ncode\n```"
+        result = adapter.format_message(content)
+        assert "*Header*" in result
+        assert "*Bold text*" in result
+        assert "~strike~" in result
+        assert "```\ncode\n```" in result
+
+
+# ---------------------------------------------------------------------------
+# MAX_MESSAGE_LENGTH tests
+# ---------------------------------------------------------------------------
+
+class TestMessageLimits:
+    """WhatsApp message length limits."""
+
+    def test_max_message_length_is_practical(self):
+        from gateway.platforms.whatsapp import WhatsAppAdapter
+        assert WhatsAppAdapter.MAX_MESSAGE_LENGTH == 4096
+
+
+# ---------------------------------------------------------------------------
+# send() chunking tests
+# ---------------------------------------------------------------------------
+
+class TestSendChunking:
+    """WhatsApp send() splits long messages into chunks."""
+
+    @pytest.mark.asyncio
+    async def test_short_message_single_send(self):
+        adapter = _make_adapter()
+        resp = MagicMock(status=200)
+        resp.json = AsyncMock(return_value={"messageId": "msg1"})
+        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
+
+        result = await adapter.send("chat1", "short message")
+        assert result.success
+        # Only one call to bridge /send
+        assert adapter._http_session.post.call_count == 1
+
+    @pytest.mark.asyncio
+    async def test_long_message_chunked(self):
+        adapter = _make_adapter()
+        resp = MagicMock(status=200)
+        resp.json = AsyncMock(return_value={"messageId": "msg1"})
+        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
+
+        # Create a message longer than MAX_MESSAGE_LENGTH (4096)
+        long_msg = "a " * 3000  # ~6000 chars
+
+        result = await adapter.send("chat1", long_msg)
+        assert result.success
+        # Should have made multiple calls
+        assert adapter._http_session.post.call_count > 1
+
+    @pytest.mark.asyncio
+    async def test_empty_message_no_send(self):
+        adapter = _make_adapter()
+        result = await adapter.send("chat1", "")
+        assert result.success
+        assert adapter._http_session.post.call_count == 0
+
+    @pytest.mark.asyncio
+    async def test_whitespace_only_no_send(self):
+        adapter = _make_adapter()
+        result = await adapter.send("chat1", "   \n  ")
+        assert result.success
+        assert adapter._http_session.post.call_count == 0
+
+    @pytest.mark.asyncio
+    async def test_format_applied_before_send(self):
+        """Markdown should be converted to WhatsApp format before sending."""
+        adapter = _make_adapter()
+        resp = MagicMock(status=200)
+        resp.json = AsyncMock(return_value={"messageId": "msg1"})
+        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
+
+        await adapter.send("chat1", "**bold text**")
+
+        # Check the payload sent to the bridge
+        call_args = adapter._http_session.post.call_args
+        payload = call_args.kwargs.get("json") or call_args[1].get("json")
+        assert payload["message"] == "*bold text*"
+
+    @pytest.mark.asyncio
+    async def test_reply_to_only_on_first_chunk(self):
+        """reply_to should only be set on the first chunk."""
+        adapter = _make_adapter()
+        resp = MagicMock(status=200)
+        resp.json = AsyncMock(return_value={"messageId": "msg1"})
+        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
+
+        long_msg = "word " * 2000  # ~10000 chars, multiple chunks
+
+        await adapter.send("chat1", long_msg, reply_to="orig123")
+
+        calls = adapter._http_session.post.call_args_list
+        assert len(calls) > 1
+
+        # First chunk should have replyTo
+        first_payload = calls[0].kwargs.get("json") or calls[0][1].get("json")
+        assert first_payload.get("replyTo") == "orig123"
+
+        # Subsequent chunks should NOT have replyTo
+        for call in calls[1:]:
+            payload = call.kwargs.get("json") or call[1].get("json")
+            assert "replyTo" not in payload
+
+    @pytest.mark.asyncio
+    async def test_bridge_error_returns_failure(self):
+        adapter = _make_adapter()
+        resp = MagicMock(status=500)
+        resp.text = AsyncMock(return_value="Internal Server Error")
+        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
+
+        result = await adapter.send("chat1", "hello")
+        assert not result.success
+        assert "Internal Server Error" in result.error
+
+    @pytest.mark.asyncio
+    async def test_not_connected_returns_failure(self):
+        adapter = _make_adapter()
+        adapter._running = False
+
+        result = await adapter.send("chat1", "hello")
+        assert not result.success
+        assert "Not connected" in result.error
+
+
+# ---------------------------------------------------------------------------
+# display_config tier classification
+# ---------------------------------------------------------------------------
+
+class TestWhatsAppTier:
+    """WhatsApp should be classified as TIER_MEDIUM."""
+
+    def test_whatsapp_streaming_follows_global(self):
+        from gateway.display_config import resolve_display_setting
+        # TIER_MEDIUM has streaming: None (follow global), not False
+        assert resolve_display_setting({}, "whatsapp", "streaming") is None
+
+    def test_whatsapp_tool_progress_is_new(self):
+        from gateway.display_config import resolve_display_setting
+        assert resolve_display_setting({}, "whatsapp", "tool_progress") == "new"
@@ -14,6 +14,7 @@ from hermes_cli.auth import (
    PROVIDER_REGISTRY,
    _read_codex_tokens,
    _save_codex_tokens,
+    _write_codex_cli_tokens,
    _import_codex_cli_tokens,
    get_codex_auth_status,
    get_provider_auth_state,
@@ -161,7 +162,7 @@ def test_import_codex_cli_tokens_missing(tmp_path, monkeypatch):


 def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):
-    """Verify Hermes never writes to ~/.codex/auth.json."""
+    """Verify _save_codex_tokens writes only to Hermes auth store, not ~/.codex/."""
    hermes_home = tmp_path / "hermes"
    codex_home = tmp_path / "codex-cli"
    hermes_home.mkdir(parents=True, exist_ok=True)
@@ -173,7 +174,7 @@ def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):

    _save_codex_tokens({"access_token": "hermes-at", "refresh_token": "hermes-rt"})

-    # ~/.codex/auth.json should NOT exist
+    # ~/.codex/auth.json should NOT exist — _save_codex_tokens only touches Hermes store
    assert not (codex_home / "auth.json").exists()

    # Hermes auth store should have the tokens
@@ -181,6 +182,98 @@ def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):
    assert data["tokens"]["access_token"] == "hermes-at"


+def test_write_codex_cli_tokens_creates_file(tmp_path, monkeypatch):
+    """_write_codex_cli_tokens creates ~/.codex/auth.json with refreshed tokens."""
+    codex_home = tmp_path / "codex-cli"
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    _write_codex_cli_tokens("new-access", "new-refresh", last_refresh="2026-04-12T00:00:00Z")
+
+    auth_path = codex_home / "auth.json"
+    assert auth_path.exists()
+    data = json.loads(auth_path.read_text())
+    assert data["tokens"]["access_token"] == "new-access"
+    assert data["tokens"]["refresh_token"] == "new-refresh"
+    assert data["last_refresh"] == "2026-04-12T00:00:00Z"
+    # Verify file permissions are restricted
+    assert (auth_path.stat().st_mode & 0o777) == 0o600
+
+
+def test_write_codex_cli_tokens_preserves_existing(tmp_path, monkeypatch):
+    """_write_codex_cli_tokens preserves extra fields in existing auth.json."""
+    codex_home = tmp_path / "codex-cli"
+    codex_home.mkdir(parents=True, exist_ok=True)
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    existing = {
+        "tokens": {
+            "access_token": "old-access",
+            "refresh_token": "old-refresh",
+            "extra_field": "preserved",
+        },
+        "last_refresh": "2026-01-01T00:00:00Z",
+        "custom_key": "keep_me",
+    }
+    (codex_home / "auth.json").write_text(json.dumps(existing))
+
+    _write_codex_cli_tokens("updated-access", "updated-refresh")
+
+    data = json.loads((codex_home / "auth.json").read_text())
+    assert data["tokens"]["access_token"] == "updated-access"
+    assert data["tokens"]["refresh_token"] == "updated-refresh"
+    assert data["tokens"]["extra_field"] == "preserved"
+    assert data["custom_key"] == "keep_me"
+    # last_refresh not updated since we didn't pass it
+    assert data["last_refresh"] == "2026-01-01T00:00:00Z"
+
+
+def test_write_codex_cli_tokens_handles_missing_dir(tmp_path, monkeypatch):
+    """_write_codex_cli_tokens creates parent directories if missing."""
+    codex_home = tmp_path / "does" / "not" / "exist"
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    _write_codex_cli_tokens("at", "rt")
+
+    assert (codex_home / "auth.json").exists()
+    data = json.loads((codex_home / "auth.json").read_text())
+    assert data["tokens"]["access_token"] == "at"
+
+
+def test_refresh_codex_auth_tokens_writes_back_to_cli(tmp_path, monkeypatch):
+    """After refreshing, _refresh_codex_auth_tokens writes back to ~/.codex/auth.json."""
+    from hermes_cli.auth import _refresh_codex_auth_tokens
+
+    hermes_home = tmp_path / "hermes"
+    codex_home = tmp_path / "codex-cli"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    codex_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.setenv("CODEX_HOME", str(codex_home))
+
+    # Write initial CLI tokens
+    (codex_home / "auth.json").write_text(json.dumps({
+        "tokens": {"access_token": "old-at", "refresh_token": "old-rt"},
+    }))
+
+    # Mock the pure refresh to return new tokens
+    monkeypatch.setattr("hermes_cli.auth.refresh_codex_oauth_pure", lambda *a, **kw: {
+        "access_token": "refreshed-at",
+        "refresh_token": "refreshed-rt",
+        "last_refresh": "2026-04-12T01:00:00Z",
+    })
+
+    _refresh_codex_auth_tokens(
+        {"access_token": "old-at", "refresh_token": "old-rt"},
+        timeout_seconds=10,
+    )
+
+    # Verify CLI file was updated
+    cli_data = json.loads((codex_home / "auth.json").read_text())
+    assert cli_data["tokens"]["access_token"] == "refreshed-at"
+    assert cli_data["tokens"]["refresh_token"] == "refreshed-rt"
+
+
 def test_resolve_returns_hermes_auth_store_source(tmp_path, monkeypatch):
    hermes_home = tmp_path / "hermes"
    _setup_hermes_auth(hermes_home)
@@ -232,6 +232,44 @@ class TestBackup:
        assert len(zips) == 1


+# ---------------------------------------------------------------------------
+# _validate_backup_zip tests
+# ---------------------------------------------------------------------------
+
+class TestValidateBackupZip:
+    """Checks which archive contents _validate_backup_zip accepts as a Hermes backup."""
+
+    def _make_zip(self, zip_path: Path, filenames: list[str]) -> None:
+        """Create a zip at ``zip_path`` holding one dummy-content member per name."""
+        with zipfile.ZipFile(zip_path, "w") as archive:
+            for member in filenames:
+                archive.writestr(member, "dummy")
+
+    def _validate(self, zip_path: Path, filenames: list[str]):
+        """Build the archive, reopen it read-only and return the validator's result."""
+        from hermes_cli.backup import _validate_backup_zip
+
+        self._make_zip(zip_path, filenames)
+        with zipfile.ZipFile(zip_path, "r") as archive:
+            return _validate_backup_zip(archive)
+
+    def test_state_db_passes(self, tmp_path):
+        """A zip containing state.db is accepted as a valid Hermes backup."""
+        ok, reason = self._validate(
+            tmp_path / "backup.zip", ["state.db", "sessions/abc.json"]
+        )
+        assert ok, reason
+
+    def test_old_wrong_db_name_fails(self, tmp_path):
+        """A zip with only hermes_state.db (old wrong name) is rejected."""
+        ok, reason = self._validate(
+            tmp_path / "old.zip", ["hermes_state.db", "memory_store.db"]
+        )
+        assert not ok
+
+    def test_config_yaml_passes(self, tmp_path):
+        """A zip containing config.yaml is accepted (existing behaviour preserved)."""
+        ok, reason = self._validate(
+            tmp_path / "backup.zip", ["config.yaml", "skills/x/SKILL.md"]
+        )
+        assert ok, reason
+
+
 # ---------------------------------------------------------------------------
 # Import tests
 # ---------------------------------------------------------------------------
@@ -1,6 +1,7 @@
 """Tests for hermes claw commands."""

 from argparse import Namespace
+import subprocess
 from types import ModuleType
 from unittest.mock import MagicMock, patch

@@ -197,6 +198,11 @@ class TestClawCommand:
 class TestCmdMigrate:
    """Test the migrate command handler."""

+    @pytest.fixture(autouse=True)
+    def _mock_openclaw_running(self):
+        """Patch process detection to report nothing running for every test in this class."""
+        # autouse=True applies the patch without tests having to request the fixture.
+        with patch.object(claw_mod, "_detect_openclaw_processes", return_value=[]):
+            yield
+
    def test_error_when_source_missing(self, tmp_path, capsys):
        args = Namespace(
            source=str(tmp_path / "nonexistent"),
@@ -626,3 +632,120 @@ class TestPrintMigrationReport:
        claw_mod._print_migration_report(report, dry_run=False)
        captured = capsys.readouterr()
        assert "Nothing to migrate" in captured.out
+
+
+class TestDetectOpenclawProcesses:
+    """Exercises _detect_openclaw_processes with ``sys`` and ``subprocess`` mocked out.
+
+    Both attributes are replaced on ``claw_mod``, so each ``subprocess.run``
+    call is answered from the ``side_effect`` list in order.  Because
+    ``subprocess`` becomes a MagicMock, every test re-attaches the real
+    ``TimeoutExpired`` class: any ``except subprocess.TimeoutExpired`` clause
+    in the code under test must name a genuine exception class, otherwise
+    matching against it raises TypeError.
+    """
+
+    def test_returns_match_when_pgrep_finds_openclaw(self):
+        """On Linux, a pgrep hit is reported as one entry that mentions the PID."""
+        with patch.object(claw_mod, "sys") as mock_sys:
+            mock_sys.platform = "linux"
+            with patch.object(claw_mod, "subprocess") as mock_subprocess:
+                # systemd check misses, pgrep finds openclaw
+                mock_subprocess.run.side_effect = [
+                    MagicMock(returncode=1, stdout=""),  # systemctl
+                    MagicMock(returncode=0, stdout="1234\n"),  # pgrep
+                ]
+                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
+                result = claw_mod._detect_openclaw_processes()
+                assert len(result) == 1
+                assert "1234" in result[0]
+
+    def test_returns_empty_when_pgrep_finds_nothing(self):
+        """On darwin, failing probes produce an empty result."""
+        with patch.object(claw_mod, "sys") as mock_sys:
+            mock_sys.platform = "darwin"
+            with patch.object(claw_mod, "subprocess") as mock_subprocess:
+                mock_subprocess.run.side_effect = [
+                    MagicMock(returncode=1, stdout=""),  # systemctl (not found)
+                    MagicMock(returncode=1, stdout=""),  # pgrep
+                ]
+                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
+                result = claw_mod._detect_openclaw_processes()
+                assert result == []
+
+    def test_detects_systemd_service(self):
+        """On Linux, an ``active`` systemctl answer is reported as a systemd entry."""
+        with patch.object(claw_mod, "sys") as mock_sys:
+            mock_sys.platform = "linux"
+            with patch.object(claw_mod, "subprocess") as mock_subprocess:
+                mock_subprocess.run.side_effect = [
+                    MagicMock(returncode=0, stdout="active\n"),  # systemctl
+                    MagicMock(returncode=1, stdout=""),  # pgrep
+                ]
+                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
+                result = claw_mod._detect_openclaw_processes()
+                assert len(result) == 1
+                assert "systemd" in result[0]
+
+    def test_returns_match_on_windows_when_openclaw_exe_running(self):
+        """On Windows, tasklist output naming openclaw.exe yields a matching entry."""
+        with patch.object(claw_mod, "sys") as mock_sys:
+            mock_sys.platform = "win32"
+            with patch.object(claw_mod, "subprocess") as mock_subprocess:
+                mock_subprocess.run.side_effect = [
+                    MagicMock(returncode=0, stdout="openclaw.exe                 1234 Console    1     45,056 K\n"),
+                ]
+                # Keep the real exception class reachable, as in the POSIX tests.
+                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
+                result = claw_mod._detect_openclaw_processes()
+                assert len(result) >= 1
+                assert any("openclaw.exe" in r for r in result)
+
+    def test_returns_match_on_windows_when_node_exe_has_openclaw_in_cmdline(self):
+        """On Windows, a PID from the PowerShell probe is reported as a node.exe entry."""
+        with patch.object(claw_mod, "sys") as mock_sys:
+            mock_sys.platform = "win32"
+            with patch.object(claw_mod, "subprocess") as mock_subprocess:
+                mock_subprocess.run.side_effect = [
+                    MagicMock(returncode=0, stdout=""),  # tasklist openclaw.exe
+                    MagicMock(returncode=0, stdout=""),  # tasklist clawd.exe
+                    MagicMock(returncode=0, stdout="1234\n"),  # PowerShell
+                ]
+                # Keep the real exception class reachable, as in the POSIX tests.
+                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
+                result = claw_mod._detect_openclaw_processes()
+                assert len(result) >= 1
+                assert any("node.exe" in r for r in result)
+
+    def test_returns_empty_on_windows_when_nothing_found(self):
+        """On Windows, three empty probe outputs produce an empty result."""
+        with patch.object(claw_mod, "sys") as mock_sys:
+            mock_sys.platform = "win32"
+            with patch.object(claw_mod, "subprocess") as mock_subprocess:
+                mock_subprocess.run.side_effect = [
+                    MagicMock(returncode=0, stdout=""),
+                    MagicMock(returncode=0, stdout=""),
+                    MagicMock(returncode=0, stdout=""),
+                ]
+                # Keep the real exception class reachable, as in the POSIX tests.
+                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
+                result = claw_mod._detect_openclaw_processes()
+                assert result == []
+
+
+class TestWarnIfOpenclawRunning:
+    """Covers _warn_if_openclaw_running: silent, declined, accepted, auto-yes and non-TTY."""
+
+    BANNER = "OpenClaw appears to be running"
+
+    @staticmethod
+    def _detection(found):
+        """Patch process detection on ``claw_mod`` so that it reports ``found``."""
+        return patch.object(claw_mod, "_detect_openclaw_processes", return_value=found)
+
+    @staticmethod
+    def _running():
+        """Detection patch that reports a single live OpenClaw process."""
+        return TestWarnIfOpenclawRunning._detection(["openclaw process(es) (PIDs: 1234)"])
+
+    @staticmethod
+    def _prompt(answer):
+        """Patch the yes/no prompt so it returns ``answer`` without asking."""
+        return patch.object(claw_mod, "prompt_yes_no", return_value=answer)
+
+    @staticmethod
+    def _stdin_tty(is_tty):
+        """Patch ``sys.stdin.isatty`` as seen through ``claw_mod``."""
+        return patch.object(claw_mod.sys.stdin, "isatty", return_value=is_tty)
+
+    def test_noop_when_not_running(self, capsys):
+        """Nothing is printed when no process is detected."""
+        with self._detection([]):
+            claw_mod._warn_if_openclaw_running(auto_yes=False)
+        assert capsys.readouterr().out == ""
+
+    def test_warns_and_exits_when_running_and_user_declines(self, capsys):
+        """Declining the prompt on a TTY exits with status 0 after the warning."""
+        with self._running(), self._prompt(False), self._stdin_tty(True), \
+             pytest.raises(SystemExit) as exc_info:
+            claw_mod._warn_if_openclaw_running(auto_yes=False)
+        assert exc_info.value.code == 0
+        assert self.BANNER in capsys.readouterr().out
+
+    def test_warns_and_continues_when_running_and_user_accepts(self, capsys):
+        """Accepting the prompt on a TTY returns normally after the warning."""
+        with self._running(), self._prompt(True), self._stdin_tty(True):
+            claw_mod._warn_if_openclaw_running(auto_yes=False)
+        assert self.BANNER in capsys.readouterr().out
+
+    def test_warns_and_continues_in_auto_yes_mode(self, capsys):
+        """With auto_yes the warning is printed and the call returns."""
+        with self._running():
+            claw_mod._warn_if_openclaw_running(auto_yes=True)
+        assert self.BANNER in capsys.readouterr().out
+
+    def test_warns_and_continues_in_non_interactive_session(self, capsys):
+        """Without a TTY the warning plus a non-interactive notice are printed."""
+        with self._running(), self._stdin_tty(False):
+            claw_mod._warn_if_openclaw_running(auto_yes=False)
+        printed = capsys.readouterr().out
+        assert self.BANNER in printed
+        assert "Non-interactive session" in printed
@@ -12,49 +12,10 @@ from unittest.mock import MagicMock, patch
 import pytest

 from hermes_cli.config import (
-    _is_inside_container,
    get_container_exec_info,
 )


-# =============================================================================
-# _is_inside_container
-# =============================================================================
-
-
-def test_is_inside_container_dockerenv():
-    """Detects /.dockerenv marker file."""
-    with patch("os.path.exists") as mock_exists:
-        mock_exists.side_effect = lambda p: p == "/.dockerenv"
-        assert _is_inside_container() is True
-
-
-def test_is_inside_container_containerenv():
-    """Detects Podman's /run/.containerenv marker."""
-    with patch("os.path.exists") as mock_exists:
-        mock_exists.side_effect = lambda p: p == "/run/.containerenv"
-        assert _is_inside_container() is True
-
-
-def test_is_inside_container_cgroup_docker():
-    """Detects 'docker' in /proc/1/cgroup."""
-    with patch("os.path.exists", return_value=False), \
-         patch("builtins.open", create=True) as mock_open:
-        mock_open.return_value.__enter__ = lambda s: s
-        mock_open.return_value.__exit__ = MagicMock(return_value=False)
-        mock_open.return_value.read = MagicMock(
-            return_value="12:memory:/docker/abc123\n"
-        )
-        assert _is_inside_container() is True
-
-
-def test_is_inside_container_false_on_host():
-    """Returns False when none of the container indicators are present."""
-    with patch("os.path.exists", return_value=False), \
-         patch("builtins.open", side_effect=OSError("no such file")):
-        assert _is_inside_container() is False
-
-
 # =============================================================================
 # get_container_exec_info
 # =============================================================================
@@ -81,7 +42,7 @@ def container_env(tmp_path, monkeypatch):

 def test_get_container_exec_info_returns_metadata(container_env):
    """Reads .container-mode and returns all fields including exec_user."""
-    with patch("hermes_cli.config._is_inside_container", return_value=False):
+    with patch("hermes_constants.is_container", return_value=False):
        info = get_container_exec_info()

    assert info is not None
@@ -93,7 +54,7 @@ def test_get_container_exec_info_returns_metadata(container_env):

 def test_get_container_exec_info_none_inside_container(container_env):
    """Returns None when we're already inside a container."""
-    with patch("hermes_cli.config._is_inside_container", return_value=True):
+    with patch("hermes_constants.is_container", return_value=True):
        info = get_container_exec_info()

    assert info is None
@@ -106,7 +67,7 @@ def test_get_container_exec_info_none_without_file(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.delenv("HERMES_DEV", raising=False)

-    with patch("hermes_cli.config._is_inside_container", return_value=False):
+    with patch("hermes_constants.is_container", return_value=False):
        info = get_container_exec_info()

    assert info is None
@@ -116,7 +77,7 @@ def test_get_container_exec_info_skipped_when_hermes_dev(container_env, monkeypa
    """Returns None when HERMES_DEV=1 is set (dev mode bypass)."""
    monkeypatch.setenv("HERMES_DEV", "1")

-    with patch("hermes_cli.config._is_inside_container", return_value=False):
+    with patch("hermes_constants.is_container", return_value=False):
        info = get_container_exec_info()

    assert info is None
@@ -126,7 +87,7 @@ def test_get_container_exec_info_not_skipped_when_hermes_dev_zero(container_env,
    """HERMES_DEV=0 does NOT trigger bypass — only '1' does."""
    monkeypatch.setenv("HERMES_DEV", "0")

-    with patch("hermes_cli.config._is_inside_container", return_value=False):
+    with patch("hermes_constants.is_container", return_value=False):
        info = get_container_exec_info()

    assert info is not None
@@ -143,7 +104,7 @@ def test_get_container_exec_info_defaults():
            "# minimal file with no keys\n"
        )

-        with patch("hermes_cli.config._is_inside_container", return_value=False), \
+        with patch("hermes_constants.is_container", return_value=False), \
             patch("hermes_cli.config.get_hermes_home", return_value=hermes_home), \
             patch.dict(os.environ, {}, clear=False):
            os.environ.pop("HERMES_DEV", None)
@@ -165,7 +126,7 @@ def test_get_container_exec_info_docker_backend(container_env):
        "hermes_bin=/opt/hermes/bin/hermes\n"
    )

-    with patch("hermes_cli.config._is_inside_container", return_value=False):
+    with patch("hermes_constants.is_container", return_value=False):
        info = get_container_exec_info()

    assert info["backend"] == "docker"
@@ -176,7 +137,7 @@ def test_get_container_exec_info_docker_backend(container_env):

 def test_get_container_exec_info_crashes_on_permission_error(container_env):
    """PermissionError propagates instead of being silently swallowed."""
-    with patch("hermes_cli.config._is_inside_container", return_value=False), \
+    with patch("hermes_constants.is_container", return_value=False), \
         patch("builtins.open", side_effect=PermissionError("permission denied")):
        with pytest.raises(PermissionError):
            get_container_exec_info()
@@ -122,3 +122,54 @@ class TestCustomProviderModelSwitch:
        model = config.get("model")
        assert isinstance(model, dict)
        assert model["default"] == "model-X"
+
+    def test_api_mode_set_from_provider_info(self, config_home):
+        """When custom_providers entry has api_mode, it should be applied."""
+        import yaml
+        from hermes_cli.main import _model_flow_named_custom
+
+        provider_info = {
+            "name": "Anthropic Proxy",
+            "base_url": "https://proxy.example.com/anthropic",
+            "api_key": "***",
+            "model": "claude-3",
+            "api_mode": "anthropic_messages",
+        }
+
+        # Run the flow without a real terminal or network: the model list is stubbed,
+        # input() always answers "1", print() is silenced, and a None entry in
+        # sys.modules makes ``import simple_term_menu`` fail (presumably forcing a
+        # plain-input fallback -- confirm against _model_flow_named_custom).
+        with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \
+             patch.dict("sys.modules", {"simple_term_menu": None}), \
+             patch("builtins.input", return_value="1"), \
+             patch("builtins.print"):
+            _model_flow_named_custom({}, provider_info)
+
+        # The flow is expected to have persisted its choice to config.yaml.
+        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
+        model = config.get("model")
+        assert isinstance(model, dict)
+        assert model.get("api_mode") == "anthropic_messages"
+
+    def test_api_mode_cleared_when_not_specified(self, config_home):
+        """When custom_providers entry has no api_mode, stale api_mode is removed."""
+        import yaml
+        from hermes_cli.main import _model_flow_named_custom
+
+        # Pre-seed a stale api_mode in config
+        config_path = config_home / "config.yaml"
+        config_path.write_text(yaml.dump({"model": {"api_mode": "anthropic_messages"}}))
+
+        # Deliberately has no "api_mode" key.
+        provider_info = {
+            "name": "My vLLM",
+            "base_url": "https://vllm.example.com/v1",
+            "api_key": "***",
+            "model": "llama-3",
+        }
+
+        # Same non-interactive harness as the sibling test: stubbed model list,
+        # input() answers "1", print() silenced, simple_term_menu unimportable.
+        with patch("hermes_cli.models.fetch_api_models", return_value=["llama-3"]), \
+             patch.dict("sys.modules", {"simple_term_menu": None}), \
+             patch("builtins.input", return_value="1"), \
+             patch("builtins.print"):
+            _model_flow_named_custom({}, provider_info)
+
+        # Re-read the file from disk so the assertion sees what was persisted.
+        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
+        model = config.get("model")
+        assert isinstance(model, dict)
+        assert "api_mode" not in model, "Stale api_mode should be removed"
@@ -0,0 +1,461 @@
+"""Tests for ``hermes debug`` CLI command and debug utilities."""
+
+import os
+import sys
+import urllib.error
+from pathlib import Path
+from unittest.mock import MagicMock, patch, call
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def hermes_home(tmp_path, monkeypatch):
+    """Provide a throwaway HERMES_HOME whose logs/ directory holds three small log files."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+
+    # File name -> seeded content; written in this order below.
+    seeded_logs = {
+        "agent.log": (
+            "2026-04-12 17:00:00 INFO agent: session started\n"
+            "2026-04-12 17:00:01 INFO tools.terminal: running ls\n"
+            "2026-04-12 17:00:02 WARNING agent: high token usage\n"
+        ),
+        "errors.log": "2026-04-12 17:00:05 ERROR gateway.run: connection lost\n",
+        "gateway.log": "2026-04-12 17:00:10 INFO gateway.run: started\n",
+    }
+
+    logs_dir = home / "logs"
+    logs_dir.mkdir()
+    for filename, text in seeded_logs.items():
+        (logs_dir / filename).write_text(text)
+
+    return home
+
+
+# ---------------------------------------------------------------------------
+# Unit tests for upload helpers
+# ---------------------------------------------------------------------------
+
+class TestUploadPasteRs:
+    """Test paste.rs upload path."""
+
+    URLOPEN = "hermes_cli.debug.urllib.request.urlopen"
+
+    @staticmethod
+    def _response(body):
+        """Build a context-manager response mock whose ``read()`` yields ``body``."""
+        resp = MagicMock()
+        resp.read.return_value = body
+        resp.__enter__ = lambda s: s
+        resp.__exit__ = MagicMock(return_value=False)
+        return resp
+
+    def test_upload_paste_rs_success(self):
+        """A paste URL in the response body is returned without its trailing newline."""
+        from hermes_cli.debug import _upload_paste_rs
+
+        fake = self._response(b"https://paste.rs/abc123\n")
+        with patch(self.URLOPEN, return_value=fake):
+            result = _upload_paste_rs("hello world")
+
+        assert result == "https://paste.rs/abc123"
+
+    def test_upload_paste_rs_bad_response(self):
+        """A body that is not a paste URL raises ValueError('Unexpected response...')."""
+        from hermes_cli.debug import _upload_paste_rs
+
+        fake = self._response(b"<html>error</html>")
+        with patch(self.URLOPEN, return_value=fake), \
+             pytest.raises(ValueError, match="Unexpected response"):
+            _upload_paste_rs("test")
+
+    def test_upload_paste_rs_network_error(self):
+        """A URLError raised by urlopen propagates to the caller."""
+        from hermes_cli.debug import _upload_paste_rs
+
+        refused = urllib.error.URLError("connection refused")
+        with patch(self.URLOPEN, side_effect=refused), \
+             pytest.raises(urllib.error.URLError):
+            _upload_paste_rs("test")
+
+
+class TestUploadDpasteCom:
+    """Test dpaste.com fallback upload path."""
+
+    def test_upload_dpaste_com_success(self):
+        """The URL line returned by dpaste.com comes back without its trailing newline."""
+        from hermes_cli.debug import _upload_dpaste_com
+
+        fake_response = MagicMock()
+        fake_response.__enter__ = lambda s: s
+        fake_response.__exit__ = MagicMock(return_value=False)
+        fake_response.read.return_value = b"https://dpaste.com/ABCDEFG\n"
+
+        urlopen_patch = patch(
+            "hermes_cli.debug.urllib.request.urlopen", return_value=fake_response
+        )
+        with urlopen_patch:
+            result = _upload_dpaste_com("hello world", expiry_days=7)
+
+        assert result == "https://dpaste.com/ABCDEFG"
+
+
+class TestUploadToPastebin:
+    """Test the combined upload with fallback."""
+
+    PASTE_RS = "hermes_cli.debug._upload_paste_rs"
+    DPASTE = "hermes_cli.debug._upload_dpaste_com"
+
+    def test_tries_paste_rs_first(self):
+        """When paste.rs succeeds, its URL is returned after exactly one call."""
+        from hermes_cli.debug import upload_to_pastebin
+
+        with patch(self.PASTE_RS, return_value="https://paste.rs/test") as paste_rs:
+            result = upload_to_pastebin("content")
+
+        assert result == "https://paste.rs/test"
+        paste_rs.assert_called_once()
+
+    def test_falls_back_to_dpaste_com(self):
+        """When paste.rs raises, the dpaste.com URL is returned after one dpaste call."""
+        from hermes_cli.debug import upload_to_pastebin
+
+        primary = patch(self.PASTE_RS, side_effect=Exception("down"))
+        fallback = patch(self.DPASTE, return_value="https://dpaste.com/TEST")
+        with primary, fallback as dpaste:
+            result = upload_to_pastebin("content")
+
+        assert result == "https://dpaste.com/TEST"
+        dpaste.assert_called_once()
+
+    def test_raises_when_both_fail(self):
+        """When both services raise, RuntimeError('Failed to upload...') is raised."""
+        from hermes_cli.debug import upload_to_pastebin
+
+        primary = patch(self.PASTE_RS, side_effect=Exception("err1"))
+        fallback = patch(self.DPASTE, side_effect=Exception("err2"))
+        with primary, fallback, pytest.raises(RuntimeError, match="Failed to upload"):
+            upload_to_pastebin("content")
+
+
+# ---------------------------------------------------------------------------
+# Log reading
+# ---------------------------------------------------------------------------
+
+class TestReadFullLog:
+    """Test _read_full_log for standalone log uploads.
+
+    Every test that inspects the returned text first asserts it is not None,
+    so a regression surfaces as an assertion failure rather than a TypeError
+    from applying ``in`` to None.
+    """
+
+    def test_reads_small_file(self, hermes_home):
+        """The seeded agent.log content is returned."""
+        from hermes_cli.debug import _read_full_log
+
+        content = _read_full_log("agent")
+        assert content is not None
+        assert "session started" in content
+
+    def test_returns_none_for_missing(self, tmp_path, monkeypatch):
+        """With no logs directory at all, None is returned."""
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(home))
+
+        from hermes_cli.debug import _read_full_log
+        assert _read_full_log("agent") is None
+
+    def test_returns_none_for_empty(self, hermes_home):
+        """An empty agent.log with no rotated sibling yields None."""
+        # Truncate agent.log to empty
+        (hermes_home / "logs" / "agent.log").write_text("")
+
+        from hermes_cli.debug import _read_full_log
+        assert _read_full_log("agent") is None
+
+    def test_truncates_large_file(self, hermes_home):
+        """Files larger than max_bytes get tail-truncated."""
+        from hermes_cli.debug import _read_full_log
+
+        # Write a file larger than 1KB
+        big_content = "x" * 100 + "\n"
+        (hermes_home / "logs" / "agent.log").write_text(big_content * 200)
+
+        content = _read_full_log("agent", max_bytes=1024)
+        assert content is not None
+        assert "truncated" in content
+
+    def test_unknown_log_returns_none(self, hermes_home):
+        """A log name with no corresponding file yields None."""
+        from hermes_cli.debug import _read_full_log
+        assert _read_full_log("nonexistent") is None
+
+    def test_falls_back_to_rotated_file(self, hermes_home):
+        """When gateway.log doesn't exist, falls back to gateway.log.1."""
+        from hermes_cli.debug import _read_full_log
+
+        logs_dir = hermes_home / "logs"
+        # Remove the primary (if any) and create a .1 rotation
+        (logs_dir / "gateway.log").unlink(missing_ok=True)
+        (logs_dir / "gateway.log.1").write_text(
+            "2026-04-12 10:00:00 INFO gateway.run: rotated content\n"
+        )
+
+        content = _read_full_log("gateway")
+        assert content is not None
+        assert "rotated content" in content
+
+    def test_prefers_primary_over_rotated(self, hermes_home):
+        """Primary log is used when it exists, even if .1 also exists."""
+        from hermes_cli.debug import _read_full_log
+
+        logs_dir = hermes_home / "logs"
+        (logs_dir / "gateway.log").write_text("primary content\n")
+        (logs_dir / "gateway.log.1").write_text("rotated content\n")
+
+        content = _read_full_log("gateway")
+        # Guard first: ``in`` on None would raise TypeError instead of failing cleanly.
+        assert content is not None
+        assert "primary content" in content
+        assert "rotated" not in content
+
+    def test_falls_back_when_primary_empty(self, hermes_home):
+        """Empty primary log falls back to .1 rotation."""
+        from hermes_cli.debug import _read_full_log
+
+        logs_dir = hermes_home / "logs"
+        (logs_dir / "agent.log").write_text("")
+        (logs_dir / "agent.log.1").write_text("rotated agent data\n")
+
+        content = _read_full_log("agent")
+        assert content is not None
+        assert "rotated agent data" in content
+
+
+# ---------------------------------------------------------------------------
+# Debug report collection
+# ---------------------------------------------------------------------------
+
+class TestCollectDebugReport:
+    """Test the debug report builder."""
+
+    @staticmethod
+    def _build_report(dump_side_effect=None):
+        """Collect a 50-line report with ``hermes_cli.dump.run_dump`` patched out.
+
+        ``dump_side_effect``, when given, is installed on the patched run_dump.
+        """
+        from hermes_cli.debug import collect_debug_report
+
+        with patch("hermes_cli.dump.run_dump") as mock_dump:
+            if dump_side_effect is not None:
+                mock_dump.side_effect = dump_side_effect
+            return collect_debug_report(log_lines=50)
+
+    def test_report_includes_dump_output(self, hermes_home):
+        """Text printed by run_dump ends up inside the report."""
+        report = self._build_report(
+            lambda args: print(
+                "--- hermes dump ---\nversion: 0.8.0\n--- end dump ---"
+            )
+        )
+
+        assert "--- hermes dump ---" in report
+        assert "version: 0.8.0" in report
+
+    def test_report_includes_agent_log(self, hermes_home):
+        """The report has an agent.log section containing the seeded lines."""
+        report = self._build_report()
+
+        assert "--- agent.log" in report
+        assert "session started" in report
+
+    def test_report_includes_errors_log(self, hermes_home):
+        """The report has an errors.log section containing the seeded line."""
+        report = self._build_report()
+
+        assert "--- errors.log" in report
+        assert "connection lost" in report
+
+    def test_report_includes_gateway_log(self, hermes_home):
+        """The report has a gateway.log section."""
+        report = self._build_report()
+
+        assert "--- gateway.log" in report
+
+    def test_missing_logs_handled(self, tmp_path, monkeypatch):
+        """With no log files present the report contains a '(file not found)' marker."""
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(home))
+
+        report = self._build_report()
+
+        assert "(file not found)" in report
+
+
+# ---------------------------------------------------------------------------
+# CLI entry point — run_debug_share
+# ---------------------------------------------------------------------------
+
+class TestRunDebugShare:
+    """Test the run_debug_share CLI handler."""
+
+    def test_local_flag_prints_full_logs(self, hermes_home, capsys):
+        """--local prints the report plus full log contents."""
+        from hermes_cli.debug import run_debug_share
+
+        # MagicMock stands in for the parsed argparse namespace.
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = True
+
+        with patch("hermes_cli.dump.run_dump"):
+            run_debug_share(args)
+
+        out = capsys.readouterr().out
+        assert "--- agent.log" in out
+        assert "FULL agent.log" in out
+        assert "FULL gateway.log" in out
+
+    def test_share_uploads_three_pastes(self, hermes_home, capsys):
+        """Successful share uploads report + agent.log + gateway.log."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+
+        # One-element list so the nested function can mutate the counter.
+        call_count = [0]
+        uploaded_content = []
+        def _mock_upload(content, expiry_days=7):
+            # Record each payload and hand back a numbered URL in call order.
+            call_count[0] += 1
+            uploaded_content.append(content)
+            return f"https://paste.rs/paste{call_count[0]}"
+
+        with patch("hermes_cli.dump.run_dump") as mock_dump, \
+             patch("hermes_cli.debug.upload_to_pastebin",
+                    side_effect=_mock_upload):
+            mock_dump.side_effect = lambda a: print("--- hermes dump ---\nversion: test\n--- end dump ---")
+            run_debug_share(args)
+
+        out = capsys.readouterr().out
+        # Should have 3 uploads: report, agent.log, gateway.log
+        assert call_count[0] == 3
+        assert "paste.rs/paste1" in out  # Report
+        assert "paste.rs/paste2" in out  # agent.log
+        assert "paste.rs/paste3" in out  # gateway.log
+        assert "Report" in out
+        assert "agent.log" in out
+        assert "gateway.log" in out
+
+        # Each log paste should start with the dump header
+        agent_paste = uploaded_content[1]
+        assert "--- hermes dump ---" in agent_paste
+        assert "--- full agent.log ---" in agent_paste
+        gateway_paste = uploaded_content[2]
+        assert "--- hermes dump ---" in gateway_paste
+        assert "--- full gateway.log ---" in gateway_paste
+
+    def test_share_skips_missing_logs(self, tmp_path, monkeypatch, capsys):
+        """Only uploads logs that exist."""
+        # Bare home directory: no logs/ folder is created here.
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(home))
+
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+
+        call_count = [0]
+        def _mock_upload(content, expiry_days=7):
+            call_count[0] += 1
+            return f"https://paste.rs/paste{call_count[0]}"
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug.upload_to_pastebin",
+                    side_effect=_mock_upload):
+            run_debug_share(args)
+
+        out = capsys.readouterr().out
+        # Only the report should be uploaded (no log files exist)
+        assert call_count[0] == 1
+        assert "Report" in out
+
+    def test_share_continues_on_log_upload_failure(self, hermes_home, capsys):
+        """Log upload failure doesn't stop the report from being shared."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+
+        call_count = [0]
+        def _mock_upload(content, expiry_days=7):
+            # First call (the report) succeeds; every later call fails.
+            call_count[0] += 1
+            if call_count[0] > 1:
+                raise RuntimeError("upload failed")
+            return "https://paste.rs/report"
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug.upload_to_pastebin",
+                    side_effect=_mock_upload):
+            run_debug_share(args)
+
+        out = capsys.readouterr().out
+        assert "Report" in out
+        assert "paste.rs/report" in out
+        assert "failed to upload" in out
+
+    def test_share_exits_on_report_upload_failure(self, hermes_home, capsys):
+        """If the main report fails to upload, exit with code 1."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug.upload_to_pastebin",
+                    side_effect=RuntimeError("all failed")):
+            with pytest.raises(SystemExit) as exc_info:
+                run_debug_share(args)
+
+        assert exc_info.value.code == 1
+        # ``out`` holds the full capture result here; the message is expected on stderr.
+        out = capsys.readouterr()
+        assert "all failed" in out.err
+
+
+# ---------------------------------------------------------------------------
+# run_debug router
+# ---------------------------------------------------------------------------
+
+class TestRunDebug:
+    """Test the run_debug subcommand router."""
+
+    def test_no_subcommand_shows_usage(self, capsys):
+        """With debug_command unset, usage text mentioning 'hermes debug share' is printed."""
+        from hermes_cli.debug import run_debug
+
+        args = MagicMock()
+        args.debug_command = None
+
+        run_debug(args)
+
+        out = capsys.readouterr().out
+        assert "hermes debug share" in out
+
+    def test_share_subcommand_routes(self, hermes_home):
+        """Smoke test: debug_command='share' with --local runs without raising."""
+        from hermes_cli.debug import run_debug
+
+        args = MagicMock()
+        args.debug_command = "share"
+        args.lines = 200
+        args.expire = 7
+        args.local = True
+
+        # No assertion follows; the test passes as long as nothing raises.
+        with patch("hermes_cli.dump.run_dump"):
+            run_debug(args)
+
+
+# ---------------------------------------------------------------------------
+# Argparse integration
+# ---------------------------------------------------------------------------
+
+class TestArgparseIntegration:
+    """Import and dispatch smoke tests for the debug command entry points."""
+
+    def test_module_imports_clean(self):
+        """hermes_cli.debug imports and exposes callable run_debug / run_debug_share."""
+        from hermes_cli.debug import run_debug, run_debug_share
+        assert callable(run_debug)
+        assert callable(run_debug_share)
+
+    def test_cmd_debug_dispatches(self):
+        """Smoke test: cmd_debug accepts args with no subcommand without raising."""
+        from hermes_cli.main import cmd_debug
+
+        args = MagicMock()
+        args.debug_command = None
+        # NOTE(review): nothing is asserted about the output; presumably this reaches
+        # run_debug's usage path -- confirm against hermes_cli.main.cmd_debug.
+        cmd_debug(args)
@@ -394,6 +394,21 @@ class TestLaunchdServiceRecovery:


 class TestGatewayServiceDetection:
+    def test_supports_systemd_services_requires_systemctl_binary(self, monkeypatch):
+        """Linux, not Termux, but ``shutil.which`` finds nothing: systemd is unsupported."""
+        def nothing_on_path(name):
+            return None
+
+        monkeypatch.setattr(gateway_cli.shutil, "which", nothing_on_path)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+
+        supported = gateway_cli.supports_systemd_services()
+        assert supported is False
+
+    def test_supports_systemd_services_returns_true_when_systemctl_present(self, monkeypatch):
+        """Plain Linux (no Termux, no WSL) with ``systemctl`` resolvable: systemd is supported."""
+        def systemctl_on_path(name):
+            return "/usr/bin/systemctl"
+
+        monkeypatch.setattr(gateway_cli.shutil, "which", systemctl_on_path)
+        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+
+        supported = gateway_cli.supports_systemd_services()
+        assert supported is True
+
    def test_is_service_running_checks_system_scope_when_user_scope_is_inactive(self, monkeypatch):
        user_unit = SimpleNamespace(exists=lambda: True)
        system_unit = SimpleNamespace(exists=lambda: True)
@@ -418,6 +433,23 @@ class TestGatewayServiceDetection:

        assert gateway_cli._is_service_running() is True

+    def test_is_service_running_returns_false_when_systemctl_missing(self, monkeypatch):
+        """A ``FileNotFoundError`` raised by ``subprocess.run`` is reported as not running."""
+        existing_unit = SimpleNamespace(exists=lambda: True)
+
+        def raise_missing_binary(*args, **kwargs):
+            raise FileNotFoundError("systemctl")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", raise_missing_binary)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_systemd_unit_path",
+            lambda system=False: existing_unit,
+        )
+
+        running = gateway_cli._is_service_running()
+        assert running is False
+

 class TestGatewaySystemServiceRouting:
    def test_systemd_restart_self_requests_graceful_restart_without_reload_or_restart(self, monkeypatch, capsys):
@@ -1001,3 +1033,91 @@ class TestSystemUnitPathRemapping:
        # Target user paths should be present
        assert "/home/alice" in unit
        assert "WorkingDirectory=/home/alice/.hermes/hermes-agent" in unit
+
+
+class TestDockerAwareGateway:
+    """Tests for Docker container awareness in gateway commands."""
+
+    def test_run_systemctl_raises_runtimeerror_when_missing(self, monkeypatch):
+        """_run_systemctl raises RuntimeError with container guidance when systemctl is absent."""
+        import pytest
+
+        def fake_run(cmd, **kwargs):
+            raise FileNotFoundError("systemctl")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        with pytest.raises(RuntimeError, match="systemctl is not available"):
+            gateway_cli._run_systemctl(["start", "hermes-gateway"])
+
+    def test_run_systemctl_passes_through_on_success(self, monkeypatch):
+        """_run_systemctl delegates to subprocess.run when systemctl exists."""
+        calls = []
+
+        def fake_run(cmd, **kwargs):
+            calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        result = gateway_cli._run_systemctl(["status", "hermes-gateway"])
+        assert result.returncode == 0
+        assert len(calls) == 1
+        assert "status" in calls[0]
+
+    def test_install_in_container_prints_docker_guidance(self, monkeypatch, capsys):
+        """'hermes gateway install' inside Docker exits 0 with container guidance."""
+        import pytest
+
+        monkeypatch.setattr(gateway_cli, "is_managed", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_container", lambda: True)
+
+        args = SimpleNamespace(gateway_command="install", force=False, system=False, run_as_user=None)
+        with pytest.raises(SystemExit) as exc_info:
+            gateway_cli.gateway_command(args)
+
+        assert exc_info.value.code == 0
+        out = capsys.readouterr().out
+        # Case-insensitive match, same as the uninstall/start tests below.
+        assert "docker" in out.lower()
+        assert "restart" in out.lower()
+
+    def test_uninstall_in_container_prints_docker_guidance(self, monkeypatch, capsys):
+        """'hermes gateway uninstall' inside Docker exits 0 with container guidance."""
+        import pytest
+
+        monkeypatch.setattr(gateway_cli, "is_managed", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        # Stub WSL detection like the install/start tests so the result does
+        # not depend on the host the suite happens to run on.
+        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_container", lambda: True)
+
+        args = SimpleNamespace(gateway_command="uninstall", system=False)
+        with pytest.raises(SystemExit) as exc_info:
+            gateway_cli.gateway_command(args)
+
+        assert exc_info.value.code == 0
+        out = capsys.readouterr().out
+        assert "docker" in out.lower()
+
+    def test_start_in_container_prints_docker_guidance(self, monkeypatch, capsys):
+        """'hermes gateway start' inside Docker exits 0 with container guidance."""
+        import pytest
+
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_container", lambda: True)
+
+        args = SimpleNamespace(gateway_command="start", system=False)
+        with pytest.raises(SystemExit) as exc_info:
+            gateway_cli.gateway_command(args)
+
+        assert exc_info.value.code == 0
+        out = capsys.readouterr().out
+        assert "docker" in out.lower()
+        assert "hermes gateway run" in out
@@ -54,14 +54,19 @@ class TestAnthropicDotToHyphen:

 # ── OpenCode Zen regression ────────────────────────────────────────────

-class TestOpenCodeZenDotToHyphen:
-    """OpenCode Zen follows Anthropic convention (dots→hyphens)."""
+class TestOpenCodeZenModelNormalization:
+    """OpenCode Zen preserves dots for most models, but Claude stays hyphenated."""

    @pytest.mark.parametrize("model,expected", [
        ("claude-sonnet-4.6", "claude-sonnet-4-6"),
-        ("glm-4.5", "glm-4-5"),
+        ("opencode-zen/claude-opus-4.5", "claude-opus-4-5"),
+        ("glm-4.5", "glm-4.5"),
+        ("glm-5.1", "glm-5.1"),
+        ("gpt-5.4", "gpt-5.4"),
+        ("minimax-m2.5-free", "minimax-m2.5-free"),
+        ("kimi-k2.5", "kimi-k2.5"),
    ])
-    def test_zen_converts_dots(self, model, expected):
+    def test_zen_normalizes_models(self, model, expected):
        result = normalize_model_for_provider(model, "opencode-zen")
        assert result == expected

@@ -69,6 +74,10 @@ class TestOpenCodeZenDotToHyphen:
        result = normalize_model_for_provider("opencode-zen/claude-sonnet-4.6", "opencode-zen")
        assert result == "claude-sonnet-4-6"

+    def test_zen_strips_vendor_prefix_for_non_claude(self):
+        """A non-Claude model loses the ``opencode-zen/`` prefix but keeps its dots."""
+        prefixed = "opencode-zen/glm-5.1"
+        assert normalize_model_for_provider(prefixed, "opencode-zen") == "glm-5.1"
+

 # ── Copilot dot preservation (regression) ──────────────────────────────

@@ -257,3 +257,76 @@ class TestProviderPersistsAfterModelSave:
        assert model.get("provider") == "opencode-go"
        assert model.get("default") == "minimax-m2.5"
        assert model.get("api_mode") == "anthropic_messages"
+
+
+class TestBaseUrlValidation:
+    """Reject non-URL values in the base URL prompt (e.g. shell commands)."""
+
+    @staticmethod
+    def _run_flow_with_input(monkeypatch, typed):
+        """Run the ``zai`` API-key model flow with every ``input()`` answered by *typed*.
+
+        Skips the calling test when ``zai`` is missing from ``PROVIDER_REGISTRY``.
+        ``GLM_BASE_URL`` is removed from the process environment first so a
+        value inherited from the caller's shell cannot influence the outcome.
+
+        Returns:
+            ``get_env_value("GLM_BASE_URL")`` after the flow, or ``""`` if unset.
+        """
+        from hermes_cli.auth import PROVIDER_REGISTRY
+
+        if not PROVIDER_REGISTRY.get("zai"):
+            pytest.skip("zai not in PROVIDER_REGISTRY")
+
+        monkeypatch.setenv("GLM_API_KEY", "test-key")
+        monkeypatch.delenv("GLM_BASE_URL", raising=False)
+
+        from hermes_cli.main import _model_flow_api_key_provider
+        from hermes_cli.config import load_config, get_env_value
+
+        with patch("hermes_cli.auth._prompt_model_selection", return_value="glm-5"), \
+             patch("hermes_cli.auth.deactivate_provider"), \
+             patch("builtins.input", return_value=typed):
+            _model_flow_api_key_provider(load_config(), "zai", "old-model")
+
+        return get_env_value("GLM_BASE_URL") or ""
+
+    def test_invalid_base_url_rejected(self, config_home, monkeypatch, capsys):
+        """Typing a non-URL string should not be saved as the base URL."""
+        # User types a shell command instead of a URL at the base URL prompt
+        saved = self._run_flow_with_input(monkeypatch, "nano ~/.hermes/.env")
+
+        # The garbage value should NOT have been saved
+        assert not saved or saved.startswith(("http://", "https://")), \
+            f"Non-URL value was saved as GLM_BASE_URL: {saved}"
+        captured = capsys.readouterr()
+        assert "Invalid URL" in captured.out
+
+    def test_valid_base_url_accepted(self, config_home, monkeypatch):
+        """A proper URL should be saved normally."""
+        saved = self._run_flow_with_input(monkeypatch, "https://custom.z.ai/api/paas/v4")
+
+        assert saved == "https://custom.z.ai/api/paas/v4"
+
+    def test_empty_base_url_keeps_default(self, config_home, monkeypatch):
+        """Pressing Enter (empty) should not change the base URL."""
+        saved = self._run_flow_with_input(monkeypatch, "")
+
+        assert saved == "", "Empty input should not save a base URL"
@@ -177,7 +177,8 @@ class TestCreateProfile:
        # No error; optional files just not copied
        assert not (profile_dir / "config.yaml").exists()
        assert not (profile_dir / ".env").exists()
-        assert not (profile_dir / "SOUL.md").exists()
+        # SOUL.md is always seeded with the default even when clone source lacks it
+        assert (profile_dir / "SOUL.md").exists()


 # ===================================================================
@@ -1,5 +1,4 @@
-"""Tests for setup_model_provider — verifies the delegation to
-select_provider_and_model() and config dict sync."""
+"""Tests for setup.py configuration flows."""
 import json
 import sys
 import types
@@ -8,6 +7,7 @@ import pytest

 from hermes_cli.auth import get_active_provider
 from hermes_cli.config import load_config, save_config
+from hermes_cli import setup as setup_mod
 from hermes_cli.setup import setup_model_provider


@@ -144,6 +144,85 @@ def test_setup_custom_providers_synced(tmp_path, monkeypatch):
    assert reloaded.get("custom_providers") == [{"name": "Local", "base_url": "http://localhost:8080/v1"}]


+def test_setup_gateway_skips_service_install_when_systemctl_missing(monkeypatch, capsys):
+    """With no systemd support and no macOS, setup_gateway prints manual start instructions."""
+    # Keys that are explicitly blank for this scenario.
+    blank_keys = (
+        "TELEGRAM_BOT_TOKEN",
+        "TELEGRAM_HOME_CHANNEL",
+        "DISCORD_BOT_TOKEN",
+        "DISCORD_HOME_CHANNEL",
+        "SLACK_BOT_TOKEN",
+        "SLACK_HOME_CHANNEL",
+        "MATRIX_PASSWORD",
+        "BLUEBUBBLES_SERVER_URL",
+        "BLUEBUBBLES_HOME_CHANNEL",
+        "WHATSAPP_ENABLED",
+        "WEBHOOK_ENABLED",
+    )
+    env = dict.fromkeys(blank_keys, "")
+    # Matrix is the only platform given non-empty values.
+    env.update(
+        MATRIX_HOMESERVER="https://matrix.example.com",
+        MATRIX_USER_ID="@alice:example.com",
+        MATRIX_ACCESS_TOKEN="token",
+    )
+
+    import hermes_cli.gateway as gateway_mod
+
+    # Service layer: no systemd, not macOS, nothing installed or running.
+    for attr_name in (
+        "supports_systemd_services",
+        "is_macos",
+        "_is_service_installed",
+        "_is_service_running",
+    ):
+        monkeypatch.setattr(gateway_mod, attr_name, lambda: False)
+
+    monkeypatch.setattr("platform.system", lambda: "Linux")
+    monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *args, **kwargs: False)
+    monkeypatch.setattr(setup_mod, "get_env_value", lambda key: env.get(key, ""))
+
+    setup_mod.setup_gateway({})
+
+    printed = capsys.readouterr().out
+    for expected in (
+        "Messaging platforms configured!",
+        "Start the gateway to bring your bots online:",
+        "hermes gateway",
+    ):
+        assert expected in printed
+
+
+def test_setup_gateway_in_container_shows_docker_guidance(monkeypatch, capsys):
+    """setup_gateway() in a Docker container shows Docker-specific restart instructions."""
+    env = {
+        "TELEGRAM_BOT_TOKEN": "",
+        "TELEGRAM_HOME_CHANNEL": "",
+        "DISCORD_BOT_TOKEN": "",
+        "DISCORD_HOME_CHANNEL": "",
+        "SLACK_BOT_TOKEN": "",
+        "SLACK_HOME_CHANNEL": "",
+        "MATRIX_HOMESERVER": "https://matrix.example.com",
+        "MATRIX_USER_ID": "@alice:example.com",
+        "MATRIX_PASSWORD": "",
+        "MATRIX_ACCESS_TOKEN": "token",
+        "BLUEBUBBLES_SERVER_URL": "",
+        "BLUEBUBBLES_HOME_CHANNEL": "",
+        "WHATSAPP_ENABLED": "",
+        "WEBHOOK_ENABLED": "",
+    }
+
+    monkeypatch.setattr(setup_mod, "get_env_value", lambda key: env.get(key, ""))
+    monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *args, **kwargs: False)
+    monkeypatch.setattr("platform.system", lambda: "Linux")
+
+    import hermes_cli.gateway as gateway_mod
+
+    monkeypatch.setattr(gateway_mod, "supports_systemd_services", lambda: False)
+    monkeypatch.setattr(gateway_mod, "is_macos", lambda: False)
+    monkeypatch.setattr(gateway_mod, "_is_service_installed", lambda: False)
+    monkeypatch.setattr(gateway_mod, "_is_service_running", lambda: False)
+
+    # Patch is_container on the hermes_constants module itself.
+    # NOTE(review): this only takes effect if setup_gateway resolves
+    # is_container from hermes_constants at call time (e.g. a function-local
+    # import) rather than binding it at module import — confirm in setup.py.
+    import hermes_constants
+    monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
+
+    setup_mod.setup_gateway({})
+
+    out = capsys.readouterr().out
+    assert "Messaging platforms configured!" in out
+    # A case-insensitive match already covers "Docker"; no second clause needed.
+    assert "docker" in out.lower()
+    assert "restart" in out.lower()
+
+
 def test_setup_syncs_custom_provider_removal_from_disk(tmp_path, monkeypatch):
    """Removing the last custom provider in model setup should persist."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -798,3 +798,120 @@ class TestFindGatewayPidsExclude:
        pids = gateway_cli.find_gateway_pids()

        assert pids == [100]
+
+
+# ---------------------------------------------------------------------------
+# Gateway mode writes exit code before restart (#8300)
+# ---------------------------------------------------------------------------
+
+
+class TestGatewayModeWritesExitCodeEarly:
+    """When running as ``hermes update --gateway``, the exit code marker must be
+    written *before* the gateway restart attempt.  Without this, systemd's
+    ``KillMode=mixed`` kills the update process (and its wrapping shell) during
+    the cgroup teardown, so the shell epilogue that normally writes the exit
+    code never executes.  The new gateway's update watcher then polls for 30
+    minutes and sends a spurious timeout message.
+    """
+
+    @staticmethod
+    def _isolated_home(tmp_path, monkeypatch, *, systemd):
+        """Stub platform detection and point HERMES_HOME at a temp directory.
+
+        Args:
+            tmp_path: pytest temporary directory for this test.
+            monkeypatch: pytest monkeypatch fixture.
+            systemd: value ``gateway_cli.supports_systemd_services`` should return.
+
+        Returns:
+            The created ``.hermes`` directory that will hold the marker file.
+        """
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: systemd)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+
+        # Point HERMES_HOME at a temp dir so the marker file lands there
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        import hermes_cli.config as _cfg
+        import hermes_cli.main as _main_mod
+
+        # Patch both the config module and the module-level ref used by cmd_update
+        for module in (_cfg, _main_mod):
+            monkeypatch.setattr(module, "get_hermes_home", lambda: hermes_home)
+
+        return hermes_home
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_exit_code_written_in_gateway_mode(
+        self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
+    ):
+        """Gateway mode leaves a ``.update_exit_code`` file containing ``0``."""
+        hermes_home = self._isolated_home(tmp_path, monkeypatch, systemd=False)
+        mock_run.side_effect = _make_run_side_effect(commit_count="1")
+
+        args = SimpleNamespace(gateway=True)
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(args)
+
+        exit_code_path = hermes_home / ".update_exit_code"
+        assert exit_code_path.exists(), ".update_exit_code not written in gateway mode"
+        assert exit_code_path.read_text() == "0"
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_exit_code_not_written_in_normal_mode(
+        self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
+    ):
+        """Non-gateway mode should NOT write the exit code (the shell does it)."""
+        hermes_home = self._isolated_home(tmp_path, monkeypatch, systemd=False)
+        mock_run.side_effect = _make_run_side_effect(commit_count="1")
+
+        args = SimpleNamespace(gateway=False)
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(args)
+
+        exit_code_path = hermes_home / ".update_exit_code"
+        assert not exit_code_path.exists(), ".update_exit_code should not be written outside gateway mode"
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_exit_code_written_before_restart_call(
+        self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
+    ):
+        """Exit code must exist BEFORE systemctl restart is called."""
+        hermes_home = self._isolated_home(tmp_path, monkeypatch, systemd=True)
+        exit_code_path = hermes_home / ".update_exit_code"
+
+        # Track whether exit code exists when systemctl restart is called
+        exit_code_existed_at_restart = []
+
+        original_side_effect = _make_run_side_effect(
+            commit_count="1", systemd_active=True,
+        )
+
+        def tracking_side_effect(cmd, **kwargs):
+            # Record the marker's presence at the moment a restart is issued,
+            # then defer to the regular fake for the actual return value.
+            joined = " ".join(str(c) for c in cmd)
+            if "systemctl" in joined and "restart" in joined:
+                exit_code_existed_at_restart.append(exit_code_path.exists())
+            return original_side_effect(cmd, **kwargs)
+
+        mock_run.side_effect = tracking_side_effect
+
+        args = SimpleNamespace(gateway=True)
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(args)
+
+        assert exit_code_existed_at_restart, "systemctl restart was never called"
+        assert exit_code_existed_at_restart[0] is True, \
+            ".update_exit_code must exist BEFORE systemctl restart (cgroup kill race)"
@@ -0,0 +1,675 @@
+"""Tests for hermes_cli.web_server and related config utilities."""
+
+import os
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from hermes_cli.config import (
+    DEFAULT_CONFIG,
+    reload_env,
+    redact_key,
+    _EXTRA_ENV_KEYS,
+    OPTIONAL_ENV_VARS,
+)
+
+
+# ---------------------------------------------------------------------------
+# reload_env tests
+# ---------------------------------------------------------------------------
+
+
+class TestReloadEnv:
+    """Tests for reload_env() — re-reads .env into os.environ."""
+
+    @pytest.fixture(autouse=True)
+    def _restore_environ(self):
+        """Snapshot ``os.environ`` before each test and restore it afterwards.
+
+        These tests exercise a function that adds, changes and removes process
+        environment variables.  ``patch.dict`` puts the mapping back exactly as
+        it was even when an assertion fails, so nothing leaks into other tests.
+        """
+        with patch.dict(os.environ):
+            yield
+
+    def test_adds_new_vars(self, tmp_path):
+        """reload_env() adds vars from .env that are not in os.environ."""
+        env_file = tmp_path / ".env"
+        env_file.write_text("TEST_RELOAD_VAR=hello123\n")
+        with patch("hermes_cli.config.get_env_path", return_value=env_file):
+            os.environ.pop("TEST_RELOAD_VAR", None)
+            count = reload_env()
+            assert count >= 1
+            assert os.environ.get("TEST_RELOAD_VAR") == "hello123"
+
+    def test_updates_changed_vars(self, tmp_path):
+        """reload_env() updates vars whose value changed on disk."""
+        env_file = tmp_path / ".env"
+        env_file.write_text("TEST_RELOAD_VAR=old_value\n")
+        with patch("hermes_cli.config.get_env_path", return_value=env_file):
+            os.environ["TEST_RELOAD_VAR"] = "old_value"
+            # Now change the file
+            env_file.write_text("TEST_RELOAD_VAR=new_value\n")
+            count = reload_env()
+            assert count >= 1
+            assert os.environ.get("TEST_RELOAD_VAR") == "new_value"
+
+    def test_removes_deleted_known_vars(self, tmp_path):
+        """reload_env() removes known Hermes vars not present in .env."""
+        env_file = tmp_path / ".env"
+        env_file.write_text("")  # empty .env
+        # Pick a known key from OPTIONAL_ENV_VARS
+        known_key = next(iter(OPTIONAL_ENV_VARS.keys()))
+        with patch("hermes_cli.config.get_env_path", return_value=env_file):
+            os.environ[known_key] = "stale_value"
+            count = reload_env()
+            assert known_key not in os.environ
+            assert count >= 1
+
+    def test_does_not_remove_unknown_vars(self, tmp_path):
+        """reload_env() preserves non-Hermes env vars even when absent from .env."""
+        env_file = tmp_path / ".env"
+        env_file.write_text("")
+        with patch("hermes_cli.config.get_env_path", return_value=env_file):
+            os.environ["MY_CUSTOM_UNRELATED_VAR"] = "keep_me"
+            reload_env()
+            assert os.environ.get("MY_CUSTOM_UNRELATED_VAR") == "keep_me"
+
+
+# ---------------------------------------------------------------------------
+# redact_key tests
+# ---------------------------------------------------------------------------
+
+
+class TestRedactKey:
+    """Masking behaviour of ``redact_key`` for long, short and empty inputs."""
+
+    def test_long_key_shows_prefix_suffix(self):
+        """A long key keeps its first and last four characters around an ellipsis."""
+        masked = redact_key("sk-1234567890abcdef")
+        assert masked.startswith("sk-1")
+        assert masked.endswith("cdef")
+        assert "..." in masked
+
+    def test_short_key_fully_masked(self):
+        """A short key is replaced entirely by ``***``."""
+        masked = redact_key("short")
+        assert masked == "***"
+
+    def test_empty_key(self):
+        """An empty key yields a "not set" message, ``***``, or ANSI-coloured text."""
+        masked = redact_key("")
+        accepted_forms = (
+            "not set" in masked.lower(),
+            masked == "***",
+            "\x1b" in masked,
+        )
+        assert any(accepted_forms)
+
+
+# ---------------------------------------------------------------------------
+# web_server tests (FastAPI endpoints)
+# ---------------------------------------------------------------------------
+
+
+class TestWebServerEndpoints:
+    """Test the FastAPI REST endpoints using Starlette TestClient."""
+
+    @pytest.fixture(autouse=True)
+    def _setup_test_client(self):
+        """Create a TestClient — import is deferred to avoid requiring fastapi."""
+        try:
+            from starlette.testclient import TestClient
+        except ImportError:
+            pytest.skip("fastapi/starlette not installed")
+
+        from hermes_cli.web_server import app
+        self.client = TestClient(app)
+
+    def test_get_status(self):
+        resp = self.client.get("/api/status")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "version" in data
+        assert "hermes_home" in data
+        assert "active_sessions" in data
+
+    def test_get_status_filters_unconfigured_gateway_platforms(self, monkeypatch):
+        import gateway.config as gateway_config
+        import hermes_cli.web_server as web_server
+
+        class _Platform:
+            def __init__(self, value):
+                self.value = value
+
+        class _GatewayConfig:
+            def get_connected_platforms(self):
+                return [_Platform("telegram")]
+
+        monkeypatch.setattr(web_server, "get_running_pid", lambda: 1234)
+        monkeypatch.setattr(
+            web_server,
+            "read_runtime_status",
+            lambda: {
+                "gateway_state": "running",
+                "updated_at": "2026-04-12T00:00:00+00:00",
+                "platforms": {
+                    "telegram": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
+                    "whatsapp": {"state": "retrying", "updated_at": "2026-04-12T00:00:00+00:00"},
+                    "feishu": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
+                },
+            },
+        )
+        monkeypatch.setattr(web_server, "check_config_version", lambda: (1, 1))
+        monkeypatch.setattr(gateway_config, "load_gateway_config", lambda: _GatewayConfig())
+
+        resp = self.client.get("/api/status")
+
+        assert resp.status_code == 200
+        assert resp.json()["gateway_platforms"] == {
+            "telegram": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
+        }
+
+    def test_get_status_hides_stale_platforms_when_gateway_not_running(self, monkeypatch):
+        import gateway.config as gateway_config
+        import hermes_cli.web_server as web_server
+
+        class _GatewayConfig:
+            def get_connected_platforms(self):
+                return []
+
+        monkeypatch.setattr(web_server, "get_running_pid", lambda: None)
+        monkeypatch.setattr(
+            web_server,
+            "read_runtime_status",
+            lambda: {
+                "gateway_state": "startup_failed",
+                "updated_at": "2026-04-12T00:00:00+00:00",
+                "platforms": {
+                    "whatsapp": {"state": "retrying", "updated_at": "2026-04-12T00:00:00+00:00"},
+                    "feishu": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
+                },
+            },
+        )
+        monkeypatch.setattr(web_server, "check_config_version", lambda: (1, 1))
+        monkeypatch.setattr(gateway_config, "load_gateway_config", lambda: _GatewayConfig())
+
+        resp = self.client.get("/api/status")
+
+        assert resp.status_code == 200
+        assert resp.json()["gateway_state"] == "startup_failed"
+        assert resp.json()["gateway_platforms"] == {}
+
+    def test_get_config_schema(self):
+        resp = self.client.get("/api/config/schema")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "fields" in data
+        assert "category_order" in data
+        schema = data["fields"]
+        assert len(schema) > 100  # Should have 150+ fields
+        assert "model" in schema
+        # Verify category_order is a non-empty list
+        assert isinstance(data["category_order"], list)
+        assert len(data["category_order"]) > 0
+        assert "general" in data["category_order"]
+
+    def test_get_config_defaults(self):
+        resp = self.client.get("/api/config/defaults")
+        assert resp.status_code == 200
+        defaults = resp.json()
+        assert "model" in defaults
+
+    def test_get_env_vars(self):
+        resp = self.client.get("/api/env")
+        assert resp.status_code == 200
+        data = resp.json()
+        # Should contain known env var names
+        assert any(k.endswith("_API_KEY") or k.endswith("_TOKEN") for k in data.keys())
+
+    def test_reveal_env_var(self, tmp_path):
+        """POST /api/env/reveal should return the real unredacted value."""
+        from hermes_cli.config import save_env_value
+        from hermes_cli.web_server import _SESSION_TOKEN
+        save_env_value("TEST_REVEAL_KEY", "super-secret-value-12345")
+        resp = self.client.post(
+            "/api/env/reveal",
+            json={"key": "TEST_REVEAL_KEY"},
+            headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["key"] == "TEST_REVEAL_KEY"
+        assert data["value"] == "super-secret-value-12345"
+
+    def test_reveal_env_var_not_found(self):
+        """POST /api/env/reveal should 404 for unknown keys."""
+        from hermes_cli.web_server import _SESSION_TOKEN
+        resp = self.client.post(
+            "/api/env/reveal",
+            json={"key": "NONEXISTENT_KEY_XYZ"},
+            headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
+        )
+        assert resp.status_code == 404
+
+    def test_reveal_env_var_no_token(self, tmp_path):
+        """POST /api/env/reveal without token should return 401."""
+        from hermes_cli.config import save_env_value
+        save_env_value("TEST_REVEAL_NOAUTH", "secret-value")
+        resp = self.client.post(
+            "/api/env/reveal",
+            json={"key": "TEST_REVEAL_NOAUTH"},
+        )
+        assert resp.status_code == 401
+
+    def test_reveal_env_var_bad_token(self, tmp_path):
+        """POST /api/env/reveal with wrong token should return 401."""
+        from hermes_cli.config import save_env_value
+        save_env_value("TEST_REVEAL_BADAUTH", "secret-value")
+        resp = self.client.post(
+            "/api/env/reveal",
+            json={"key": "TEST_REVEAL_BADAUTH"},
+            headers={"Authorization": "Bearer wrong-token-here"},
+        )
+        assert resp.status_code == 401
+
+    def test_session_token_endpoint(self):
+        """GET /api/auth/session-token should return a token."""
+        from hermes_cli.web_server import _SESSION_TOKEN
+        resp = self.client.get("/api/auth/session-token")
+        assert resp.status_code == 200
+        assert resp.json()["token"] == _SESSION_TOKEN
+
+    def test_path_traversal_blocked(self):
+        """URL-encoded ``..`` segments must never expose a system file."""
+        # "%2e%2e" is the percent-encoded form of ".."
+        response = self.client.get("/%2e%2e/%2e%2e/etc/passwd")
+        status = response.status_code
+        # Either a 200 (expected to be the SPA fallback) or a plain 404 is acceptable
+        assert status in (200, 404)
+        # A 200 body must not look like /etc/passwd
+        assert status != 200 or "root:" not in response.text
+
+    def test_path_traversal_dotdot_blocked(self):
+        """An encoded ``..`` prefix must not expose the server's own source file."""
+        response = self.client.get("/%2e%2e/hermes_cli/web_server.py")
+        status = response.status_code
+        assert status in (200, 404)
+        assert status != 200 or "FastAPI" not in response.text  # never the real source
+
+
+# ---------------------------------------------------------------------------
+# _build_schema_from_config tests
+# ---------------------------------------------------------------------------
+
+
+class TestBuildSchemaFromConfig:
+    def test_produces_expected_field_count(self):
+        from hermes_cli.web_server import CONFIG_SCHEMA as schema
+        # The generated schema is expected to hold well over 100 leaf fields
+        assert len(schema) > 100
+
+    def test_schema_entries_have_required_fields(self):
+        from hermes_cli.web_server import CONFIG_SCHEMA
+        for key, spec in list(CONFIG_SCHEMA.items())[:10]:  # spot-check the first ten
+            assert "type" in spec, f"Missing type for {key}"
+            assert "category" in spec, f"Missing category for {key}"
+
+    def test_overrides_applied(self):
+        from hermes_cli.web_server import CONFIG_SCHEMA
+        if "terminal.backend" not in CONFIG_SCHEMA:
+            return  # key absent: nothing to verify
+        spec = CONFIG_SCHEMA["terminal.backend"]  # expected: a select offering "local"
+        assert spec["type"] == "select"
+        assert "options" in spec
+        assert "local" in spec["options"]
+
+    def test_empty_prefix_produces_correct_keys(self):
+        from hermes_cli.web_server import _build_schema_from_config
+        sample_config = {"model": "test", "nested": {"key": "val"}}
+        flattened = _build_schema_from_config(sample_config)
+        assert "model" in flattened  # top-level key keeps its bare name
+        assert "nested.key" in flattened  # nested key is joined with a dot
+
+    def test_top_level_scalars_get_general_category(self):
+        """A scalar at the top level of the config lands in the 'general' category."""
+        from hermes_cli.web_server import CONFIG_SCHEMA as schema
+        assert schema["model"]["category"] == "general"
+
+    def test_nested_keys_get_parent_category(self):
+        """A nested field, when present, is categorised under its top-level parent."""
+        from hermes_cli.web_server import CONFIG_SCHEMA
+        assert "agent.max_turns" not in CONFIG_SCHEMA or CONFIG_SCHEMA["agent.max_turns"]["category"] == "agent"
+
+    def test_category_merge_applied(self):
+        """Categories that are too small are folded into a larger one."""
+        from hermes_cli.web_server import CONFIG_SCHEMA
+        seen = set()
+        for spec in CONFIG_SCHEMA.values():
+            seen.add(spec["category"])
+        assert "privacy" not in seen  # merged into security
+        assert "context" not in seen  # merged into agent
+
+    def test_no_single_field_categories(self):
+        """Once merging is done, every category holds at least two fields."""
+        from collections import Counter
+        from hermes_cli.web_server import CONFIG_SCHEMA
+        per_category = Counter(spec["category"] for spec in CONFIG_SCHEMA.values())
+        for cat, count in per_category.items():
+            assert count >= 2, f"Category '{cat}' has only {count} field(s) — should be merged"
+
+
+# ---------------------------------------------------------------------------
+# Config round-trip tests
+# ---------------------------------------------------------------------------
+
+
+class TestConfigRoundTrip:
+    """Verify config survives GET → edit → PUT without data loss."""
+
+    @pytest.fixture(autouse=True)
+    def _setup(self):  # autouse: gives every test a TestClient, or skips without starlette
+        try:
+            from starlette.testclient import TestClient
+        except ImportError:
+            pytest.skip("fastapi/starlette not installed")
+        from hermes_cli.web_server import app
+        self.client = TestClient(app)
+
+    def test_get_config_no_internal_keys(self):
+        """GET /api/config should not expose any key starting with an underscore."""
+        config = self.client.get("/api/config").json()
+        internal = [k for k in config if k.startswith("_")]
+        assert not internal, f"Internal keys leaked to frontend: {internal}"
+
+    def test_get_config_model_is_string(self):
+        """GET /api/config should normalize model dict to a string."""
+        config = self.client.get("/api/config").json()
+        assert isinstance(config.get("model"), str), \
+            f"model should be string, got {type(config.get('model'))}"
+
+    def test_round_trip_preserves_model_subkeys(self):
+        """Save and reload should not lose model.provider, model.base_url, etc."""
+        from hermes_cli.config import load_config, save_config
+
+        # Set up a config with model as a dict (the common user config form)
+        save_config({
+            "model": {
+                "default": "anthropic/claude-sonnet-4",
+                "provider": "openrouter",
+                "base_url": "https://openrouter.ai/api/v1",
+                "api_mode": "openai",
+            }
+        })
+
+        before = load_config()
+        assert isinstance(before.get("model"), dict)
+        original_keys = set(before["model"].keys())
+
+        # GET → PUT unchanged
+        web_config = self.client.get("/api/config").json()
+        assert isinstance(web_config.get("model"), str), "GET should normalize model to string"
+        # Fail on a rejected PUT; an untouched file would otherwise pass the checks below
+        self.client.put("/api/config", json={"config": web_config}).raise_for_status()
+
+        after = load_config()
+        assert isinstance(after.get("model"), dict), "model should still be a dict after save"
+        assert set(after["model"].keys()) >= original_keys, \
+            f"Lost model subkeys: {original_keys - set(after['model'].keys())}"
+
+    def test_edit_model_name_preserved(self):
+        """Changing the model string should update model.default on disk."""
+        from hermes_cli.config import load_config
+
+        web_config = self.client.get("/api/config").json()
+        original_model = web_config["model"]
+
+        # Change model; the finally block puts the original back even on failure
+        web_config["model"] = "test/editing-model"
+        try:
+            self.client.put("/api/config", json={"config": web_config}).raise_for_status()
+            after = load_config()
+            if isinstance(after.get("model"), dict):
+                assert after["model"]["default"] == "test/editing-model"
+            else:
+                assert after["model"] == "test/editing-model"
+        finally:
+            # Restore
+            web_config["model"] = original_model
+            self.client.put("/api/config", json={"config": web_config})
+
+    def test_edit_nested_value(self):
+        """Editing a nested config value should persist correctly."""
+        from hermes_cli.config import load_config
+
+        web_config = self.client.get("/api/config").json()
+        original_turns = web_config.get("agent", {}).get("max_turns")
+
+        # Change max_turns
+        if "agent" not in web_config:
+            web_config["agent"] = {}
+        web_config["agent"]["max_turns"] = 42
+
+        try:
+            self.client.put("/api/config", json={"config": web_config}).raise_for_status()
+            after = load_config()
+            assert after.get("agent", {}).get("max_turns") == 42
+        finally:
+            # Restore (runs even when the assertion above fails)
+            web_config["agent"]["max_turns"] = original_turns
+            self.client.put("/api/config", json={"config": web_config})
+
+    def test_schema_types_match_config_values(self):
+        """Every schema field that has a value in the config should match its declared type."""
+        config = self.client.get("/api/config").json()
+        schema_resp = self.client.get("/api/config/schema").json()
+        schema = schema_resp["fields"]
+
+        def get_nested(obj, path):  # walk a dotted path; None when any step is missing
+            parts = path.split(".")
+            cur = obj
+            for p in parts:
+                if not isinstance(cur, dict):  # also covers cur being None
+                    return None
+                cur = cur.get(p)
+            return cur
+
+        mismatches = []
+        for key, entry in schema.items():
+            val = get_nested(config, key)
+            if val is None:
+                continue  # not set in user config — fine
+            expected = entry["type"]
+            if expected in ("string", "select") and not isinstance(val, str):
+                mismatches.append(f"{key}: expected str, got {type(val).__name__}")
+            elif expected == "number" and not isinstance(val, (int, float)):
+                mismatches.append(f"{key}: expected number, got {type(val).__name__}")
+            elif expected == "boolean" and not isinstance(val, bool):
+                mismatches.append(f"{key}: expected bool, got {type(val).__name__}")
+            elif expected == "list" and not isinstance(val, list):
+                mismatches.append(f"{key}: expected list, got {type(val).__name__}")
+        assert not mismatches, "Type mismatches:\n" + "\n".join(mismatches)
+
+
+# ---------------------------------------------------------------------------
+# New feature endpoint tests
+# ---------------------------------------------------------------------------
+
+
+class TestNewEndpoints:
+    """Tests for the logs, cron, skills, toolsets, raw-config, analytics and session-token endpoints."""
+
+    @pytest.fixture(autouse=True)
+    def _setup(self):  # autouse: gives every test a TestClient, or skips without starlette
+        try:
+            from starlette.testclient import TestClient
+        except ImportError:
+            pytest.skip("fastapi/starlette not installed")
+        from hermes_cli.web_server import app
+        self.client = TestClient(app)
+
+    def test_get_logs_default(self):
+        resp = self.client.get("/api/logs")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "file" in data
+        assert "lines" in data
+        assert isinstance(data["lines"], list)
+
+    def test_get_logs_invalid_file(self):
+        resp = self.client.get("/api/logs?file=nonexistent")
+        assert resp.status_code == 400  # an unrecognised file name is a client error
+
+    def test_cron_list(self):
+        resp = self.client.get("/api/cron/jobs")
+        assert resp.status_code == 200
+        assert isinstance(resp.json(), list)
+
+    def test_cron_job_not_found(self):
+        resp = self.client.get("/api/cron/jobs/nonexistent-id")
+        assert resp.status_code == 404
+
+    def test_skills_list(self):
+        resp = self.client.get("/api/skills")
+        assert resp.status_code == 200
+        skills = resp.json()
+        assert isinstance(skills, list)
+        if skills:  # field checks only run when at least one skill is returned
+            assert "name" in skills[0]
+            assert "enabled" in skills[0]
+
+    def test_skills_list_includes_disabled_skills(self, monkeypatch):
+        import tools.skills_tool as skills_tool
+        import hermes_cli.skills_config as skills_config
+        import hermes_cli.web_server as web_server
+
+        # NOTE(review): here skip_disabled=True yields BOTH skills, i.e. it appears to mean
+        # "skip the disabled-filter" — confirm against tools.skills_tool._find_all_skills.
+        def _fake_find_all_skills(*, skip_disabled=False):
+            if skip_disabled:
+                return [
+                    {"name": "active-skill", "description": "active", "category": "demo"},
+                    {"name": "disabled-skill", "description": "disabled", "category": "demo"},
+                ]
+            return [
+                {"name": "active-skill", "description": "active", "category": "demo"},
+            ]
+
+        monkeypatch.setattr(skills_tool, "_find_all_skills", _fake_find_all_skills)
+        monkeypatch.setattr(skills_config, "get_disabled_skills", lambda config: {"disabled-skill"})
+        monkeypatch.setattr(web_server, "load_config", lambda: {"skills": {"disabled": ["disabled-skill"]}})
+
+        resp = self.client.get("/api/skills")
+
+        assert resp.status_code == 200
+        assert resp.json() == [
+            {
+                "name": "active-skill",
+                "description": "active",
+                "category": "demo",
+                "enabled": True,
+            },
+            {
+                "name": "disabled-skill",
+                "description": "disabled",
+                "category": "demo",
+                "enabled": False,
+            },
+        ]
+
+    def test_toolsets_list(self):
+        resp = self.client.get("/api/tools/toolsets")
+        assert resp.status_code == 200
+        toolsets = resp.json()
+        assert isinstance(toolsets, list)
+        if toolsets:  # field checks only run when at least one toolset is returned
+            assert "name" in toolsets[0]
+            assert "label" in toolsets[0]
+            assert "enabled" in toolsets[0]
+
+    def test_toolsets_list_matches_cli_enabled_state(self, monkeypatch):
+        import hermes_cli.tools_config as tools_config
+        import toolsets as toolsets_module
+        import hermes_cli.web_server as web_server
+
+        monkeypatch.setattr(
+            tools_config,
+            "_get_effective_configurable_toolsets",
+            lambda: [
+                ("web", "🔍 Web Search & Scraping", "web_search, web_extract"),
+                ("skills", "📚 Skills", "list, view, manage"),
+                ("memory", "💾 Memory", "persistent memory across sessions"),
+            ],
+        )
+        monkeypatch.setattr(
+            tools_config,
+            "_get_platform_tools",
+            lambda config, platform, include_default_mcp_servers=False: {"web", "skills"},
+        )
+        monkeypatch.setattr(
+            tools_config,
+            "_toolset_has_keys",
+            lambda ts_key, config=None: ts_key != "web",
+        )
+        monkeypatch.setattr(
+            toolsets_module,
+            "resolve_toolset",
+            lambda name: {
+                "web": ["web_search", "web_extract"],
+                "skills": ["skills_list", "skill_view"],
+                "memory": ["memory_read"],
+            }[name],
+        )
+        monkeypatch.setattr(web_server, "load_config", lambda: {"platform_toolsets": {"cli": ["web", "skills"]}})
+
+        resp = self.client.get("/api/tools/toolsets")
+
+        assert resp.status_code == 200
+        assert resp.json() == [
+            {
+                "name": "web",
+                "label": "🔍 Web Search & Scraping",
+                "description": "web_search, web_extract",
+                "enabled": True,
+                "available": True,
+                "configured": False,  # the _toolset_has_keys stub reports False for "web"
+                "tools": ["web_extract", "web_search"],  # sorted, unlike the resolve_toolset stub's order
+            },
+            {
+                "name": "skills",
+                "label": "📚 Skills",
+                "description": "list, view, manage",
+                "enabled": True,
+                "available": True,
+                "configured": True,
+                "tools": ["skill_view", "skills_list"],  # sorted, unlike the resolve_toolset stub's order
+            },
+            {
+                "name": "memory",
+                "label": "💾 Memory",
+                "description": "persistent memory across sessions",
+                "enabled": False,  # "memory" is absent from the stubbed _get_platform_tools result
+                "available": False,
+                "configured": True,
+                "tools": ["memory_read"],
+            },
+        ]
+
+    def test_config_raw_get(self):
+        resp = self.client.get("/api/config/raw")
+        assert resp.status_code == 200
+        assert "yaml" in resp.json()
+
+    def test_config_raw_put_valid(self):
+        resp = self.client.put(
+            "/api/config/raw",
+            json={"yaml_text": "model: test\ntoolsets:\n  - all\n"},
+        )
+        assert resp.status_code == 200
+        assert resp.json()["ok"] is True
+
+    def test_config_raw_put_invalid(self):
+        resp = self.client.put(
+            "/api/config/raw",
+            json={"yaml_text": "- this is a list not a dict"},  # top-level YAML list, not a mapping
+        )
+        assert resp.status_code == 400
+
+    def test_analytics_usage(self):
+        resp = self.client.get("/api/analytics/usage?days=7")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "daily" in data
+        assert "by_model" in data
+        assert "totals" in data
+        assert isinstance(data["daily"], list)
+        assert "total_sessions" in data["totals"]
+
+    def test_session_token_endpoint(self):
+        from hermes_cli.web_server import _SESSION_TOKEN
+        resp = self.client.get("/api/auth/session-token")
+        assert resp.status_code == 200
+        assert resp.json()["token"] == _SESSION_TOKEN  # endpoint returns the module-level token
@@ -26,6 +26,7 @@ def _make_agent(
    agent.provider = "openrouter"
    agent.base_url = "https://openrouter.ai/api/v1"
    agent.api_key = "sk-test"
+    agent.api_mode = "chat_completions"
    agent.quiet_mode = True
    agent.log_prefix = ""
    agent.compression_enabled = compression_enabled
@@ -37,6 +38,7 @@ def _make_agent(
    agent.status_callback = None
    agent.tool_progress_callback = None
    agent._compression_warning = None
+    agent.config = None

    compressor = MagicMock(spec=ContextCompressor)
    compressor.context_length = main_context
@@ -99,6 +101,94 @@ def test_no_warning_when_aux_context_sufficient(mock_get_client, mock_ctx_len):
    assert agent._compression_warning is None


+def test_feasibility_check_passes_live_main_runtime():
+    """The feasibility probe must be handed the session's current runtime settings."""
+    live_runtime = {
+        "model": "gpt-5.4",
+        "provider": "openai-codex",
+        "base_url": "https://chatgpt.com/backend-api/codex",
+        "api_key": "codex-token",
+        "api_mode": "codex_responses",
+    }
+
+    agent = _make_agent(main_context=200_000, threshold_percent=0.50)
+    # Mirror the runtime onto the agent attributes of the same names.
+    for attr_name, attr_value in live_runtime.items():
+        setattr(agent, attr_name, attr_value)
+    agent._emit_status = lambda msg: None
+
+    aux_client = MagicMock()
+    aux_client.base_url = live_runtime["base_url"]
+    aux_client.api_key = live_runtime["api_key"]
+
+    with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(aux_client, "gpt-5.4")) as mock_get_client, \
+         patch("agent.model_metadata.get_model_context_length", return_value=200_000):
+        agent._check_compression_model_feasibility()
+
+    mock_get_client.assert_called_once_with(
+        "compression",
+        main_runtime=live_runtime,
+    )
+
+
+@patch("agent.model_metadata.get_model_context_length", return_value=1_000_000)
+@patch("agent.auxiliary_client.get_text_auxiliary_client")
+def test_feasibility_check_passes_config_context_length(mock_get_client, mock_ctx_len):
+    """A context_length set under auxiliary.compression in the agent config must
+    be passed to get_model_context_length as config_context_length, so custom
+    endpoints without a /models listing still get the right window (see #8499)."""
+    endpoint_url = "http://custom-endpoint:8080/v1"
+    endpoint_key = "sk-custom"
+    configured_window = 1_000_000
+
+    agent = _make_agent(main_context=200_000, threshold_percent=0.85)
+    agent.config = {"auxiliary": {"compression": {"context_length": configured_window}}}
+    agent._emit_status = lambda msg: None
+
+    aux_client = MagicMock()
+    aux_client.base_url = endpoint_url
+    aux_client.api_key = endpoint_key
+    mock_get_client.return_value = (aux_client, "custom/big-model")
+
+    # Run the check; only the arguments forwarded to the lookup matter here.
+    agent._check_compression_model_feasibility()
+
+    mock_ctx_len.assert_called_once_with(
+        "custom/big-model",
+        base_url=endpoint_url,
+        api_key=endpoint_key,
+        config_context_length=configured_window,
+    )
+
+
+@patch("agent.model_metadata.get_model_context_length", return_value=128_000)
+@patch("agent.auxiliary_client.get_text_auxiliary_client")
+def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_ctx_len):
+    """A context_length that is not an integer is dropped without raising."""
+    endpoint_url = "http://custom:8080/v1"
+    endpoint_key = "sk-test"
+
+    agent = _make_agent(main_context=200_000, threshold_percent=0.50)
+    # A string here is not a usable token count.
+    agent.config = {"auxiliary": {"compression": {"context_length": "not-a-number"}}}
+    agent._emit_status = lambda msg: None
+
+    aux_client = MagicMock()
+    aux_client.base_url = endpoint_url
+    aux_client.api_key = endpoint_key
+    mock_get_client.return_value = (aux_client, "custom/model")
+
+    agent._check_compression_model_feasibility()
+
+    # The bad value must reach the metadata lookup as None rather than as-is.
+    mock_ctx_len.assert_called_once_with(
+        "custom/model",
+        base_url=endpoint_url,
+        api_key=endpoint_key,
+        config_context_length=None,
+    )
+
+
@patch("agent.auxiliary_client.get_text_auxiliary_client")
 def test_warns_when_no_auxiliary_provider(mock_get_client):
    """Warning emitted when no auxiliary provider is configured."""
@@ -302,6 +302,17 @@ class TestStripThinkBlocks:
        assert "<think>" not in result
        assert "visible" in result

+    def test_thought_block_removed(self, agent):
+        """A closed <thought> block (the inline-reasoning tag of Gemma 4) is stripped."""
+        raw_text = "<thought>internal reasoning</thought> answer"
+        cleaned = agent._strip_think_blocks(raw_text)
+        assert "internal reasoning" not in cleaned and "<thought>" not in cleaned
+        assert "answer" in cleaned
+
+    def test_orphaned_thought_tag(self, agent):
+        cleaned = agent._strip_think_blocks("<thought>orphaned reasoning without close")
+        assert "<thought>" not in cleaned  # an unclosed opening tag must not survive
+

 class TestExtractReasoning:
    def test_reasoning_field(self, agent):
@@ -1730,9 +1741,9 @@ class TestRunConversation:
            {"role": "assistant", "content": "old answer"},
        ]

-        # 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill)
+        # 6 responses: original + 2 prefill + 3 retries after prefill exhaustion
        with (
-            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]),
+            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6),
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
@@ -1743,18 +1754,18 @@ class TestRunConversation:
        mock_compress.assert_not_called()  # no compression triggered
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"
-        assert result["api_calls"] == 3  # 1 original + 2 prefill continuations
+        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries

    def test_reasoning_only_response_prefill_then_empty(self, agent):
-        """Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty)."""
+        """Structured reasoning-only triggers prefill (2), then retries (3), then (empty)."""
        self._setup_agent(agent)
        empty_resp = _mock_response(
            content=None,
            finish_reason="stop",
            reasoning_content="structured reasoning answer",
        )
-        # 3 responses: original + 2 prefill continuations, all reasoning-only
-        agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp]
+        # 6 responses: 1 original + 2 prefill + 3 retries after prefill exhaustion
+        agent.client.chat.completions.create.side_effect = [empty_resp] * 6
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
@@ -1763,7 +1774,7 @@ class TestRunConversation:
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"
-        assert result["api_calls"] == 3  # 1 original + 2 prefill continuations
+        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries

    def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
        """When prefill continuation produces content, it becomes the final response."""
@@ -1938,6 +1949,88 @@ class TestRunConversation:
        failure_msgs = [m for m in status_messages if "no content" in m.lower() or "no fallback" in m.lower()]
        assert len(failure_msgs) >= 1, f"Expected at least 1 failure status, got: {status_messages}"

+    def test_partial_stream_recovery_uses_streamed_content(self, agent):
+        """Partial content recovered from a failed stream is returned as the final response."""
+        streamed_text = "Here is the partial answer that was stream"
+        self._setup_agent(agent)
+        # The API stub returns exactly the text that the stream had delivered so far.
+        agent.client.chat.completions.create.return_value = _mock_response(
+            content=streamed_text,
+            finish_reason="stop",
+        )
+        # Pretend the streaming layer already accumulated that same text.
+        agent._current_streamed_assistant_text = streamed_text
+        with (
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            outcome = agent.run_conversation("explain something")
+        # Expect the partial text verbatim after a single API call (no retry).
+        assert outcome["completed"] is True
+        assert outcome["final_response"] == streamed_text
+        assert outcome["api_calls"] == 1
+
+    def test_partial_stream_recovery_on_empty_stub(self, agent):
+        """An empty stub response falls back to the text that was already streamed."""
+        streamed_text = "The answer to your question is that"
+        self._setup_agent(agent)
+        status_messages = []
+        # The stub itself carries no content, as if the connection died mid-stream.
+        empty_stub = _mock_response(content=None, finish_reason="stop")
+
+        def _fake_api_call(api_kwargs):
+            # Mimic streaming: text accumulates on the agent before the empty
+            # stub is handed back to the conversation loop.
+            agent._current_streamed_assistant_text = streamed_text
+            return empty_stub
+
+        with (
+            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+            patch.object(agent, "_emit_status", side_effect=lambda msg: status_messages.append(msg)),
+        ):
+            outcome = agent.run_conversation("ask me")
+
+        # The streamed text is recovered instead of falling through to (empty).
+        assert outcome["completed"] is True
+        assert outcome["final_response"] == streamed_text
+        assert outcome["api_calls"] == 1  # one call, no retries
+
+        # A stream-interrupted status is expected ...
+        lowered = [m.lower() for m in status_messages]
+        recovery_msgs = [m for m in lowered if "stream interrupted" in m]
+        assert len(recovery_msgs) >= 1, f"Expected stream recovery status, got: {status_messages}"
+        # ... and no retry status may be emitted at all.
+        retry_msgs = [m for m in lowered if "retrying" in m]
+        assert len(retry_msgs) == 0, f"Should not retry when stream content exists: {status_messages}"
+
+    def test_partial_stream_recovery_preempts_prior_turn_fallback(self, agent):
+        """Text streamed in this turn wins over the _last_content_with_tools fallback."""
+        fresh_text = "Fresh partial content from this turn"
+        self._setup_agent(agent)
+        # Seed the fallback content that an earlier tool-calling turn would have left.
+        agent._last_content_with_tools = "Old content from prior turn with tools"
+        empty_stub = _mock_response(content=None, finish_reason="stop")
+
+        def _fake_api_call(api_kwargs):
+            # Text is streamed for this turn, then the call returns an empty stub.
+            agent._current_streamed_assistant_text = fresh_text
+            return empty_stub
+
+        with (
+            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            outcome = agent.run_conversation("question")
+        # This turn's streamed text must win over the stale prior-turn content.
+        assert outcome["final_response"] == fresh_text
+        assert outcome["api_calls"] == 1
+
    def test_nous_401_refreshes_after_remint_and_retries(self, agent):
        self._setup_agent(agent)
        agent.provider = "nous"
@@ -0,0 +1,120 @@
+"""Tests for empty model fallback — when provider is configured but model is missing."""
+
+from unittest.mock import MagicMock, patch
+import pytest
+
+
+class TestGetDefaultModelForProvider:
+    """Covers hermes_cli.models.get_default_model_for_provider for known and unknown providers."""
+
+    def test_known_provider_returns_first_model(self):
+        from hermes_cli.models import get_default_model_for_provider as default_for
+        default_model = default_for("openai-codex")
+        # Only a non-empty string is required; the exact model name is not pinned.
+        assert default_model
+        assert isinstance(default_model, str)
+
+    def test_openrouter_returns_empty(self):
+        """OpenRouter fetches its models dynamically, so it has no static default."""
+        from hermes_cli.models import get_default_model_for_provider as default_for
+        default_model = default_for("openrouter")
+        # No entry in the static catalog means an empty string comes back.
+        assert default_model == ""
+
+    def test_unknown_provider_returns_empty(self):
+        from hermes_cli.models import get_default_model_for_provider as default_for
+        assert default_for("nonexistent-provider") == ""
+
+    def test_custom_provider_returns_empty(self):
+        """A custom provider name has no catalog entry, so the default is empty."""
+        from hermes_cli.models import get_default_model_for_provider as default_for
+        # Any name outside the static catalog behaves like an unknown provider.
+        assert default_for("some-random-custom") == ""
+
+
+class TestGatewayEmptyModelFallback:
+    """_resolve_session_agent_runtime should supply a model when the config gives none."""
+
+    def test_empty_model_filled_from_provider(self):
+        """With no configured model and the openai-codex provider, a model is filled in."""
+        from gateway.run import GatewayRunner
+
+        codex_runtime = {
+            "provider": "openai-codex",
+            "api_key": "test-key",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_mode": "codex_responses",
+        }
+        # Build the runner without __init__; only the override map is set up here.
+        gateway = object.__new__(GatewayRunner)
+        gateway._session_model_overrides = {}
+
+        # Config yields no model, while the runtime resolves to the openai-codex provider.
+        with patch("gateway.run._resolve_gateway_model", return_value=""), \
+             patch("gateway.run._resolve_runtime_agent_kwargs", return_value=codex_runtime):
+            model, kwargs = gateway._resolve_session_agent_runtime()
+
+        assert model, "Model should not be empty when provider is known"
+        assert isinstance(model, str)
+        assert kwargs["provider"] == "openai-codex"
+
+    def test_nonempty_model_not_overridden(self):
+        """A model that the config already names is returned untouched."""
+        from gateway.run import GatewayRunner
+
+        codex_runtime = {
+            "provider": "openai-codex",
+            "api_key": "test-key",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_mode": "codex_responses",
+        }
+        gateway = object.__new__(GatewayRunner)
+        gateway._session_model_overrides = {}
+
+        with patch("gateway.run._resolve_gateway_model", return_value="gpt-5.4"), \
+             patch("gateway.run._resolve_runtime_agent_kwargs", return_value=codex_runtime):
+            model, kwargs = gateway._resolve_session_agent_runtime()
+        assert model == "gpt-5.4", "Explicit model should not be overridden"
+
+    def test_empty_model_no_provider_stays_empty(self):
+        """With neither a model nor a provider, the resolved model remains empty."""
+        from gateway.run import GatewayRunner
+
+        providerless_runtime = {
+            "provider": "",
+            "api_key": "test-key",
+            "base_url": "https://example.com",
+            "api_mode": "chat_completions",
+        }
+        gateway = object.__new__(GatewayRunner)
+        gateway._session_model_overrides = {}
+
+        with patch("gateway.run._resolve_gateway_model", return_value=""), \
+             patch("gateway.run._resolve_runtime_agent_kwargs", return_value=providerless_runtime):
+            model, kwargs = gateway._resolve_session_agent_runtime()
+        # Without a provider there is no catalog to take a default from.
+        assert model == ""
+
+
+class TestResolveGatewayModel:
+    """_resolve_gateway_model should pull the model name out of each supported config shape."""
+
+    def test_returns_default_key(self):
+        from gateway.run import _resolve_gateway_model as resolve
+        assert resolve({"model": {"default": "gpt-5.4"}}) == "gpt-5.4"
+
+    def test_returns_model_key_fallback(self):
+        from gateway.run import _resolve_gateway_model as resolve
+        assert resolve({"model": {"model": "gpt-5.4"}}) == "gpt-5.4"  # "model" key used when "default" is absent
+
+    def test_returns_empty_when_missing(self):
+        from gateway.run import _resolve_gateway_model as resolve
+        assert resolve({"model": {}}) == ""
+
+    def test_returns_empty_when_no_model_section(self):
+        from gateway.run import _resolve_gateway_model as resolve
+        assert resolve({}) == ""
+
+    def test_string_model_config(self):
+        from gateway.run import _resolve_gateway_model as resolve
+        assert resolve({"model": "my-model"}) == "my-model"  # a bare string is returned as-is
@@ -6,7 +6,8 @@ from unittest.mock import patch

 import pytest

-from hermes_constants import get_default_hermes_root
+import hermes_constants
+from hermes_constants import get_default_hermes_root, is_container


 class TestGetDefaultHermesRoot:
@@ -60,3 +61,53 @@ class TestGetDefaultHermesRoot:
        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        monkeypatch.setenv("HERMES_HOME", str(profile))
        assert get_default_hermes_root() == docker_root
+
+
+class TestIsContainer:
+    """Tests for is_container() — Docker/Podman detection."""
+
+    def _reset_cache(self, monkeypatch):
+        """Reset the cached detection result before each test."""
+        monkeypatch.setattr(hermes_constants, "_container_detected", None)
+
+    def test_detects_dockerenv(self, monkeypatch, tmp_path):
+        """/.dockerenv triggers container detection."""
+        self._reset_cache(monkeypatch)
+        monkeypatch.setattr(os.path, "exists", lambda p: p == "/.dockerenv")
+        assert is_container() is True
+
+    def test_detects_containerenv(self, monkeypatch, tmp_path):
+        """/run/.containerenv triggers container detection (Podman)."""
+        self._reset_cache(monkeypatch)
+        monkeypatch.setattr(os.path, "exists", lambda p: p == "/run/.containerenv")
+        assert is_container() is True
+
+    def test_detects_cgroup_docker(self, monkeypatch, tmp_path):
+        """/proc/1/cgroup containing 'docker' triggers detection."""
+        import builtins
+        self._reset_cache(monkeypatch)
+        monkeypatch.setattr(os.path, "exists", lambda p: False)
+        cgroup_file = tmp_path / "cgroup"
+        cgroup_file.write_text("12:memory:/docker/abc123\n")
+        _real_open = builtins.open
+        monkeypatch.setattr("builtins.open", lambda p, *a, **kw: _real_open(str(cgroup_file), *a, **kw) if p == "/proc/1/cgroup" else _real_open(p, *a, **kw))
+        assert is_container() is True
+
+    def test_negative_case(self, monkeypatch, tmp_path):
+        """Returns False on a regular Linux host."""
+        import builtins
+        self._reset_cache(monkeypatch)
+        monkeypatch.setattr(os.path, "exists", lambda p: False)
+        cgroup_file = tmp_path / "cgroup"
+        cgroup_file.write_text("12:memory:/\n")
+        _real_open = builtins.open
+        monkeypatch.setattr("builtins.open", lambda p, *a, **kw: _real_open(str(cgroup_file), *a, **kw) if p == "/proc/1/cgroup" else _real_open(p, *a, **kw))
+        assert is_container() is False
+
+    def test_caches_result(self, monkeypatch):
+        """Second call uses cached value without re-probing."""
+        monkeypatch.setattr(hermes_constants, "_container_detected", True)
+        assert is_container() is True
+        # Even if we make os.path.exists return False, cached value wins
+        monkeypatch.setattr(os.path, "exists", lambda p: False)
+        assert is_container() is True
@@ -18,6 +18,7 @@ from tools.homeassistant_tool import (
    _handle_call_service,
    _BLOCKED_DOMAINS,
    _ENTITY_ID_RE,
+    _SERVICE_NAME_RE,
 )


@@ -303,6 +304,93 @@ class TestEntityIdValidation:
            assert "Invalid entity_id" not in result["error"]


+# ---------------------------------------------------------------------------
+# Security: domain/service name format validation
+# ---------------------------------------------------------------------------
+
+
+class TestServiceNameValidation:
+    """Verify domain/service format validation prevents path traversal in URL.
+
+    The domain and service parameters are interpolated into
+    /api/services/{domain}/{service}, so allowing arbitrary strings would
+    enable SSRF via path traversal or blocked-domain bypass.
+    """
+
+    def test_valid_domain_names(self):
+        assert _SERVICE_NAME_RE.match("light")
+        assert _SERVICE_NAME_RE.match("switch")
+        assert _SERVICE_NAME_RE.match("climate")
+        assert _SERVICE_NAME_RE.match("shell_command")
+        assert _SERVICE_NAME_RE.match("media_player")
+
+    def test_valid_service_names(self):
+        assert _SERVICE_NAME_RE.match("turn_on")
+        assert _SERVICE_NAME_RE.match("turn_off")
+        assert _SERVICE_NAME_RE.match("set_temperature")
+        assert _SERVICE_NAME_RE.match("toggle")
+
+    def test_path_traversal_in_domain_rejected(self):
+        assert _SERVICE_NAME_RE.match("../../api/config") is None
+        assert _SERVICE_NAME_RE.match("light/../../../etc") is None
+        assert _SERVICE_NAME_RE.match("../config") is None
+
+    def test_path_traversal_in_service_rejected(self):
+        assert _SERVICE_NAME_RE.match("../../api/config") is None
+        assert _SERVICE_NAME_RE.match("turn_on/../../config") is None
+
+    def test_blocked_domain_bypass_via_traversal_rejected(self):
+        """Ensure shell_command/../light is rejected, not just checked against blocklist."""
+        assert _SERVICE_NAME_RE.match("shell_command/../light") is None
+        assert _SERVICE_NAME_RE.match("python_script/../scene") is None
+        assert _SERVICE_NAME_RE.match("hassio/../automation") is None
+
+    def test_slashes_rejected(self):
+        assert _SERVICE_NAME_RE.match("light/turn_on") is None
+        assert _SERVICE_NAME_RE.match("a/b/c") is None
+
+    def test_dots_rejected(self):
+        assert _SERVICE_NAME_RE.match("light.turn_on") is None
+        assert _SERVICE_NAME_RE.match("..") is None
+
+    def test_uppercase_rejected(self):
+        assert _SERVICE_NAME_RE.match("LIGHT") is None
+        assert _SERVICE_NAME_RE.match("Turn_On") is None
+
+    def test_special_chars_rejected(self):
+        assert _SERVICE_NAME_RE.match("light;rm") is None
+        assert _SERVICE_NAME_RE.match("light&cmd") is None
+        assert _SERVICE_NAME_RE.match("light cmd") is None
+
+    def test_handler_rejects_traversal_domain(self):
+        """_handle_call_service must reject domain with path traversal."""
+        result = json.loads(_handle_call_service({
+            "domain": "../../api/config",
+            "service": "turn_on",
+        }))
+        assert "error" in result
+        assert "Invalid domain" in result["error"]
+
+    def test_handler_rejects_traversal_service(self):
+        """_handle_call_service must reject service with path traversal."""
+        result = json.loads(_handle_call_service({
+            "domain": "light",
+            "service": "../../api/config",
+        }))
+        assert "error" in result
+        assert "Invalid service" in result["error"]
+
+    def test_handler_rejects_blocklist_bypass_traversal(self):
+        """Blocklist bypass via shell_command/../light must be caught by format validation."""
+        result = json.loads(_handle_call_service({
+            "domain": "shell_command/../light",
+            "service": "turn_on",
+        }))
+        assert "error" in result
+        # Must be rejected as "Invalid domain", not slip through the blocklist
+        assert "Invalid domain" in result["error"]
+
+
 # ---------------------------------------------------------------------------
 # Availability check
 # ---------------------------------------------------------------------------
@@ -0,0 +1,145 @@
+"""Tests for TTS speed configuration across providers."""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def clean_env(monkeypatch):
+    for key in ("OPENAI_API_KEY", "MINIMAX_API_KEY", "HERMES_SESSION_PLATFORM"):
+        monkeypatch.delenv(key, raising=False)
+
+
+# ---------------------------------------------------------------------------
+# Edge TTS speed
+# ---------------------------------------------------------------------------
+
+class TestEdgeTtsSpeed:
+    def _run(self, tts_config, tmp_path):
+        mock_comm = MagicMock()
+        mock_comm.save = AsyncMock()
+        mock_edge = MagicMock()
+        mock_edge.Communicate = MagicMock(return_value=mock_comm)
+
+        with patch("tools.tts_tool._import_edge_tts", return_value=mock_edge):
+            from tools.tts_tool import _generate_edge_tts
+            asyncio.run(_generate_edge_tts("Hello", str(tmp_path / "out.mp3"), tts_config))
+        return mock_edge.Communicate
+
+    def test_default_no_rate_kwarg(self, tmp_path):
+        """No speed config => no rate kwarg passed to Communicate."""
+        comm_cls = self._run({}, tmp_path)
+        kwargs = comm_cls.call_args[1]
+        assert "rate" not in kwargs
+
+    def test_global_speed_applied(self, tmp_path):
+        """Global tts.speed used as fallback."""
+        comm_cls = self._run({"speed": 1.5}, tmp_path)
+        kwargs = comm_cls.call_args[1]
+        assert kwargs["rate"] == "+50%"
+
+    def test_provider_speed_overrides_global(self, tmp_path):
+        """tts.edge.speed takes precedence over tts.speed."""
+        comm_cls = self._run({"speed": 1.5, "edge": {"speed": 2.0}}, tmp_path)
+        kwargs = comm_cls.call_args[1]
+        assert kwargs["rate"] == "+100%"
+
+    def test_speed_below_one(self, tmp_path):
+        """Speed < 1.0 produces a negative rate string."""
+        comm_cls = self._run({"speed": 0.5}, tmp_path)
+        kwargs = comm_cls.call_args[1]
+        assert kwargs["rate"] == "-50%"
+
+    def test_speed_exactly_one_no_rate(self, tmp_path):
+        """Explicit speed=1.0 should not pass rate kwarg."""
+        comm_cls = self._run({"speed": 1.0}, tmp_path)
+        kwargs = comm_cls.call_args[1]
+        assert "rate" not in kwargs
+
+
+# ---------------------------------------------------------------------------
+# OpenAI TTS speed
+# ---------------------------------------------------------------------------
+
+class TestOpenaiTtsSpeed:
+    def _run(self, tts_config, tmp_path, monkeypatch):
+        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
+        mock_response = MagicMock()
+        mock_client = MagicMock()
+        mock_client.audio.speech.create.return_value = mock_response
+        mock_cls = MagicMock(return_value=mock_client)
+
+        with patch("tools.tts_tool._import_openai_client", return_value=mock_cls), \
+             patch("tools.tts_tool._resolve_openai_audio_client_config",
+                   return_value=("test-key", None)):
+            from tools.tts_tool import _generate_openai_tts
+            _generate_openai_tts("Hello", str(tmp_path / "out.mp3"), tts_config)
+        return mock_client.audio.speech.create
+
+    def test_default_no_speed_kwarg(self, tmp_path, monkeypatch):
+        """No speed config => no speed kwarg in create call."""
+        create = self._run({}, tmp_path, monkeypatch)
+        kwargs = create.call_args[1]
+        assert "speed" not in kwargs
+
+    def test_global_speed_applied(self, tmp_path, monkeypatch):
+        """Global tts.speed used as fallback."""
+        create = self._run({"speed": 1.5}, tmp_path, monkeypatch)
+        kwargs = create.call_args[1]
+        assert kwargs["speed"] == 1.5
+
+    def test_provider_speed_overrides_global(self, tmp_path, monkeypatch):
+        """tts.openai.speed takes precedence over tts.speed."""
+        create = self._run({"speed": 1.5, "openai": {"speed": 2.0}}, tmp_path, monkeypatch)
+        kwargs = create.call_args[1]
+        assert kwargs["speed"] == 2.0
+
+    def test_speed_clamped_low(self, tmp_path, monkeypatch):
+        """Speed below 0.25 is clamped to 0.25."""
+        create = self._run({"speed": 0.1}, tmp_path, monkeypatch)
+        kwargs = create.call_args[1]
+        assert kwargs["speed"] == 0.25
+
+    def test_speed_clamped_high(self, tmp_path, monkeypatch):
+        """Speed above 4.0 is clamped to 4.0."""
+        create = self._run({"speed": 10.0}, tmp_path, monkeypatch)
+        kwargs = create.call_args[1]
+        assert kwargs["speed"] == 4.0
+
+
+# ---------------------------------------------------------------------------
+# MiniMax TTS speed (global fallback wired)
+# ---------------------------------------------------------------------------
+
+class TestMinimaxTtsSpeed:
+    def _run(self, tts_config, tmp_path, monkeypatch):
+        monkeypatch.setenv("MINIMAX_API_KEY", "test-key")
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "data": {"audio": "deadbeef"},
+            "base_resp": {"status_code": 0, "status_msg": "success"},
+            "extra_info": {"audio_size": 8},
+        }
+
+        # requests is imported locally inside _generate_minimax_tts
+        with patch("requests.post", return_value=mock_response) as mock_post:
+            from tools.tts_tool import _generate_minimax_tts
+            _generate_minimax_tts("Hello", str(tmp_path / "out.mp3"), tts_config)
+        return mock_post
+
+    def test_global_speed_fallback(self, tmp_path, monkeypatch):
+        """Global tts.speed used when minimax.speed not set."""
+        mock_post = self._run({"speed": 1.5}, tmp_path, monkeypatch)
+        payload = mock_post.call_args[1]["json"]
+        assert payload["voice_setting"]["speed"] == 1.5
+
+    def test_provider_speed_overrides_global(self, tmp_path, monkeypatch):
+        """tts.minimax.speed takes precedence over tts.speed."""
+        mock_post = self._run(
+            {"speed": 1.5, "minimax": {"speed": 2.0}}, tmp_path, monkeypatch
+        )
+        payload = mock_post.call_args[1]["json"]
+        assert payload["voice_setting"]["speed"] == 2.0
@@ -38,6 +38,15 @@ def _get_config():
 # Regex for valid HA entity_id format (e.g. "light.living_room", "sensor.temperature_1")
 _ENTITY_ID_RE = re.compile(r"^[a-z_][a-z0-9_]*\.[a-z0-9_]+$")

+# Regex for valid HA service/domain names (e.g. "light", "turn_on", "shell_command").
+# Only lowercase ASCII letters, digits, and underscores — no slashes, dots, or
+# other characters that could allow path traversal in URL construction.
+# The domain and service are interpolated into /api/services/{domain}/{service},
+# so arbitrary strings would enable SSRF via path traversal ("../../api/config")
+# or blocked-domain bypass ("shell_command/../light").  Anchored with \Z rather
+# than $ because $ also matches just before a trailing newline ("light\n").
+_SERVICE_NAME_RE = re.compile(r"^[a-z][a-z0-9_]*\Z")
+
 # Service domains blocked for security -- these allow arbitrary code/command
 # execution on the HA host or enable SSRF attacks on the local network.
 # HA provides zero service-level access control; all safety must be in our layer.
@@ -246,6 +255,14 @@ def _handle_call_service(args: dict, **kw) -> str:
    if not domain or not service:
        return tool_error("Missing required parameters: domain and service")

+    # Validate domain/service format BEFORE the blocklist check — prevents
+    # path traversal in /api/services/{domain}/{service} and blocklist bypass
+    # via payloads like "shell_command/../light".
+    if not _SERVICE_NAME_RE.match(domain):
+        return tool_error(f"Invalid domain format: {domain!r}")
+    if not _SERVICE_NAME_RE.match(service):
+        return tool_error(f"Invalid service format: {service!r}")
+
    if domain in _BLOCKED_DOMAINS:
        return json.dumps({
            "error": f"Service domain '{domain}' is blocked for security. "
@@ -322,7 +322,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
    (preserves code-block boundaries, adds part indicators).
    """
    from gateway.config import Platform
-    from gateway.platforms.base import BasePlatformAdapter
+    from gateway.platforms.base import BasePlatformAdapter, utf16_len
    from gateway.platforms.telegram import TelegramAdapter
    from gateway.platforms.discord import DiscordAdapter
    from gateway.platforms.slack import SlackAdapter
@@ -354,9 +354,11 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,

    # Smart-chunk the message to fit within platform limits.
    # For short messages or platforms without a known limit this is a no-op.
+    # Telegram measures length in UTF-16 code units, not Unicode codepoints.
    max_len = _MAX_LENGTHS.get(platform)
    if max_len:
-        chunks = BasePlatformAdapter.truncate_message(message, max_len)
+        _len_fn = utf16_len if platform == Platform.TELEGRAM else None
+        chunks = BasePlatformAdapter.truncate_message(message, max_len, len_fn=_len_fn)
    else:
        chunks = [message]

@@ -296,10 +296,20 @@ class GitHubSource(SkillSource):
        self.taps = list(self.DEFAULT_TAPS)
        if extra_taps:
            self.taps.extend(extra_taps)
+        # Per-instance cache: repo -> (default_branch, tree_entries)
+        # Survives within a single search/install flow, avoiding redundant API calls.
+        self._tree_cache: Dict[str, Tuple[str, List[dict]]] = {}
+        # Set when GitHub returns 403 with rate limit exhausted
+        self._rate_limited: bool = False

    def source_id(self) -> str:
        return "github"

+    @property
+    def is_rate_limited(self) -> bool:
+        """Whether GitHub API rate limit was hit during operations."""
+        return self._rate_limited
+
    def trust_level_for(self, identifier: str) -> str:
        # identifier format: "owner/repo/path/to/skill"
        parts = identifier.split("/", 2)
@@ -443,6 +453,69 @@ class GitHubSource(SkillSource):
        self._write_cache(cache_key, [self._meta_to_dict(s) for s in skills])
        return skills

+    # -- Repo tree cache (avoids redundant API calls) --
+
+    def _get_repo_tree(self, repo: str) -> Optional[Tuple[str, List[dict]]]:
+        """Get cached or fresh repo tree.
+
+        Returns ``(default_branch, tree_entries)`` or ``None``.
+        A single install can call ``_download_directory_via_tree`` and
+        ``_find_skill_in_repo_tree`` multiple times for the same repo — this
+        cache eliminates the redundant ``GET /repos/{repo}`` +
+        ``GET /repos/{repo}/git/trees/{branch}`` round-trips (previously up to
+        6 duplicated pairs per install, consuming ~12 of the 60/hr
+        unauthenticated rate limit for nothing).
+        """
+        if repo in self._tree_cache:
+            return self._tree_cache[repo]
+
+        headers = self.auth.get_headers()
+
+        # Resolve default branch
+        try:
+            resp = httpx.get(
+                f"https://api.github.com/repos/{repo}",
+                headers=headers, timeout=15, follow_redirects=True,
+            )
+            if resp.status_code != 200:
+                self._check_rate_limit_response(resp)
+                return None
+            default_branch = resp.json().get("default_branch", "main")
+        except (httpx.HTTPError, ValueError):
+            return None
+
+        # Fetch recursive tree
+        try:
+            resp = httpx.get(
+                f"https://api.github.com/repos/{repo}/git/trees/{default_branch}",
+                params={"recursive": "1"},
+                headers=headers, timeout=30, follow_redirects=True,
+            )
+            if resp.status_code != 200:
+                self._check_rate_limit_response(resp)
+                return None
+            tree_data = resp.json()
+            if tree_data.get("truncated"):
+                logger.debug("Git tree truncated for %s, cannot cache", repo)
+                return None
+        except (httpx.HTTPError, ValueError):
+            return None
+
+        entries = tree_data.get("tree", [])
+        self._tree_cache[repo] = (default_branch, entries)
+        return (default_branch, entries)
+
+    def _check_rate_limit_response(self, resp: "httpx.Response") -> None:
+        """Flag the instance as rate-limited when GitHub returns 403/429 + exhausted quota."""
+        if resp.status_code in (403, 429):  # GitHub uses either code for primary limits
+            remaining = resp.headers.get("X-RateLimit-Remaining", "")
+            if remaining == "0":
+                self._rate_limited = True
+                logger.warning(
+                    "GitHub API rate limit exhausted (unauthenticated: 60 req/hr). "
+                    "Set GITHUB_TOKEN or install the gh CLI to raise the limit to 5,000/hr."
+                )
+
    def _download_directory(self, repo: str, path: str) -> Dict[str, str]:
        """Recursively download all text files from a GitHub directory.

@@ -458,40 +531,34 @@ class GitHubSource(SkillSource):
        return self._download_directory_recursive(repo, path)

    def _download_directory_via_tree(self, repo: str, path: str) -> Optional[Dict[str, str]]:
-        """Download an entire directory using the Git Trees API (single request)."""
+        """Download an entire directory using the Git Trees API (single request).
+
+        Returns:
+            dict of files if the path exists and has content,
+            empty dict ``{}`` if the tree is cached but the path doesn't exist
+            (prevents unnecessary Contents API fallback),
+            ``None`` if the tree couldn't be fetched (triggers Contents API fallback).
+        """
        path = path.rstrip("/")
-        headers = self.auth.get_headers()

-        # Resolve the default branch via the repo endpoint
-        try:
-            repo_url = f"https://api.github.com/repos/{repo}"
-            resp = httpx.get(repo_url, headers=headers, timeout=15, follow_redirects=True)
-            if resp.status_code != 200:
-                return None
-            default_branch = resp.json().get("default_branch", "main")
-        except (httpx.HTTPError, ValueError):
+        cached = self._get_repo_tree(repo)
+        if cached is None:
            return None
+        _default_branch, tree_entries = cached

-        # Fetch the full recursive tree (branch name works as tree-ish)
-        try:
-            tree_url = f"https://api.github.com/repos/{repo}/git/trees/{default_branch}"
-            resp = httpx.get(
-                tree_url, params={"recursive": "1"},
-                headers=headers, timeout=30, follow_redirects=True,
-            )
-            if resp.status_code != 200:
-                return None
-            tree_data = resp.json()
-            if tree_data.get("truncated"):
-                logger.debug("Git tree truncated for %s, falling back to Contents API", repo)
-                return None
-        except (httpx.HTTPError, ValueError):
-            return None
+        # Check if ANY entry lives under the target path
+        prefix = f"{path}/"
+        has_entries = any(
+            item.get("path", "").startswith(prefix) for item in tree_entries
+        )
+        if not has_entries:
+            # Path definitively doesn't exist in the repo — return empty
+            # instead of None to skip the Contents API fallback.
+            return {}

        # Filter to blobs under our target path and fetch content
-        prefix = f"{path}/"
        files: Dict[str, str] = {}
-        for item in tree_data.get("tree", []):
+        for item in tree_entries:
            if item.get("type") != "blob":
                continue
            item_path = item.get("path", "")
@@ -548,38 +615,14 @@ class GitHubSource(SkillSource):
        handles deeply nested directory structures like
        ``cli-tool/components/skills/development/<skill>/SKILL.md``.
        """
-        # Get default branch
-        try:
-            resp = httpx.get(
-                f"https://api.github.com/repos/{repo}",
-                headers=self.auth.get_headers(),
-                timeout=15,
-                follow_redirects=True,
-            )
-            if resp.status_code != 200:
-                return None
-            default_branch = resp.json().get("default_branch", "main")
-        except (httpx.HTTPError, json.JSONDecodeError):
-            return None
-
-        # Get recursive tree (single API call for the entire repo)
-        try:
-            resp = httpx.get(
-                f"https://api.github.com/repos/{repo}/git/trees/{default_branch}",
-                params={"recursive": "1"},
-                headers=self.auth.get_headers(),
-                timeout=30,
-                follow_redirects=True,
-            )
-            if resp.status_code != 200:
-                return None
-            tree_data = resp.json()
-        except (httpx.HTTPError, json.JSONDecodeError):
+        cached = self._get_repo_tree(repo)
+        if cached is None:
            return None
+        _default_branch, tree_entries = cached

        # Look for SKILL.md files inside directories named <skill_name>
        skill_md_suffix = f"/{skill_name}/SKILL.md"
-        for entry in tree_data.get("tree", []):
+        for entry in tree_entries:
            if entry.get("type") != "blob":
                continue
            path = entry.get("path", "")
@@ -601,6 +644,7 @@ class GitHubSource(SkillSource):
            )
            if resp.status_code == 200:
                return resp.text
+            self._check_rate_limit_response(resp)
        except httpx.HTTPError as e:
            logger.debug("GitHub contents API fetch failed: %s", e)
        return None
@@ -2654,6 +2698,222 @@ def check_for_skill_updates(
    return results


+# ---------------------------------------------------------------------------
+# Hermes centralized index source
+# ---------------------------------------------------------------------------
+
+HERMES_INDEX_URL = "https://hermes-agent.nousresearch.com/docs/api/skills-index.json"
+HERMES_INDEX_CACHE_FILE = INDEX_CACHE_DIR / "hermes-index.json"
+HERMES_INDEX_TTL = 6 * 3600  # 6 hours
+
+
+def _load_hermes_index() -> Optional[dict]:
+    """Fetch the centralized skills index, with local cache.
+
+    The index is a JSON file hosted on the docs site, rebuilt daily by CI.
+    A cache younger than HERMES_INDEX_TTL seconds is returned without any
+    network access; on fetch failure the stale cache (or None) is returned.
+    """
+    # Check local cache
+    if HERMES_INDEX_CACHE_FILE.exists():
+        try:
+            age = time.time() - HERMES_INDEX_CACHE_FILE.stat().st_mtime
+            if age < HERMES_INDEX_TTL:
+                return json.loads(HERMES_INDEX_CACHE_FILE.read_text())
+        except (OSError, ValueError):  # ValueError: bad JSON or undecodable bytes
+            pass
+
+    # Fetch from docs site
+    try:
+        resp = httpx.get(HERMES_INDEX_URL, timeout=15, follow_redirects=True)
+        if resp.status_code != 200:
+            logger.debug("Hermes index fetch returned %d", resp.status_code)
+            return _load_stale_index_cache()
+        data = resp.json()
+    except (httpx.HTTPError, ValueError) as e:  # ValueError: bad JSON or bad encoding
+        logger.debug("Hermes index fetch failed: %s", e)
+        return _load_stale_index_cache()
+
+    # Validate structure
+    if not isinstance(data, dict) or "skills" not in data:
+        return _load_stale_index_cache()
+
+    # Cache locally
+    try:
+        HERMES_INDEX_CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
+        HERMES_INDEX_CACHE_FILE.write_text(json.dumps(data))
+    except OSError:
+        pass
+
+    return data
+
+
+def _load_stale_index_cache() -> Optional[dict]:
+    """Best-effort read of the cached index regardless of age; None if unreadable."""
+    if HERMES_INDEX_CACHE_FILE.exists():
+        try:
+            return json.loads(HERMES_INDEX_CACHE_FILE.read_text())
+        except (OSError, ValueError):  # ValueError: bad JSON or undecodable bytes
+            pass
+    return None
+
+
+class HermesIndexSource(SkillSource):
+    """Skill source backed by the centralized Hermes Skills Index.
+
+    The index is a JSON catalog published to the docs site and rebuilt
+    daily by CI.  It contains metadata + resolved GitHub paths for every
+    skill, eliminating the need for users to hit the GitHub API for
+    search or path discovery.
+
+    When the index is unavailable, all methods return empty / None so
+    downstream sources take over transparently.
+    """
+
+    def __init__(self, auth: GitHubAuth):
+        self._index: Optional[dict] = None
+        self._loaded = False
+        self.auth = auth
+        # Lazily create GitHubSource for fetch — only used when actually
+        # downloading files, which requires real GitHub API calls.
+        self._github: Optional[GitHubSource] = None
+
+    def _ensure_loaded(self) -> dict:
+        if not self._loaded:
+            self._index = _load_hermes_index()
+            self._loaded = True
+        return self._index or {}
+
+    def _get_github(self) -> GitHubSource:
+        if self._github is None:
+            self._github = GitHubSource(auth=self.auth)
+        return self._github
+
+    def source_id(self) -> str:
+        return "hermes-index"
+
+    @property
+    def is_available(self) -> bool:
+        """Whether the index is loaded and has skills."""
+        index = self._ensure_loaded()
+        return bool(index.get("skills"))
+
+    def trust_level_for(self, identifier: str) -> str:
+        index = self._ensure_loaded()
+        for skill in index.get("skills", []):
+            if skill.get("identifier") == identifier:
+                return skill.get("trust_level", "community")
+        return "community"
+
+    def search(self, query: str, limit: int = 10) -> List[SkillMeta]:
+        """Search the cached index.  Zero API calls."""
+        index = self._ensure_loaded()
+        skills = index.get("skills", [])
+        if not skills:
+            return []
+
+        if not query.strip():
+            # No query — return featured/popular
+            return [self._to_meta(s) for s in skills[:limit]]
+
+        query_lower = query.lower()
+        results: List[SkillMeta] = []
+        for s in skills:
+            searchable = f"{s.get('name', '')} {s.get('description', '')} {' '.join(s.get('tags', []))}".lower()
+            if query_lower in searchable:
+                results.append(self._to_meta(s))
+                if len(results) >= limit:
+                    break
+        return results
+
+    def fetch(self, identifier: str) -> Optional[SkillBundle]:
+        """Fetch a skill using the resolved path from the index.
+
+        If the index has a ``resolved_github_id`` for this skill, we skip
+        the entire candidate/discovery chain and go directly to GitHub
+        with the exact path.  This reduces install from ~31 API calls to
+        just the file content downloads (~5-22 depending on skill size).
+        """
+        index = self._ensure_loaded()
+        entry = self._find_entry(identifier, index)
+        if not entry:
+            return None
+
+        # Use resolved path if available
+        resolved = entry.get("resolved_github_id")
+        if resolved:
+            bundle = self._get_github().fetch(resolved)
+            if bundle:
+                bundle.source = entry.get("source", "hermes-index")
+                bundle.identifier = identifier
+                return bundle
+
+        # Fall back to identifier-based fetch via repo/path
+        repo = entry.get("repo", "")
+        path = entry.get("path", "")
+        if repo and path:
+            github_id = f"{repo}/{path}"
+            bundle = self._get_github().fetch(github_id)
+            if bundle:
+                bundle.source = entry.get("source", "hermes-index")
+                bundle.identifier = identifier
+                return bundle
+
+        return None
+
+    def inspect(self, identifier: str) -> Optional[SkillMeta]:
+        """Return metadata from the index.  Zero API calls."""
+        index = self._ensure_loaded()
+        entry = self._find_entry(identifier, index)
+        if entry:
+            return self._to_meta(entry)
+        return None
+
+    def _find_entry(self, identifier: str, index: dict) -> Optional[dict]:
+        """Look up a skill by exact identifier, then by identifier with its source prefix stripped."""
+        skills = index.get("skills", [])
+
+        # Exact identifier match
+        for s in skills:
+            if s.get("identifier") == identifier:
+                return s
+
+        # Try without source prefix (e.g. "skills-sh/" stripped)
+        normalized = identifier
+        for prefix in ("skills-sh/", "skills.sh/", "official/", "github/", "clawhub/"):
+            if identifier.startswith(prefix):
+                normalized = identifier[len(prefix):]
+                break
+
+        # Match on prefix-stripped identifiers (the "name" field is not consulted)
+        for s in skills:
+            sid = s.get("identifier", "")
+            # Strip prefix from stored identifier too
+            stored_normalized = sid
+            for prefix in ("skills-sh/", "skills.sh/", "official/", "github/", "clawhub/"):
+                if sid.startswith(prefix):
+                    stored_normalized = sid[len(prefix):]
+                    break
+            if stored_normalized == normalized:
+                return s
+
+        return None
+
+    @staticmethod
+    def _to_meta(entry: dict) -> SkillMeta:
+        return SkillMeta(
+            name=entry.get("name", ""),
+            description=entry.get("description", ""),
+            source=entry.get("source", "hermes-index"),
+            identifier=entry.get("identifier", ""),
+            trust_level=entry.get("trust_level", "community"),
+            repo=entry.get("repo"),
+            path=entry.get("path"),
+            tags=entry.get("tags", []),
+            extra=entry.get("extra", {}),
+        )
+
+
 def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]:
    """
    Create all configured source adapters.
@@ -2667,6 +2927,7 @@ def create_source_router(auth: Optional[GitHubAuth] = None) -> List[SkillSource]

    sources: List[SkillSource] = [
        OptionalSkillSource(),        # Official optional skills (highest priority)
+        HermesIndexSource(auth=auth), # Centralized index (search + resolved install paths)
        SkillsShSource(auth=auth),
        WellKnownSkillSource(),
        GitHubSource(auth=auth, extra_taps=extra_taps),
@@ -2709,10 +2970,27 @@ def parallel_search_sources(
    per_source_limits = per_source_limits or {}

    active: List[SkillSource] = []
+    # When the centralized index is available and the user hasn't filtered
+    # to a specific source, skip external API sources (github, skills-sh,
+    # clawhub, etc.) — the index already has their data.  This avoids
+    # ~70 GitHub API calls per search for unauthenticated users.
+    _index_available = False
+    _api_source_ids = frozenset({"github", "skills-sh", "clawhub",
+                                  "claude-marketplace", "lobehub", "well-known"})
+    if source_filter == "all":
+        for src in sources:
+            if (src.source_id() == "hermes-index"
+                    and getattr(src, "is_available", False)):
+                _index_available = True
+                break
+
    for src in sources:
        sid = src.source_id()
        if source_filter != "all" and sid != source_filter and sid != "official":
            continue
+        # Skip external API sources when the index covers them
+        if _index_available and sid in _api_source_ids:
+            continue
        active.append(src)

    all_results: List[SkillMeta] = []
@@ -188,8 +188,14 @@ async def _generate_edge_tts(text: str, output_path: str, tts_config: Dict[str,
    _edge_tts = _import_edge_tts()
    edge_config = tts_config.get("edge", {})
    voice = edge_config.get("voice", DEFAULT_EDGE_VOICE)
+    speed = float(edge_config.get("speed", tts_config.get("speed", 1.0)))

-    communicate = _edge_tts.Communicate(text, voice)
+    kwargs = {"voice": voice}
+    if speed != 1.0:
+        pct = round((speed - 1.0) * 100)
+        kwargs["rate"] = f"{pct:+d}%"
+
+    communicate = _edge_tts.Communicate(text, **kwargs)
    await communicate.save(output_path)
    return output_path

@@ -261,6 +267,7 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
    model = oai_config.get("model", DEFAULT_OPENAI_MODEL)
    voice = oai_config.get("voice", DEFAULT_OPENAI_VOICE)
    base_url = oai_config.get("base_url", base_url)
+    speed = float(oai_config.get("speed", tts_config.get("speed", 1.0)))

    # Determine response format from extension
    if output_path.endswith(".ogg"):
@@ -271,13 +278,16 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
    OpenAIClient = _import_openai_client()
    client = OpenAIClient(api_key=api_key, base_url=base_url)
    try:
-        response = client.audio.speech.create(
+        create_kwargs = dict(
            model=model,
            voice=voice,
            input=text,
            response_format=response_format,
            extra_headers={"x-idempotency-key": str(uuid.uuid4())},
        )
+        if speed != 1.0:
+            create_kwargs["speed"] = max(0.25, min(4.0, speed))
+        response = client.audio.speech.create(**create_kwargs)

        response.stream_to_file(output_path)
        return output_path
@@ -314,7 +324,7 @@ def _generate_minimax_tts(text: str, output_path: str, tts_config: Dict[str, Any
    mm_config = tts_config.get("minimax", {})
    model = mm_config.get("model", DEFAULT_MINIMAX_MODEL)
    voice_id = mm_config.get("voice_id", DEFAULT_MINIMAX_VOICE_ID)
-    speed = mm_config.get("speed", 1)
+    speed = mm_config.get("speed", tts_config.get("speed", 1))
    vol = mm_config.get("vol", 1)
    pitch = mm_config.get("pitch", 0)
    base_url = mm_config.get("base_url", DEFAULT_MINIMAX_BASE_URL)
@@ -106,8 +106,9 @@ def detect_audio_environment() -> dict:
    if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
        warnings.append("Running over SSH -- no audio devices available")

-    # Docker detection
-    if os.path.exists('/.dockerenv'):
+    # Docker/Podman container detection
+    from hermes_constants import is_container
+    if is_container():
        warnings.append("Running inside Docker container -- no audio devices")

    # WSL detection — PulseAudio bridge makes audio work in WSL.
@@ -165,6 +165,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
 ]

+[[package]]
+name = "aiosqlite"
+version = "0.22.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/8a/64761f4005f17809769d23e518d915db74e6310474e733e3593cfc854ef1/aiosqlite-0.22.1.tar.gz", hash = "sha256:043e0bd78d32888c0a9ca90fc788b38796843360c855a7262a532813133a0650", size = 14821, upload-time = "2025-12-23T19:25:43.997Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" },
+]
+
 [[package]]
 name = "altair"
 version = "6.0.0"
@@ -240,6 +249,54 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
 ]

+[[package]]
+name = "asyncpg"
+version = "0.31.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = "sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667, upload-time = "2025-11-24T23:27:00.812Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/08/17/cc02bc49bc350623d050fa139e34ea512cd6e020562f2a7312a7bcae4bc9/asyncpg-0.31.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eee690960e8ab85063ba93af2ce128c0f52fd655fdff9fdb1a28df01329f031d", size = 643159, upload-time = "2025-11-24T23:25:36.443Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/62/4ded7d400a7b651adf06f49ea8f73100cca07c6df012119594d1e3447aa6/asyncpg-0.31.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2657204552b75f8288de08ca60faf4a99a65deef3a71d1467454123205a88fab", size = 638157, upload-time = "2025-11-24T23:25:37.89Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/5b/4179538a9a72166a0bf60ad783b1ef16efb7960e4d7b9afe9f77a5551680/asyncpg-0.31.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a429e842a3a4b4ea240ea52d7fe3f82d5149853249306f7ff166cb9948faa46c", size = 2918051, upload-time = "2025-11-24T23:25:39.461Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/35/c27719ae0536c5b6e61e4701391ffe435ef59539e9360959240d6e47c8c8/asyncpg-0.31.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0807be46c32c963ae40d329b3a686356e417f674c976c07fa49f1b30303f109", size = 2972640, upload-time = "2025-11-24T23:25:41.512Z" },
+    { url = "https://files.pythonhosted.org/packages/43/f4/01ebb9207f29e645a64699b9ce0eefeff8e7a33494e1d29bb53736f7766b/asyncpg-0.31.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e5d5098f63beeae93512ee513d4c0c53dc12e9aa2b7a1af5a81cddf93fe4e4da", size = 2851050, upload-time = "2025-11-24T23:25:43.153Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/f4/03ff1426acc87be0f4e8d40fa2bff5c3952bef0080062af9efc2212e3be8/asyncpg-0.31.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37fc6c00a814e18eef51833545d1891cac9aa69140598bb076b4cd29b3e010b9", size = 2962574, upload-time = "2025-11-24T23:25:44.942Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/39/cc788dfca3d4060f9d93e67be396ceec458dfc429e26139059e58c2c244d/asyncpg-0.31.0-cp311-cp311-win32.whl", hash = "sha256:5a4af56edf82a701aece93190cc4e094d2df7d33f6e915c222fb09efbb5afc24", size = 521076, upload-time = "2025-11-24T23:25:46.486Z" },
+    { url = "https://files.pythonhosted.org/packages/28/fc/735af5384c029eb7f1ca60ccb8fa95521dbdaeef788edf4cecfc604c3cab/asyncpg-0.31.0-cp311-cp311-win_amd64.whl", hash = "sha256:480c4befbdf079c14c9ca43c8c5e1fe8b6296c96f1f927158d4f1e750aacc047", size = 584980, upload-time = "2025-11-24T23:25:47.938Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042, upload-time = "2025-11-24T23:25:49.578Z" },
+    { url = "https://files.pythonhosted.org/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504, upload-time = "2025-11-24T23:25:51.501Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241, upload-time = "2025-11-24T23:25:53.278Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321, upload-time = "2025-11-24T23:25:54.982Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685, upload-time = "2025-11-24T23:25:57.43Z" },
+    { url = "https://files.pythonhosted.org/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858, upload-time = "2025-11-24T23:25:59.636Z" },
+    { url = "https://files.pythonhosted.org/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852, upload-time = "2025-11-24T23:26:01.084Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175, upload-time = "2025-11-24T23:26:02.564Z" },
+    { url = "https://files.pythonhosted.org/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111, upload-time = "2025-11-24T23:26:04.467Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928, upload-time = "2025-11-24T23:26:05.944Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067, upload-time = "2025-11-24T23:26:07.957Z" },
+    { url = "https://files.pythonhosted.org/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156, upload-time = "2025-11-24T23:26:09.591Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636, upload-time = "2025-11-24T23:26:11.168Z" },
+    { url = "https://files.pythonhosted.org/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079, upload-time = "2025-11-24T23:26:13.164Z" },
+    { url = "https://files.pythonhosted.org/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606, upload-time = "2025-11-24T23:26:14.78Z" },
+    { url = "https://files.pythonhosted.org/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569, upload-time = "2025-11-24T23:26:16.189Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867, upload-time = "2025-11-24T23:26:17.631Z" },
+    { url = "https://files.pythonhosted.org/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349, upload-time = "2025-11-24T23:26:19.689Z" },
+    { url = "https://files.pythonhosted.org/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428, upload-time = "2025-11-24T23:26:21.275Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678, upload-time = "2025-11-24T23:26:23.627Z" },
+    { url = "https://files.pythonhosted.org/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505, upload-time = "2025-11-24T23:26:25.235Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744, upload-time = "2025-11-24T23:26:26.944Z" },
+    { url = "https://files.pythonhosted.org/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251, upload-time = "2025-11-24T23:26:28.404Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901, upload-time = "2025-11-24T23:26:30.34Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280, upload-time = "2025-11-24T23:26:32Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931, upload-time = "2025-11-24T23:26:33.572Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608, upload-time = "2025-11-24T23:26:35.638Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738, upload-time = "2025-11-24T23:26:37.275Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026, upload-time = "2025-11-24T23:26:39.423Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426, upload-time = "2025-11-24T23:26:41.032Z" },
+    { url = "https://files.pythonhosted.org/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495, upload-time = "2025-11-24T23:26:42.659Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062, upload-time = "2025-11-24T23:26:44.086Z" },
+]
+
 [[package]]
 name = "atroposlib"
 version = "0.4.0"
@@ -1672,6 +1729,8 @@ acp = [
 all = [
    { name = "agent-client-protocol" },
    { name = "aiohttp" },
+    { name = "aiosqlite", marker = "sys_platform == 'linux'" },
+    { name = "asyncpg", marker = "sys_platform == 'linux'" },
    { name = "croniter" },
    { name = "daytona" },
    { name = "debugpy" },
@@ -1727,6 +1786,8 @@ honcho = [
    { name = "honcho-ai" },
 ]
 matrix = [
+    { name = "aiosqlite" },
+    { name = "asyncpg" },
    { name = "markdown" },
    { name = "mautrix", extra = ["encryption"] },
 ]
@@ -1791,7 +1852,9 @@ requires-dist = [
    { name = "aiohttp", marker = "extra == 'homeassistant'", specifier = ">=3.9.0,<4" },
    { name = "aiohttp", marker = "extra == 'messaging'", specifier = ">=3.13.3,<4" },
    { name = "aiohttp", marker = "extra == 'sms'", specifier = ">=3.9.0,<4" },
+    { name = "aiosqlite", marker = "extra == 'matrix'", specifier = ">=0.20" },
    { name = "anthropic", specifier = ">=0.39.0,<1" },
+    { name = "asyncpg", marker = "extra == 'matrix'", specifier = ">=0.29" },
    { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git" },
    { name = "croniter", marker = "extra == 'cron'", specifier = ">=6.0.0,<7" },
    { name = "daytona", marker = "extra == 'daytona'", specifier = ">=0.148.0,<1" },
@@ -0,0 +1,48 @@
+# Hermes Agent — Web UI
+
+Browser-based dashboard for managing Hermes Agent configuration and API keys, and for monitoring active sessions.
+
+## Stack
+
+- **Vite** + **React 19** + **TypeScript**
+- **Tailwind CSS v4** with custom dark theme
+- **shadcn/ui**-style components (hand-rolled, no CLI dependency)
+
+## Development
+
+```bash
+# Start the backend API server
+cd ../
+python -m hermes_cli.main web --no-open
+
+# In another terminal, start the Vite dev server (with HMR + API proxy)
+cd web/
+npm run dev
+```
+
+The Vite dev server proxies `/api` requests to `http://127.0.0.1:9119` (the FastAPI backend).
+
+## Build
+
+```bash
+npm run build
+```
+
+This outputs to `../hermes_cli/web_dist/`, which the FastAPI server serves as a static SPA. The built assets are included in the Python package via `pyproject.toml` package-data.
+
+## Structure
+
+```
+src/
+├── components/ui/   # Reusable UI primitives (Card, Badge, Button, Input, etc.)
+├── lib/
+│   ├── api.ts       # API client — typed fetch wrappers for all backend endpoints
+│   └── utils.ts     # cn() helper for Tailwind class merging
+├── pages/
+│   ├── StatusPage   # Agent status, active/recent sessions
+│   ├── ConfigPage   # Dynamic config editor (reads schema from backend)
+│   └── EnvPage      # API key management with save/clear
+├── App.tsx          # Main layout and navigation
+├── main.tsx         # React entry point
+└── index.css        # Tailwind imports and theme variables
+```
@@ -0,0 +1,23 @@
+// ESLint flat configuration: applies the base JS, typescript-eslint,
+// React Hooks and React Refresh rule sets to TypeScript/TSX sources.
+import js from '@eslint/js'
+import globals from 'globals'
+import reactHooks from 'eslint-plugin-react-hooks'
+import reactRefresh from 'eslint-plugin-react-refresh'
+import tseslint from 'typescript-eslint'
+import { defineConfig, globalIgnores } from 'eslint/config'
+
+export default defineConfig([
+  // Exclude the `dist` directory from linting.
+  // NOTE(review): a README elsewhere in this change says the build outputs to
+  // ../hermes_cli/web_dist/ — confirm `dist` is still the right path to ignore.
+  globalIgnores(['dist']),
+  {
+    // Only .ts and .tsx files are covered by this config object.
+    files: ['**/*.{ts,tsx}'],
+    // Shared rule sets, listed in the order they are applied.
+    extends: [
+      js.configs.recommended,
+      tseslint.configs.recommended,
+      reactHooks.configs.flat.recommended,
+      reactRefresh.configs.vite,
+    ],
+    languageOptions: {
+      ecmaVersion: 2020,
+      // Declare the browser global set provided by the `globals` package.
+      globals: globals.browser,
+    },
+  },
+])
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <!-- The favicon is an .ico file, so the declared MIME type must be the ICO type, not SVG. -->
+    <link rel="icon" type="image/x-icon" href="/favicon.ico" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Hermes Agent</title>
+  </head>
+  <body>
+    <!-- Mount point for the React application loaded by the module script below. -->
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
--- a/Show More
+++ b/Show More