feat: deep-research skill

fix: extend ASCII-locale UnicodeEncodeError recovery to full request payload
The existing ASCII codec handler only sanitized conversation messages, leaving tool schemas, system prompts, ephemeral prompts, prefill messages, and HTTP headers as unhandled sources of non-ASCII content. On systems with LANG=C or non-UTF-8 locale, Unicode symbols in tool descriptions (e.g. arrows, em-dashes from prompt_builder) and system prompt content would cause UnicodeEncodeError that fell through to the error path. Changes: - Add _sanitize_structure_non_ascii() generic recursive walker for nested dict/list payloads - Add _sanitize_tools_non_ascii() thin wrapper for tool schemas - Add _force_ascii_payload flag: once ASCII locale is detected, all subsequent API calls get proactively sanitized (prevents recurring failures from new tool results bringing fresh Unicode each turn) - Extend the ASCII codec error handler to sanitize: prefill_messages, tool schemas (self.tools), system prompt, ephemeral system prompt, and default HTTP headers - Update stale comment that acknowledged the gap Cherry-picked from PR #8834 (credential pool changes dropped as separate concern).
2026-04-13 16:41:48 +00:00 · 2026-04-13 05:16:35 -07:00 · 2026-04-13 05:16:21 -07:00 · 2026-04-13 05:16:21 -07:00 · 2026-04-13 05:15:05 -07:00 · 2026-04-13 05:12:36 -07:00
217 changed files with 28630 additions and 2075 deletions
@@ -5,6 +5,7 @@

 # Dependencies
 node_modules
+.venv

 # CI/CD
 .github
@@ -0,0 +1,2 @@
+# Auto-generated files — collapse diffs and exclude from language stats
+web/package-lock.json linguist-generated=true
@@ -41,11 +41,19 @@ jobs:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml
+        run: pip install pyyaml httpx

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

+      - name: Build skills index (if not already present)
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [ ! -f website/static/api/skills-index.json ]; then
+            python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
+          fi
+
      - name: Install dependencies
        run: npm ci
        working-directory: website
@@ -69,9 +69,7 @@ jobs:
          file: Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
-          tags: |
-            nousresearch/hermes-agent:latest
-            nousresearch/hermes-agent:${{ github.sha }}
+          tags: nousresearch/hermes-agent:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max

@@ -83,9 +81,6 @@ jobs:
          file: Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
-          tags: |
-            nousresearch/hermes-agent:latest
-            nousresearch/hermes-agent:${{ github.event.release.tag_name }}
-            nousresearch/hermes-agent:${{ github.sha }}
+          tags: nousresearch/hermes-agent:${{ github.event.release.tag_name }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
@@ -0,0 +1,101 @@
+name: Build Skills Index
+
+on:
+  schedule:
+    # Run twice daily: 6 AM and 6 PM UTC
+    - cron: '0 6,18 * * *'
+  workflow_dispatch:  # Manual trigger
+  # Also run on pushes to main that touch the index builder script or the
+  # workflow file listed below.
+  push:
+    branches: [main]
+    paths:
+      - 'scripts/build_skills_index.py'
+      - '.github/workflows/skills-index.yml'
+
+# Workflow-wide default: read-only access to repository contents.
+permissions:
+  contents: read
+
+jobs:
+  # Generates website/static/api/skills-index.json and publishes it as a
+  # short-lived artifact for the deploy-with-index job.
+  build-index:
+    # Only run on the upstream repository, not on forks
+    if: github.repository == 'NousResearch/hermes-agent'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: pip install httpx pyyaml
+
+      - name: Build skills index
+        # The workflow token is exposed to the script through the environment.
+        # NOTE(review): presumably used for authenticated GitHub API requests;
+        # confirm in scripts/build_skills_index.py.
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: python scripts/build_skills_index.py
+
+      - name: Upload index artifact
+        uses: actions/upload-artifact@v4
+        with:
+          # Artifact name must match the download step in deploy-with-index.
+          name: skills-index
+          path: website/static/api/skills-index.json
+          retention-days: 7
+
+  # Rebuilds the documentation site with the freshly generated skills index
+  # and publishes it to GitHub Pages.
+  deploy-with-index:
+    needs: build-index
+    runs-on: ubuntu-latest
+    # A job-level permissions block replaces the workflow-level one, and any
+    # scope not listed here is set to "none". contents: read is restated so
+    # actions/checkout keeps read access to the repository.
+    permissions:
+      contents: read
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deploy.outputs.page_url }}
+    # Only deploy on schedule or manual trigger (not on every push to the script)
+    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
+    steps:
+      - uses: actions/checkout@v4
+
+      # Place the index produced by build-index where the site build expects it.
+      - uses: actions/download-artifact@v4
+        with:
+          name: skills-index
+          path: website/static/api/
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: npm
+          cache-dependency-path: website/package-lock.json
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install PyYAML for skill extraction
+        run: pip install pyyaml
+
+      - name: Extract skill metadata for dashboard
+        run: python3 website/scripts/extract-skills.py
+
+      - name: Install dependencies
+        run: npm ci
+        working-directory: website
+
+      - name: Build Docusaurus
+        run: npm run build
+        working-directory: website
+
+      # Assemble the Pages payload: landing page at the root, docs under /docs,
+      # plus the CNAME file for the custom domain.
+      - name: Stage deployment
+        run: |
+          mkdir -p _site/docs
+          cp -r landingpage/* _site/
+          cp -r website/build/* _site/docs/
+          echo "hermes-agent.nousresearch.com" > _site/CNAME
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: _site
+
+      - name: Deploy to GitHub Pages
+        id: deploy
+        uses: actions/deploy-pages@v4
@@ -51,6 +51,9 @@ ignored/
 .worktrees/
 environments/benchmarks/evals/

+# Web UI build output
+hermes_cli/web_dist/
+
 # Release script temp files
 .release_notes.md
 mini-swe-agent/
@@ -58,3 +61,4 @@ mini-swe-agent/
 # Nix
 .direnv/
 result
+website/static/api/skills-index.json
@@ -1,27 +1,44 @@
+FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
+FROM tianon/gosu:1.19-trixie@sha256:3b176695959c71e123eb390d427efc665eeb561b1540e82679c15e992006b8b9 AS gosu_source
 FROM debian:13.4

 # Disable Python stdout buffering to ensure logs are printed immediately
 ENV PYTHONUNBUFFERED=1

+# Store Playwright browsers outside the volume mount so the build-time
+# install survives the /opt/data volume overlay at runtime.
+ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
+
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev procps && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps && \
    rm -rf /var/lib/apt/lists/*

+# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
+RUN useradd -u 10000 -m -d /opt/data hermes
+
+COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
+COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
+
 COPY . /opt/hermes
 WORKDIR /opt/hermes

-# Install Python and Node dependencies in one layer, no cache
-RUN pip install --no-cache-dir uv --break-system-packages && \
-    uv pip install --system --break-system-packages --no-cache -e ".[all]" && \
-    npm install --prefer-offline --no-audit && \
+# Install Node dependencies and Playwright as root (--with-deps needs apt)
+RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
    cd /opt/hermes/scripts/whatsapp-bridge && \
    npm install --prefer-offline --no-audit && \
    npm cache clean --force

-WORKDIR /opt/hermes
+# Hand ownership to hermes user, then install Python deps in a virtualenv
+RUN chown -R hermes:hermes /opt/hermes
+USER hermes
+
+RUN uv venv && \
+    uv pip install --no-cache-dir -e ".[all]"
+
+USER root
 RUN chmod +x /opt/hermes/docker/entrypoint.sh

 ENV HERMES_HOME=/opt/data
@@ -167,6 +167,7 @@ python -m pytest tests/ -q
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
 - 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
+- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---

@@ -27,10 +27,6 @@ Per-task overrides are configured in config.yaml under the ``auxiliary:`` sectio
 (e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
 Default "auto" follows the chains above.

-Legacy env var overrides (AUXILIARY_{TASK}_PROVIDER, AUXILIARY_{TASK}_MODEL,
-AUXILIARY_{TASK}_BASE_URL, etc.) are still read as a backward-compat fallback
-but config.yaml takes priority.  New configuration should always use config.yaml.
-
 Payment / credit exhaustion fallback:
  When a resolved provider returns HTTP 402 or a credit-related error,
  call_llm() automatically retries with the next available provider in the
@@ -75,13 +71,13 @@ _PROVIDER_ALIASES = {
 }


-def _normalize_aux_provider(provider: Optional[str], *, for_vision: bool = False) -> str:
+def _normalize_aux_provider(provider: Optional[str]) -> str:
    normalized = (provider or "auto").strip().lower()
    if normalized.startswith("custom:"):
        suffix = normalized.split(":", 1)[1].strip()
        if not suffix:
            return "custom"
-        normalized = suffix if not for_vision else "custom"
+        normalized = suffix
    if normalized == "codex":
        return "openai-codex"
    if normalized == "main":
@@ -753,30 +749,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

 # ── Provider resolution helpers ─────────────────────────────────────────────

-def _get_auxiliary_provider(task: str = "") -> str:
-    """Read the provider override for a specific auxiliary task.
-
-    Checks AUXILIARY_{TASK}_PROVIDER first (e.g. AUXILIARY_VISION_PROVIDER),
-    then CONTEXT_{TASK}_PROVIDER (for the compression section's summary_provider),
-    then falls back to "auto".  Returns one of: "auto", "openrouter", "nous", "main".
-    """
-    if task:
-        for prefix in ("AUXILIARY_", "CONTEXT_"):
-            val = os.getenv(f"{prefix}{task.upper()}_PROVIDER", "").strip().lower()
-            if val and val != "auto":
-                return val
-    return "auto"
-
-
-def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
-    """Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
-    if not task:
-        return None
-    for prefix in ("AUXILIARY_", "CONTEXT_"):
-        val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
-        if val:
-            return val
-    return None


 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -1021,6 +993,23 @@ _AUTO_PROVIDER_LABELS = {

 _AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})

+# Keys recognised in a live main-runtime override, in canonical order.
+_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
+
+
+def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, str]:
+    """Build a cleaned copy of a main-runtime override mapping.
+
+    Only the keys named in ``_MAIN_RUNTIME_FIELDS`` are considered, and only
+    when their value is a string that is non-empty after stripping
+    whitespace.  The ``provider`` value, when present, is lower-cased.
+
+    Args:
+        main_runtime: Candidate override mapping; anything that is not a
+            ``dict`` yields an empty result.
+
+    Returns:
+        A new dict holding the stripped string values that survived.
+    """
+    if not isinstance(main_runtime, dict):
+        return {}
+    cleaned: Dict[str, str] = {}
+    for key in _MAIN_RUNTIME_FIELDS:
+        raw = main_runtime.get(key)
+        if not isinstance(raw, str):
+            continue
+        stripped = raw.strip()
+        if stripped:
+            cleaned[key] = stripped
+    # Stored values are never empty, so key presence is a sufficient check.
+    if "provider" in cleaned:
+        cleaned["provider"] = cleaned["provider"].lower()
+    return cleaned
+

 def _get_provider_chain() -> List[tuple]:
    """Return the ordered provider detection chain.
@@ -1130,7 +1119,7 @@ def _try_payment_fallback(
    return None, None, ""


-def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -1142,6 +1131,12 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    """
    global auxiliary_is_nous, _stale_base_url_warned
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
+    runtime = _normalize_main_runtime(main_runtime)
+    runtime_provider = runtime.get("provider", "")
+    runtime_model = runtime.get("model", "")
+    runtime_base_url = runtime.get("base_url", "")
+    runtime_api_key = runtime.get("api_key", "")
+    runtime_api_mode = runtime.get("api_mode", "")

    # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
    #    provider (not 'custom').  This catches the common "env poisoning"
@@ -1149,7 +1144,7 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    #    old OPENAI_BASE_URL lingers in ~/.hermes/.env. ──
    if not _stale_base_url_warned:
        _env_base = os.getenv("OPENAI_BASE_URL", "").strip()
-        _cfg_provider = _read_main_provider()
+        _cfg_provider = runtime_provider or _read_main_provider()
        if (_env_base and _cfg_provider
                and _cfg_provider != "custom"
                and not _cfg_provider.startswith("custom:")):
@@ -1163,12 +1158,25 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
            _stale_base_url_warned = True

    # ── Step 1: non-aggregator main provider → use main model directly ──
-    main_provider = _read_main_provider()
-    main_model = _read_main_model()
+    main_provider = runtime_provider or _read_main_provider()
+    main_model = runtime_model or _read_main_model()
    if (main_provider and main_model
            and main_provider not in _AGGREGATOR_PROVIDERS
            and main_provider not in ("auto", "")):
-        client, resolved = resolve_provider_client(main_provider, main_model)
+        resolved_provider = main_provider
+        explicit_base_url = None
+        explicit_api_key = None
+        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
+            resolved_provider = "custom"
+            explicit_base_url = runtime_base_url
+            explicit_api_key = runtime_api_key or None
+        client, resolved = resolve_provider_client(
+            resolved_provider,
+            main_model,
+            explicit_base_url=explicit_base_url,
+            explicit_api_key=explicit_api_key,
+            api_mode=runtime_api_mode or None,
+        )
        if client is not None:
            logger.info("Auxiliary auto-detect: using main provider %s (%s)",
                        main_provider, resolved or main_model)
@@ -1249,6 +1257,7 @@ def resolve_provider_client(
    explicit_base_url: str = None,
    explicit_api_key: str = None,
    api_mode: str = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1319,7 +1328,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto()
+        client, resolved = _resolve_auto(main_runtime=main_runtime)
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -1543,15 +1552,19 @@ def resolve_provider_client(

 # ── Public API ──────────────────────────────────────────────────────────────

-def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
+def get_text_auxiliary_client(
+    task: str = "",
+    *,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, default_model_slug) for text-only auxiliary tasks.

    Args:
        task: Optional task name ("compression", "web_extract") to check
              for a task-specific provider override.

-    Callers may override the returned model with a per-task env var
-    (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
+    Callers may override the returned model via config.yaml
+    (e.g. auxiliary.compression.model, auxiliary.web_extract.model).
    """
    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
    return resolve_provider_client(
@@ -1560,10 +1573,11 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
+        main_runtime=main_runtime,
    )


-def get_async_text_auxiliary_client(task: str = ""):
+def get_async_text_auxiliary_client(task: str = "", *, main_runtime: Optional[Dict[str, Any]] = None):
    """Return (async_client, model_slug) for async consumers.

    For standard providers returns (AsyncOpenAI, model). For Codex returns
@@ -1578,6 +1592,7 @@ def get_async_text_auxiliary_client(task: str = ""):
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
+        main_runtime=main_runtime,
    )


@@ -1588,7 +1603,7 @@ _VISION_AUTO_PROVIDER_ORDER = (


 def _normalize_vision_provider(provider: Optional[str]) -> str:
-    return _normalize_aux_provider(provider, for_vision=True)
+    return _normalize_aux_provider(provider)


 def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
@@ -1671,6 +1686,7 @@ def resolve_vision_provider_client(
            async_mode=async_mode,
            explicit_base_url=resolved_base_url,
            explicit_api_key=resolved_api_key,
+            api_mode=resolved_api_mode,
        )
        if client is None:
            return "custom", None, None
@@ -1695,7 +1711,8 @@ def resolve_vision_provider_client(
                # Use provider-specific vision model if available, otherwise main model.
                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
                rpc_client, rpc_model = resolve_provider_client(
-                    main_provider, vision_model)
+                    main_provider, vision_model,
+                    api_mode=resolved_api_mode)
                if rpc_client is not None:
                    logger.info(
                        "Vision auto-detect: using active provider %s (%s)",
@@ -1719,7 +1736,8 @@ def resolve_vision_provider_client(
        sync_client, default_model = _resolve_strict_vision_backend(requested)
        return _finalize(requested, sync_client, default_model)

-    client, final_model = _get_cached_client(requested, resolved_model, async_mode)
+    client, final_model = _get_cached_client(requested, resolved_model, async_mode,
+                                             api_mode=resolved_api_mode)
    if client is None:
        return requested, None, None
    return requested, client, final_model
@@ -1892,6 +1910,7 @@ def _get_cached_client(
    base_url: str = None,
    api_key: str = None,
    api_mode: str = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -1915,7 +1934,9 @@ def _get_cached_client(
            loop_id = id(current_loop)
        except RuntimeError:
            pass
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id)
+    runtime = _normalize_main_runtime(main_runtime)
+    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key)
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -1940,6 +1961,7 @@ def _get_cached_client(
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
+        main_runtime=runtime,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -1964,9 +1986,8 @@ def _resolve_task_provider_model(

    Priority:
      1. Explicit provider/model/base_url/api_key args (always win)
-      2. Config file (auxiliary.{task}.* or compression.*)
-      3. Env var overrides (backward-compat: AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
-      4. "auto" (full auto-detection chain)
+      2. Config file (auxiliary.{task}.provider/model/base_url)
+      3. "auto" (full auto-detection chain)

    Returns (provider, model, base_url, api_key, api_mode) where model may
    be None (use provider default). When base_url is set, provider is forced
@@ -1997,22 +2018,8 @@ def _resolve_task_provider_model(
        cfg_api_key = str(task_config.get("api_key", "")).strip() or None
        cfg_api_mode = str(task_config.get("api_mode", "")).strip() or None

-        # Backwards compat: compression section has its own keys.
-        # The auxiliary.compression defaults to provider="auto", so treat
-        # both None and "auto" as "not explicitly configured".
-        if task == "compression" and (not cfg_provider or cfg_provider == "auto"):
-            comp = config.get("compression", {}) if isinstance(config, dict) else {}
-            if isinstance(comp, dict):
-                cfg_provider = comp.get("summary_provider", "").strip() or None
-                cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
-                _sbu = comp.get("summary_base_url") or ""
-                cfg_base_url = cfg_base_url or _sbu.strip() or None
-
-    # Env vars are backward-compat fallback only — config.yaml is primary.
-    env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
-    env_api_mode = _get_auxiliary_env_override(task, "API_MODE") if task else None
-    resolved_model = model or cfg_model or env_model
-    resolved_api_mode = cfg_api_mode or env_api_mode
+    resolved_model = model or cfg_model
+    resolved_api_mode = cfg_api_mode

    if base_url:
        return "custom", resolved_model, base_url, api_key, resolved_api_mode
@@ -2026,17 +2033,6 @@ def _resolve_task_provider_model(
        if cfg_provider and cfg_provider != "auto":
            return cfg_provider, resolved_model, None, None, resolved_api_mode

-        # Env vars are backward-compat fallback for users who haven't
-        # migrated to config.yaml yet.
-        env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
-        env_api_key = _get_auxiliary_env_override(task, "API_KEY")
-        if env_base_url:
-            return "custom", resolved_model, env_base_url, env_api_key, resolved_api_mode
-
-        env_provider = _get_auxiliary_provider(task)
-        if env_provider != "auto":
-            return env_provider, resolved_model, None, None, resolved_api_mode
-
        return "auto", resolved_model, None, None, resolved_api_mode

    return "auto", resolved_model, None, None, resolved_api_mode
@@ -2065,6 +2061,75 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
    return default


+# ---------------------------------------------------------------------------
+# Anthropic-compatible endpoint detection + image block conversion
+# ---------------------------------------------------------------------------
+
+# Provider names reached through the OpenAI SDK wrapper whose endpoints are
+# nevertheless Anthropic-compatible: image content blocks sent to them must
+# be in Anthropic format rather than OpenAI format.
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
+
+
+def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
+    """Report whether an endpoint expects Anthropic-format content blocks.
+
+    Args:
+        provider: Provider name, checked against the known
+            Anthropic-compatible set (MiniMax variants).
+        base_url: Endpoint URL; may be empty or ``None``.
+
+    Returns:
+        True when the provider is in the known set, or when the URL string
+        contains ``/anthropic`` (compared case-insensitively).
+    """
+    return (
+        provider in _ANTHROPIC_COMPAT_PROVIDERS
+        or "/anthropic" in (base_url or "").lower()
+    )
+
+
+def _convert_openai_images_to_anthropic(messages: list) -> list:
+    """Convert OpenAI ``image_url`` content blocks to Anthropic ``image`` blocks.
+
+    Only messages whose ``content`` is a list containing ``image_url`` blocks
+    are rebuilt (as shallow copies); every other message is passed through as
+    the same object.  Inputs are never mutated.
+
+    Malformed input is tolerated instead of raising:
+      - non-dict messages and non-dict content blocks pass through untouched;
+      - ``image_url`` may be the usual ``{"url": ...}`` mapping or a bare
+        string;
+      - a missing or non-string URL is treated as the empty string;
+      - a data URI with an empty media type falls back to ``image/png``.
+
+    Args:
+        messages: Chat messages in OpenAI format.
+
+    Returns:
+        A new list of messages with image blocks in Anthropic format.
+    """
+    converted = []
+    for msg in messages:
+        content = msg.get("content") if isinstance(msg, dict) else None
+        if not isinstance(content, list):
+            converted.append(msg)
+            continue
+        new_content = []
+        changed = False
+        for block in content:
+            if not isinstance(block, dict) or block.get("type") != "image_url":
+                new_content.append(block)
+                continue
+            # Accept both {"image_url": {"url": ...}} and {"image_url": "..."}.
+            image_ref = block.get("image_url")
+            if isinstance(image_ref, dict):
+                image_ref = image_ref.get("url")
+            image_url_val = image_ref if isinstance(image_ref, str) else ""
+            if image_url_val.startswith("data:"):
+                # Parse data URI: data:<media_type>;base64,<data>
+                header, _, b64data = image_url_val.partition(",")
+                media_type = ""
+                if ":" in header and ";" in header:
+                    media_type = header.split(":", 1)[1].split(";", 1)[0].strip()
+                source = {
+                    "type": "base64",
+                    # Default when the URI omits (or leaves empty) the type.
+                    "media_type": media_type or "image/png",
+                    "data": b64data,
+                }
+            else:
+                # URL-based image
+                source = {"type": "url", "url": image_url_val}
+            new_content.append({"type": "image", "source": source})
+            changed = True
+        converted.append({**msg, "content": new_content} if changed else msg)
+    return converted
+
+
+
 def _build_call_kwargs(
    provider: str,
    model: str,
@@ -2149,6 +2214,7 @@ def call_llm(
    model: str = None,
    base_url: str = None,
    api_key: str = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
@@ -2214,6 +2280,7 @@ def call_llm(
            base_url=resolved_base_url,
            api_key=resolved_api_key,
            api_mode=resolved_api_mode,
+            main_runtime=main_runtime,
        )
        if client is None:
            # When the user explicitly chose a non-OpenRouter provider but no
@@ -2234,7 +2301,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto")
+                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -2255,6 +2322,11 @@ def call_llm(
        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

+    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
+    _client_base = str(getattr(client, "base_url", "") or "")
+    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
+        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+
    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
    try:
        return _validate_llm_response(
@@ -2331,9 +2403,9 @@ def extract_content_or_reasoning(response) -> str:
    if content:
        # Strip inline think/reasoning blocks (mirrors _strip_think_blocks)
        cleaned = re.sub(
-            r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>"
+            r"<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>"
            r".*?"
-            r"</(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>",
+            r"</(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>",
            "", content, flags=re.DOTALL | re.IGNORECASE,
        ).strip()
        if cleaned:
@@ -2443,6 +2515,11 @@ async def async_call_llm(
        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

+    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
+    _client_base = str(getattr(client, "base_url", "") or "")
+    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
+        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+
    try:
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
@@ -4,8 +4,12 @@ Self-contained class with its own OpenAI client for summarization.
 Uses auxiliary model (cheap/fast) to summarize middle turns while
 protecting head and tail context.

-Improvements over v1:
-  - Structured summary template (Goal, Progress, Decisions, Files, Next Steps)
+Improvements over v2:
+  - Structured summary template with Resolved/Pending question tracking
+  - Summarizer preamble: "Do not respond to any questions" (from OpenCode)
+  - Handoff framing: "different assistant" (from Codex) to create separation
+  - "Remaining Work" replaces "Next Steps" to avoid reading as active instructions
+  - Clear separator when summary merges into tail message
  - Iterative summary updates (preserves info across multiple compactions)
  - Token-budget tail protection instead of fixed message count
  - Tool output pruning before LLM summarization (cheap pre-pass)
@@ -28,12 +32,13 @@ from agent.model_metadata import (
 logger = logging.getLogger(__name__)

 SUMMARY_PREFIX = (
-    "[CONTEXT COMPACTION] Earlier turns in this conversation were compacted "
-    "to save context space. The summary below describes work that was "
-    "already completed, and the current session state may still reflect "
-    "that work (for example, files may already be changed). Use the summary "
-    "and the current state to continue from where things left off, and "
-    "avoid repeating work:"
+    "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
+    "into the summary below. This is a handoff from a previous context "
+    "window — treat it as background reference, NOT as active instructions. "
+    "Do NOT answer questions or fulfill requests mentioned in this summary; "
+    "they were already addressed. Respond ONLY to the latest user message "
+    "that appears AFTER this summary. The current session state (files, "
+    "config, etc.) may reflect work described here — avoid repeating it:"
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"

@@ -81,12 +86,14 @@ class ContextCompressor(ContextEngine):
        base_url: str = "",
        api_key: str = "",
        provider: str = "",
+        api_mode: str = "",
    ) -> None:
        """Update model info after a model switch or fallback activation."""
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.provider = provider
+        self.api_mode = api_mode
        self.context_length = context_length
        self.threshold_tokens = max(
            int(context_length * self.threshold_percent),
@@ -106,11 +113,13 @@ class ContextCompressor(ContextEngine):
        api_key: str = "",
        config_context_length: int | None = None,
        provider: str = "",
+        api_mode: str = "",
    ):
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.provider = provider
+        self.api_mode = api_mode
        self.threshold_percent = threshold_percent
        self.protect_first_n = protect_first_n
        self.protect_last_n = protect_last_n
@@ -306,13 +315,20 @@ class ContextCompressor(ContextEngine):

        return "\n\n".join(parts)

-    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
+    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]], focus_topic: Optional[str] = None) -> Optional[str]:
        """Generate a structured summary of conversation turns.

-        Uses a structured template (Goal, Progress, Decisions, Files, Next Steps)
-        inspired by Pi-mono and OpenCode. When a previous summary exists,
+        Uses a structured template (Goal, Progress, Decisions, Resolved/Pending
+        Questions, Files, Remaining Work) with explicit preamble telling the
+        summarizer not to answer questions.  When a previous summary exists,
        generates an iterative update instead of summarizing from scratch.

+        Args:
+            focus_topic: Optional focus string for guided compression.  When
+                provided, the summariser prioritises preserving information
+                related to this topic and is more aggressive about compressing
+                everything else.  Inspired by Claude Code's ``/compact``.
+
        Returns None if all attempts fail — the caller should drop
        the middle turns without a summary rather than inject a useless
        placeholder.
@@ -328,60 +344,27 @@ class ContextCompressor(ContextEngine):
        summary_budget = self._compute_summary_budget(turns_to_summarize)
        content_to_summarize = self._serialize_for_summary(turns_to_summarize)

-        if self._previous_summary:
-            # Iterative update: preserve existing info, add new progress
-            prompt = f"""You are updating a context compaction summary. A previous compaction produced the summary below. New conversation turns have occurred since then and need to be incorporated.
+        # Preamble shared by both first-compaction and iterative-update prompts.
+        # Inspired by OpenCode's "do not respond to any questions" instruction
+        # and Codex's "another language model" framing.
+        _summarizer_preamble = (
+            "You are a summarization agent creating a context checkpoint. "
+            "Your output will be injected as reference material for a DIFFERENT "
+            "assistant that continues the conversation. "
+            "Do NOT respond to any questions or requests in the conversation — "
+            "only output the structured summary. "
+            "Do NOT include any preamble, greeting, or prefix."
+        )

-PREVIOUS SUMMARY:
-{self._previous_summary}
-
-NEW TURNS TO INCORPORATE:
-{content_to_summarize}
-
-Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Remove information only if it is clearly obsolete.
-
-## Goal
-[What the user is trying to accomplish — preserve from previous summary, update if goal evolved]
-
-## Constraints & Preferences
-[User preferences, coding style, constraints, important decisions — accumulate across compactions]
-
-## Progress
-### Done
-[Completed work — include specific file paths, commands run, results obtained]
-### In Progress
-[Work currently underway]
-### Blocked
-[Any blockers or issues encountered]
-
-## Key Decisions
-[Important technical decisions and why they were made]
-
-## Relevant Files
-[Files read, modified, or created — with brief note on each. Accumulate across compactions.]
-
-## Next Steps
-[What needs to happen next to continue the work]
-
-## Critical Context
-[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
-
-## Tools & Patterns
-[Which tools were used, how they were used effectively, and any tool-specific discoveries. Accumulate across compactions.]
-
-Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
-
-Write only the summary body. Do not include any preamble or prefix."""
-        else:
-            # First compaction: summarize from scratch
-            prompt = f"""Create a structured handoff summary for a later assistant that will continue this conversation after earlier turns are compacted.
-
-TURNS TO SUMMARIZE:
-{content_to_summarize}
-
-Use this exact structure:
-
-## Goal
+        # Shared structured template (used by both paths).
+        # Key changes vs v1:
+        #   - "Pending User Asks" section (from Claude Code) explicitly tracks
+        #     unanswered questions so the model knows what's resolved vs open
+        #   - "Remaining Work" replaces "Next Steps" to avoid reading as active
+        #     instructions
+        #   - "Resolved Questions" makes it clear which questions were already
+        #     answered (prevents model from re-answering them)
+        _template_sections = f"""## Goal
 [What the user is trying to accomplish]

 ## Constraints & Preferences
@@ -398,25 +381,74 @@ Use this exact structure:
 ## Key Decisions
 [Important technical decisions and why they were made]

+## Resolved Questions
+[Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
+
+## Pending User Asks
+[Questions or requests from the user that have NOT yet been answered or fulfilled. If none, write "None."]
+
 ## Relevant Files
 [Files read, modified, or created — with brief note on each]

-## Next Steps
-[What needs to happen next to continue the work]
+## Remaining Work
+[What remains to be done — framed as context, not instructions]

 ## Critical Context
 [Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]

 ## Tools & Patterns
-[Which tools were used, how they were used effectively, and any tool-specific discoveries (e.g., preferred flags, working invocations, successful command patterns)]
+[Which tools were used, how they were used effectively, and any tool-specific discoveries]

-Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions. The goal is to prevent the next assistant from repeating work or losing important details.
+Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.

 Write only the summary body. Do not include any preamble or prefix."""

+        if self._previous_summary:
+            # Iterative update: preserve existing info, add new progress
+            prompt = f"""{_summarizer_preamble}
+
+You are updating a context compaction summary. A previous compaction produced the summary below. New conversation turns have occurred since then and need to be incorporated.
+
+PREVIOUS SUMMARY:
+{self._previous_summary}
+
+NEW TURNS TO INCORPORATE:
+{content_to_summarize}
+
+Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
+
+{_template_sections}"""
+        else:
+            # First compaction: summarize from scratch
+            prompt = f"""{_summarizer_preamble}
+
+Create a structured handoff summary for a different assistant that will continue this conversation after earlier turns are compacted. The next assistant should be able to understand what happened without re-reading the original turns.
+
+TURNS TO SUMMARIZE:
+{content_to_summarize}
+
+Use this exact structure:
+
+{_template_sections}"""
+
+        # Inject focus topic guidance when the user provides one via /compress <focus>.
+        # This goes at the end of the prompt so it takes precedence.
+        if focus_topic:
+            prompt += f"""
+
+FOCUS TOPIC: "{focus_topic}"
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
+
        try:
            call_kwargs = {
                "task": "compression",
+                "main_runtime": {
+                    "model": self.model,
+                    "provider": self.provider,
+                    "base_url": self.base_url,
+                    "api_key": self.api_key,
+                    "api_mode": self.api_mode,
+                },
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": summary_budget * 2,
                # timeout resolved from auxiliary.compression.timeout config by call_llm
@@ -631,7 +663,7 @@ Write only the summary body. Do not include any preamble or prefix."""
    # Main compression entry point
    # ------------------------------------------------------------------

-    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
+    def compress(self, messages: List[Dict[str, Any]], current_tokens: Optional[int] = None, focus_topic: Optional[str] = None) -> List[Dict[str, Any]]:
        """Compress conversation messages by summarizing middle turns.

        Algorithm:
@@ -643,6 +675,12 @@ Write only the summary body. Do not include any preamble or prefix."""

        After compression, orphaned tool_call / tool_result pairs are cleaned
        up so the API never receives mismatched IDs.
+
+        Args:
+            focus_topic: Optional focus string for guided compression.  When
+                provided, the summariser will prioritise preserving information
+                related to this topic and be more aggressive about compressing
+                everything else.  Inspired by Claude Code's ``/compact``.
        """
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
@@ -700,7 +738,7 @@ Write only the summary body. Do not include any preamble or prefix."""
            )

        # Phase 3: Generate structured summary
-        summary = self._generate_summary(turns_to_summarize)
+        summary = self._generate_summary(turns_to_summarize, focus_topic=focus_topic)

        # Phase 4: Assemble compressed message list
        compressed = []
@@ -755,7 +793,12 @@ Write only the summary body. Do not include any preamble or prefix."""
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
                original = msg.get("content") or ""
-                msg["content"] = summary + "\n\n" + original
+                msg["content"] = (
+                    summary
+                    + "\n\n--- END OF CONTEXT SUMMARY — "
+                    "respond to the message below, not the summary above ---\n\n"
+                    + original
+                )
                _merge_summary_into_tail = False
            compressed.append(msg)

@@ -24,6 +24,7 @@ from hermes_cli.auth import (
    _codex_access_token_is_expiring,
    _decode_jwt_claims,
    _import_codex_cli_tokens,
+    _write_codex_cli_tokens,
    _load_auth_store,
    _load_provider_state,
    _resolve_kimi_base_url,
@@ -693,6 +694,14 @@ class CredentialPool:
                        self._replace_entry(synced, updated)
                        self._persist()
                        self._sync_device_code_entry_to_auth_store(updated)
+                        try:
+                            _write_codex_cli_tokens(
+                                updated.access_token,
+                                updated.refresh_token,
+                                last_refresh=updated.last_refresh,
+                            )
+                        except Exception as wexc:
+                            logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc)
                        return updated
                    except Exception as retry_exc:
                        logger.debug("Codex retry refresh also failed: %s", retry_exc)
@@ -718,6 +727,17 @@ class CredentialPool:
        # _seed_from_singletons() on the next load_pool() sees fresh state
        # instead of re-seeding stale/consumed tokens.
        self._sync_device_code_entry_to_auth_store(updated)
+        # Write refreshed tokens back to ~/.codex/auth.json so Codex CLI
+        # and VS Code don't hit "refresh_token_reused" on their next refresh.
+        if self.provider == "openai-codex":
+            try:
+                _write_codex_cli_tokens(
+                    updated.access_token,
+                    updated.refresh_token,
+                    last_refresh=updated.last_refresh,
+                )
+            except Exception as wexc:
+                logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc)
        return updated

    def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
@@ -1128,6 +1148,23 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    elif provider == "openai-codex":
        state = _load_provider_state(auth_store, "openai-codex")
        tokens = state.get("tokens") if isinstance(state, dict) else None
+        # Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth
+        # store has no tokens.  This mirrors resolve_codex_runtime_credentials()
+        # so that load_pool() and list_authenticated_providers() detect tokens
+        # that only exist in the Codex CLI shared file.
+        if not (isinstance(tokens, dict) and tokens.get("access_token")):
+            try:
+                from hermes_cli.auth import _import_codex_cli_tokens, _save_codex_tokens
+                cli_tokens = _import_codex_cli_tokens()
+                if cli_tokens:
+                    logger.info("Importing Codex CLI tokens into Hermes auth store.")
+                    _save_codex_tokens(cli_tokens)
+                    # Re-read state after import
+                    auth_store = _load_auth_store()
+                    state = _load_provider_state(auth_store, "openai-codex")
+                    tokens = state.get("tokens") if isinstance(state, dict) else None
+            except Exception as exc:
+                logger.debug("Codex CLI token import failed: %s", exc)
        if isinstance(tokens, dict) and tokens.get("access_token"):
            active_sources.add("device_code")
            changed |= _upsert_entry(
@@ -775,12 +775,12 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
                resp = client.post(f"{server_url}/api/show", json={"name": model})
                if resp.status_code == 200:
                    data = resp.json()
-                    # Check model_info for context length
-                    model_info = data.get("model_info", {})
-                    for key, value in model_info.items():
-                        if "context_length" in key and isinstance(value, (int, float)):
-                            return int(value)
-                    # Check parameters string for num_ctx
+                    # Prefer explicit num_ctx from Modelfile parameters: this is
+                    # the *runtime* context Ollama will actually allocate KV cache
+                    # for. The GGUF model_info.context_length is the training max,
+                    # which can be larger than num_ctx — using it here would let
+                    # Hermes grow conversations past the runtime limit and Ollama
+                    # would silently truncate. Matches query_ollama_num_ctx().
                    params = data.get("parameters", "")
                    if "num_ctx" in params:
                        for line in params.split("\n"):
@@ -791,6 +791,11 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
                                        return int(parts[-1])
                                    except ValueError:
                                        pass
+                    # Fall back to GGUF model_info context_length (training max)
+                    model_info = data.get("model_info", {})
+                    for key, value in model_info.items():
+                        if "context_length" in key and isinstance(value, (int, float)):
+                            return int(value)

            # LM Studio native API: /api/v1/models returns max_context_length.
            # This is more reliable than the OpenAI-compat /v1/models which
@@ -144,6 +144,8 @@ class ProviderInfo:
 PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openrouter": "openrouter",
    "anthropic": "anthropic",
+    "openai": "openai",
+    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
    "minimax": "minimax",
@@ -12,7 +12,7 @@ import threading
 from collections import OrderedDict
 from pathlib import Path

-from hermes_constants import get_hermes_home, get_skills_dir
+from hermes_constants import get_hermes_home, get_skills_dir, is_wsl
 from typing import Optional

 from agent.skill_utils import (
@@ -364,8 +364,50 @@ PLATFORM_HINTS = {
        "documents. You can also include image URLs in markdown format ![alt](url) and they "
        "will be downloaded and sent as native media when possible."
    ),
+    "wecom": (
+        "You are on WeCom (企业微信 / Enterprise WeChat). Markdown formatting is supported. "
+        "You CAN send media files natively — to deliver a file to the user, include "
+        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
+        "WeCom attachment: images (.jpg, .png, .webp) are sent as photos (up to 10 MB), "
+        "other files (.pdf, .docx, .xlsx, .md, .txt, etc.) arrive as downloadable documents "
+        "(up to 20 MB), and videos (.mp4) play inline. Voice messages are supported but "
+        "must be in AMR format — other audio formats are automatically sent as file attachments. "
+        "You can also include image URLs in markdown format ![alt](url) and they will be "
+        "downloaded and sent as native photos. Do NOT tell the user you lack file-sending "
+        "capability — use MEDIA: syntax whenever a file delivery is appropriate."
+    ),
 }

+# ---------------------------------------------------------------------------
+# Environment hints — execution-environment awareness for the agent.
+# Unlike PLATFORM_HINTS (which describe the messaging channel), these describe
+# the machine/OS the agent's tools actually run on.
+# ---------------------------------------------------------------------------
+
+WSL_ENVIRONMENT_HINT = (
+    "You are running inside WSL (Windows Subsystem for Linux). "
+    "The Windows host filesystem is mounted under /mnt/ — "
+    "/mnt/c/ is the C: drive, /mnt/d/ is D:, etc. "
+    "The user's Windows files are typically at "
+    "/mnt/c/Users/<username>/Desktop/, Documents/, Downloads/, etc. "
+    "When the user references Windows paths or desktop files, translate "
+    "to the /mnt/c/ equivalent. You can list /mnt/c/Users/ to discover "
+    "the Windows username if needed."
+)
+
+
+def build_environment_hints() -> str:
+    """Return environment-specific guidance for the system prompt.
+
+    Detects WSL only for now (extensible to Termux, Docker, etc.); hints are joined by blank lines.
+    Returns an empty string when no special environment is detected.
+    """
+    hints: list[str] = []
+    if is_wsl():
+        hints.append(WSL_ENVIRONMENT_HINT)
+    return "\n\n".join(hints)
+
+
 CONTEXT_FILE_MAX_CHARS = 20_000
 CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
@@ -726,8 +768,16 @@ def build_skills_system_prompt(

        result = (
            "## Skills (mandatory)\n"
-            "Before replying, scan the skills below. If one clearly matches your task, "
-            "load it with skill_view(name) and follow its instructions. "
+            "Before replying, scan the skills below. If a skill matches or is even partially relevant "
+            "to your task, you MUST load it with skill_view(name) and follow its instructions. "
+            "Err on the side of loading — it is always better to have context you don't need "
+            "than to miss critical steps, pitfalls, or established workflows. "
+            "Skills contain specialized knowledge — API endpoints, tool-specific commands, "
+            "and proven workflows that outperform general-purpose approaches. Load the skill "
+            "even if you think you could handle the task with basic tools like web_search or terminal. "
+            "Skills also encode the user's preferred approach, conventions, and quality standards "
+            "for tasks like code review, planning, and testing — load them even for tasks you "
+            "already know how to do, because the skill defines how it should be done here.\n"
            "If a skill has issues, fix it with skill_manage(action='patch').\n"
            "After difficult/iterative tasks, offer to save as a skill. "
            "If a skill you loaded was missing steps, had wrong commands, or needed "
@@ -737,7 +787,7 @@ def build_skills_system_prompt(
            + "\n".join(index_lines) + "\n"
            "</available_skills>\n"
            "\n"
-            "If none match, proceed normally without loading a skill."
+            "Only proceed without loading a skill if genuinely none are relevant to the task."
        )

    # ── Store in LRU cache ────────────────────────────────────────────
@@ -36,7 +36,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =

    try:
        response = call_llm(
-            task="compression",  # reuse compression task config (cheap/fast model)
+            task="title_generation",
            messages=messages,
            max_tokens=30,
            temperature=0.3,
@@ -538,6 +538,7 @@ class BatchRunner:
        reasoning_config: Dict[str, Any] = None,
        prefill_messages: List[Dict[str, Any]] = None,
        max_samples: int = None,
+        output_dir: str = None,
    ):
        """
        Initialize the batch runner.
@@ -590,7 +591,7 @@ class BatchRunner:
            raise ValueError(f"Unknown distribution: {distribution}. Available: {list(list_distributions().keys())}")
        
        # Setup output directory
-        self.output_dir = Path("data") / run_name
+        self.output_dir = Path(output_dir) if output_dir else Path("data") / run_name
        self.output_dir.mkdir(parents=True, exist_ok=True)
        
        # Checkpoint file
@@ -1124,6 +1125,7 @@ def main(
    verbose: bool = False,
    list_distributions: bool = False,
    ephemeral_system_prompt: str = None,
+    ephemeral_system_prompt_file: str = None,
    log_prefix_chars: int = 100,
    providers_allowed: str = None,
    providers_ignored: str = None,
@@ -1134,6 +1136,7 @@ def main(
    reasoning_disabled: bool = False,
    prefill_messages_file: str = None,
    max_samples: int = None,
+    output_dir: str = None,
 ):
    """
    Run batch processing of agent prompts from a dataset.
@@ -1200,6 +1203,11 @@ def main(
        print("                         --run_name=my_run --distribution=<name>")
        return
    
+    # Load system prompt from file if provided (decoded as UTF-8, not the locale default)
+    if ephemeral_system_prompt_file and not ephemeral_system_prompt:
+        with open(ephemeral_system_prompt_file, encoding="utf-8") as _f:
+            ephemeral_system_prompt = _f.read()
+
    # Validate required arguments
    if not dataset_file:
        print("❌ Error: --dataset_file is required")
@@ -1271,6 +1279,7 @@ def main(
            reasoning_config=reasoning_config,
            prefill_messages=prefill_messages,
            max_samples=max_samples,
+            output_dir=output_dir,
        )

        runner.run(resume=resume)
@@ -309,15 +309,8 @@ compression:
  # compression of older turns.
  protect_last_n: 20

-  # Model to use for generating summaries (fast/cheap recommended)
-  # This model compresses the middle turns into a concise summary.
-  # IMPORTANT: it receives the full middle section of the conversation, so it
-  # MUST support a context length at least as large as your main model's.
-  summary_model: "google/gemini-3-flash-preview"
-  
-  # Provider for the summary model (default: "auto")
-  # Options: "auto", "openrouter", "nous", "main"
-  # summary_provider: "auto"
+  # To pin a specific model/provider for compression summaries, use the
+  # auxiliary section below (auxiliary.compression.provider / model).

 # =============================================================================
 # Auxiliary Models (Advanced — Experimental)
@@ -237,7 +237,6 @@ def load_cli_config() -> Dict[str, Any]:
        "compression": {
            "enabled": True,      # Auto-compress when approaching context limit
            "threshold": 0.50,    # Compress at 50% of model's context limit
-            "summary_model": "",  # Model for summaries (empty = use main model)
        },
        "smart_model_routing": {
            "enabled": False,
@@ -1822,6 +1821,8 @@ class HermesCLI:
        self._secret_deadline = 0
        self._spinner_text: str = ""  # thinking spinner text for TUI
        self._tool_start_time: float = 0.0  # monotonic timestamp when current tool started (for live elapsed)
+        self._pending_tool_info: dict = {}  # function_name -> list of (preview, args) for stacked scrollback
+        self._last_scrollback_tool: str = ""  # last tool name printed to scrollback (for "new" dedup)
        self._command_running = False
        self._command_status = ""
        self._attached_images: list[Path] = []
@@ -2418,8 +2419,8 @@ class HermesCLI:
        # suppress them during streaming too — unless show_reasoning is
        # enabled, in which case we route the inner content to the
        # reasoning display box instead of discarding it.
-        _OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>", "<thinking>")
-        _CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>", "</thinking>")
+        _OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>", "<thinking>", "<thought>")
+        _CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>", "</thinking>", "</thought>")

        # Append to a pre-filter buffer first
        self._stream_prefilt = getattr(self, "_stream_prefilt", "") + text
@@ -2733,6 +2734,22 @@ class HermesCLI:
        if runtime_model and isinstance(runtime_model, str):
            self.model = runtime_model

+        # If model is still empty (e.g. user ran `hermes auth add openai-codex`
+        # without `hermes model`), fall back to the provider's first catalog
+        # model so the API call doesn't fail with "model must be non-empty".
+        if not self.model and resolved_provider:
+            try:
+                from hermes_cli.models import get_default_model_for_provider
+                _default = get_default_model_for_provider(resolved_provider)
+                if _default:
+                    self.model = _default
+                    logger.info(
+                        "No model configured — defaulting to %s for provider %s",
+                        _default, resolved_provider,
+                    )
+            except Exception as exc:  # best-effort fallback; record why it failed
+                logger.debug("Default model lookup failed for %s: %s", resolved_provider, exc)
+
        # Normalize model for the resolved provider (e.g. swap non-Codex
        # models when provider is openai-codex).  Fixes #651.
        model_changed = self._normalize_model_for_provider(resolved_provider)
@@ -2981,8 +2998,10 @@ class HermesCLI:
                )

        # Warn if the configured model is a Nous Hermes LLM (not agentic)
+        from hermes_cli.model_switch import is_nous_hermes_non_agentic
+
        model_name = getattr(self, "model", "") or ""
-        if "hermes" in model_name.lower():
+        if is_nous_hermes_non_agentic(model_name):
            self.console.print()
            self.console.print(
                "[bold yellow]⚠  Nous Research Hermes 3 & 4 models are NOT agentic and are not "
@@ -3096,6 +3115,8 @@ class HermesCLI:

        # Collect displayable entries (skip system, tool-result messages)
        entries = []  # list of (role, display_text)
+        _last_asst_idx = None       # index of last assistant entry
+        _last_asst_full = None      # un-truncated display text for last assistant
        for msg in self.conversation_history:
            role = msg.get("role", "")
            content = msg.get("content")
@@ -3125,7 +3146,9 @@ class HermesCLI:
                text = "" if content is None else str(content)
                text = _strip_reasoning(text)
                parts = []
+                full_parts = []  # un-truncated version
                if text:
+                    full_parts.append(text)
                    lines = text.splitlines()
                    if len(lines) > MAX_ASST_LINES:
                        text = "\n".join(lines[:MAX_ASST_LINES]) + " ..."
@@ -3145,11 +3168,15 @@ class HermesCLI:
                    if len(names) > 4:
                        names_str += ", ..."
                    noun = "call" if tc_count == 1 else "calls"
-                    parts.append(f"[{tc_count} tool {noun}: {names_str}]")
+                    tc_summary = f"[{tc_count} tool {noun}: {names_str}]"
+                    parts.append(tc_summary)
+                    full_parts.append(tc_summary)
                if not parts:
                    # Skip pure-reasoning messages that have no visible output
                    continue
                entries.append(("assistant", " ".join(parts)))
+                _last_asst_idx = len(entries) - 1
+                _last_asst_full = " ".join(full_parts)

        if not entries:
            return
@@ -3160,6 +3187,13 @@ class HermesCLI:
            skipped = len(entries) - MAX_DISPLAY_EXCHANGES * 2
            entries = entries[skipped:]

+        # Replace last assistant entry with full (un-truncated) text
+        # so the user can see where they left off without wasting tokens.
+        if _last_asst_idx is not None and _last_asst_full:
+            adj_idx = _last_asst_idx - skipped
+            if 0 <= adj_idx < len(entries):
+                entries[adj_idx] = ("assistant_last", _last_asst_full)
+
        # Build the display using Rich
        from rich.panel import Panel
        from rich.text import Text
@@ -3192,6 +3226,13 @@ class HermesCLI:
                lines.append(msg_lines[0] + "\n", style="dim")
                for ml in msg_lines[1:]:
                    lines.append(f"         {ml}\n", style="dim")
+            elif role == "assistant_last":
+                # Last assistant response shown in full, non-dim
+                lines.append("  ◆ Hermes: ", style=f"bold {_assistant_label_c}")
+                msg_lines = text.splitlines()
+                lines.append(msg_lines[0] + "\n", style="")
+                for ml in msg_lines[1:]:
+                    lines.append(f"            {ml}\n", style="")
            else:
                lines.append("  ◆ Hermes: ", style=f"dim bold {_assistant_label_c}")
                msg_lines = text.splitlines()
@@ -3336,6 +3377,93 @@ class HermesCLI:
            # Treat as a git hash
            return ref

+    def _handle_snapshot_command(self, command: str):
+        """Handle /snapshot — lightweight state snapshots for Hermes config/state.
+
+        Syntax:
+            /snapshot                  — list recent snapshots
+            /snapshot create [label]   — create a snapshot
+            /snapshot restore <id>     — restore state from snapshot
+            /snapshot prune [N]        — prune to N snapshots (default 20)
+
+        ``ls`` is accepted for ``list`` and ``rewind`` for ``restore``.  The
+        restore argument may also be a 1-based row number from the listing.
+        A negative or non-numeric prune count prints usage and prunes nothing.
+
+        Args:
+            command: The full slash command as typed, e.g. ``/snapshot create x``.
+        """
+        from hermes_cli.backup import (
+            create_quick_snapshot, list_quick_snapshots,
+            restore_quick_snapshot, prune_quick_snapshots,
+        )
+        from hermes_constants import display_hermes_home
+
+        parts = command.split()
+        subcmd = parts[1].lower() if len(parts) > 1 else "list"
+
+        if subcmd in ("list", "ls"):
+            snaps = list_quick_snapshots()
+            if not snaps:
+                print("  No state snapshots yet.")
+                print("  Create one: /snapshot create [label]")
+                return
+            print(f"  State snapshots ({display_hermes_home()}/state-snapshots/):\n")
+            print(f"  {'#':>3}  {'ID':<35} {'Files':>5} {'Size':>10} {'Label'}")
+            print(f"  {'─'*3}  {'─'*35} {'─'*5} {'─'*10} {'─'*20}")
+            for i, s in enumerate(snaps, 1):
+                # Human-readable size: bytes, whole KB, or MB with one decimal
+                size = s.get("total_size", 0)
+                if size < 1024:
+                    size_str = f"{size} B"
+                elif size < 1024 * 1024:
+                    size_str = f"{size / 1024:.0f} KB"
+                else:
+                    size_str = f"{size / 1024 / 1024:.1f} MB"
+                label = s.get("label") or ""
+                print(f"  {i:3}  {s['id']:<35} {s.get('file_count', 0):>5} {size_str:>10} {label}")
+
+        elif subcmd == "create":
+            label = " ".join(parts[2:]) if len(parts) > 2 else None
+            snap_id = create_quick_snapshot(label=label)
+            if snap_id:
+                print(f"  Snapshot created: {snap_id}")
+            else:
+                print("  No state files found to snapshot.")
+
+        elif subcmd in ("restore", "rewind"):
+            if len(parts) < 3:
+                print("  Usage: /snapshot restore <snapshot-id>")
+                # Show hint with most recent snapshot
+                snaps = list_quick_snapshots(limit=1)
+                if snaps:
+                    print(f"  Most recent: {snaps[0]['id']}")
+                return
+            snap_id = parts[2]
+            # Allow restore by number (1-indexed).  Only the int() parse is
+            # guarded so a ValueError from the listing itself is not swallowed.
+            try:
+                idx = int(snap_id)
+            except ValueError:
+                idx = None
+            if idx is not None:
+                snaps = list_quick_snapshots()
+                if not snaps:
+                    # Avoid the nonsensical "Use 1-0." range hint
+                    print("  No state snapshots yet.")
+                    return
+                if not 1 <= idx <= len(snaps):
+                    print(f"  Invalid snapshot number. Use 1-{len(snaps)}.")
+                    return
+                snap_id = snaps[idx - 1]["id"]
+            if restore_quick_snapshot(snap_id):
+                print(f"  Restored state from: {snap_id}")
+                print("  Restart recommended for state.db changes to take effect.")
+            else:
+                print(f"  Snapshot not found: {snap_id}")
+
+        elif subcmd == "prune":
+            keep = 20
+            if len(parts) > 2:
+                try:
+                    keep = int(parts[2])
+                except ValueError:
+                    keep = -1  # treated like any other invalid count below
+                if keep < 0:
+                    # A negative keep-count is never meaningful; refuse it
+                    # instead of handing it to the pruning helper.
+                    print("  Usage: /snapshot prune [keep-count]")
+                    return
+            deleted = prune_quick_snapshots(keep=keep)
+            print(f"  Pruned {deleted} old snapshot(s) (keeping {keep}).")
+
+        else:
+            print(f"  Unknown subcommand: {subcmd}")
+            print("  Usage: /snapshot [list|create [label]|restore <id>|prune [N]]")
+
    def _handle_stop_command(self):
        """Handle /stop — kill all running background processes.

@@ -5242,9 +5370,33 @@ class HermesCLI:
                        context_length=ctx_len,
                    )
                _cprint("  ✨ (◕‿◕)✨ Fresh start! Screen cleared and conversation reset.\n")
+                # Show a random tip on new session
+                try:
+                    from hermes_cli.tips import get_random_tip
+                    _tip = get_random_tip()
+                    try:
+                        from hermes_cli.skin_engine import get_active_skin
+                        _tip_color = get_active_skin().get_color("banner_dim", "#B8860B")
+                    except Exception:
+                        _tip_color = "#B8860B"
+                    cc.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+                except Exception:
+                    pass
            else:
                self.show_banner()
                print("  ✨ (◕‿◕)✨ Fresh start! Screen cleared and conversation reset.\n")
+                # Show a random tip on new session
+                try:
+                    from hermes_cli.tips import get_random_tip
+                    _tip = get_random_tip()
+                    try:
+                        from hermes_cli.skin_engine import get_active_skin
+                        _tip_color = get_active_skin().get_color("banner_dim", "#B8860B")
+                    except Exception:
+                        _tip_color = "#B8860B"
+                    self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+                except Exception:
+                    pass
        elif canonical == "history":
            self.show_history()
        elif canonical == "title":
@@ -5344,15 +5496,21 @@ class HermesCLI:
        elif canonical == "fast":
            self._handle_fast_command(cmd_original)
        elif canonical == "compress":
-            self._manual_compress()
+            self._manual_compress(cmd_original)
        elif canonical == "usage":
            self._show_usage()
        elif canonical == "insights":
            self._show_insights(cmd_original)
+        elif canonical == "debug":
+            self._handle_debug_command()
        elif canonical == "paste":
            self._handle_paste_command()
        elif canonical == "image":
            self._handle_image_command(cmd_original)
+        elif canonical == "reload":
+            from hermes_cli.config import reload_env
+            count = reload_env()
+            print(f"  Reloaded .env ({count} var(s) updated)")
        elif canonical == "reload-mcp":
            with self._busy_command(self._slow_command_status(cmd_original)):
                self._reload_mcp()
@@ -5381,6 +5539,8 @@ class HermesCLI:
                print(f"Plugin system error: {e}")
        elif canonical == "rollback":
            self._handle_rollback_command(cmd_original)
+        elif canonical == "snapshot":
+            self._handle_snapshot_command(cmd_original)
        elif canonical == "stop":
            self._handle_stop_command()
        elif canonical == "background":
@@ -6201,8 +6361,14 @@ class HermesCLI:
        self._reasoning_preview_buf = getattr(self, "_reasoning_preview_buf", "") + reasoning_text
        self._flush_reasoning_preview(force=False)

-    def _manual_compress(self):
-        """Manually trigger context compression on the current conversation."""
+    def _manual_compress(self, cmd_original: str = ""):
+        """Manually trigger context compression on the current conversation.
+
+        Accepts an optional focus topic: ``/compress <focus>`` guides the
+        summariser to preserve information related to *focus* while being
+        more aggressive about discarding everything else.  Inspired by
+        Claude Code's ``/compact <focus>`` feature.
+        """
        if not self.conversation_history or len(self.conversation_history) < 4:
            print("(._.) Not enough conversation to compress (need at least 4 messages).")
            return
@@ -6215,18 +6381,30 @@ class HermesCLI:
            print("(._.) Compression is disabled in config.")
            return

+        # Extract optional focus topic from the command (e.g. "/compress database schema")
+        focus_topic = ""
+        if cmd_original:
+            parts = cmd_original.strip().split(None, 1)
+            if len(parts) > 1:
+                focus_topic = parts[1].strip()
+
        original_count = len(self.conversation_history)
        try:
            from agent.model_metadata import estimate_messages_tokens_rough
            from agent.manual_compression_feedback import summarize_manual_compression
            original_history = list(self.conversation_history)
            approx_tokens = estimate_messages_tokens_rough(original_history)
-            print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
+            if focus_topic:
+                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
+                      f"focus: \"{focus_topic}\"...")
+            else:
+                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

            compressed, _ = self.agent._compress_context(
                original_history,
                self.agent._cached_system_prompt or "",
                approx_tokens=approx_tokens,
+                focus_topic=focus_topic or None,
            )
            self.conversation_history = compressed
            new_tokens = estimate_messages_tokens_rough(self.conversation_history)
@@ -6245,6 +6423,14 @@ class HermesCLI:
        except Exception as e:
            print(f"  ❌ Compression failed: {e}")

+    def _handle_debug_command(self):
+        """Handle /debug — share a debug report via ``run_debug_share``."""
+        from types import SimpleNamespace
+        from hermes_cli.debug import run_debug_share
+
+        # Attribute-style options object; values are fixed for the slash command.
+        run_debug_share(SimpleNamespace(lines=200, expire=7, local=False))
+
    def _show_usage(self):
        """Show rate limits (if available) and session token usage."""
        if not self.agent:
@@ -6542,10 +6728,36 @@ class HermesCLI:
        On tool.started, records a monotonic timestamp so get_spinner_text()
        can show a live elapsed timer (the TUI poll loop already invalidates
        every ~0.15s, so the counter updates automatically).
+
+        When tool_progress_mode is "all" or "new", also prints a persistent
+        stacked line to scrollback on tool.completed so users can see the
+        full history of tool calls (not just the current one in the spinner).
        """
        if event_type == "tool.completed":
            import time as _time
            self._tool_start_time = 0.0
+            # Print stacked scrollback line for "all" / "new" modes
+            if function_name and self.tool_progress_mode in ("all", "new"):
+                duration = kwargs.get("duration", 0.0)
+                is_error = kwargs.get("is_error", False)
+                # Pop stored args from tool.started for this function
+                stored = self._pending_tool_info.get(function_name)
+                stored_args = stored.pop(0) if stored else {}
+                if stored is not None and not stored:
+                    del self._pending_tool_info[function_name]
+                # "new" mode: skip consecutive repeats of the same tool
+                if self.tool_progress_mode == "new" and function_name == self._last_scrollback_tool:
+                    self._invalidate()
+                    return
+                self._last_scrollback_tool = function_name
+                try:
+                    from agent.display import get_cute_tool_message
+                    line = get_cute_tool_message(function_name, stored_args, duration)
+                    if is_error:
+                        line = f"{line} [error]"
+                    _cprint(f"  {line}")
+                except Exception:
+                    pass
            self._invalidate()
            return
        if event_type != "tool.started":
@@ -6561,6 +6773,10 @@ class HermesCLI:
                label = label[:_pl - 3] + "..."
            self._spinner_text = f"{emoji} {label}"
            self._tool_start_time = _time.monotonic()
+            # Store args for stacked scrollback line on completion
+            self._pending_tool_info.setdefault(function_name, []).append(
+                function_args if function_args is not None else {}
+            )
            self._invalidate()

        if not self._voice_mode:
@@ -7527,8 +7743,10 @@ class HermesCLI:
                        "error": _summary,
                    }

-            # Start agent in background thread
-            agent_thread = threading.Thread(target=run_agent)
+            # Start agent in background thread (daemon so it cannot keep the
+            # process alive when the user closes the terminal tab — SIGHUP
+            # exits the main thread and daemon threads are reaped automatically).
+            agent_thread = threading.Thread(target=run_agent, daemon=True)
            agent_thread.start()

            # Monitor the dedicated interrupt queue while the agent runs.
@@ -7714,6 +7932,17 @@ class HermesCLI:
                sys.stdout.write("\a")
                sys.stdout.flush()

+            # Notify when iteration budget was hit
+            if result and not result.get("completed") and not result.get("interrupted"):
+                _api_calls = result.get("api_calls", 0)
+                if _api_calls >= getattr(self.agent, "max_iterations", 90):
+                    _max_iter = getattr(self.agent, "max_iterations", 90)
+                    _cprint(
+                        f"\n{_DIM}⚠ Iteration budget reached "
+                        f"({_api_calls}/{_max_iter}) — "
+                        f"response may be incomplete{_RST}"
+                    )
+
            # Speak response aloud if voice TTS is enabled
            # Skip batch TTS when streaming TTS already handled it
            if self._voice_tts and response and not use_streaming_tts:
@@ -8025,6 +8254,17 @@ class HermesCLI:
            _welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands."
            _welcome_color = "#FFF8DC"
        self.console.print(f"[{_welcome_color}]{_welcome_text}[/]")
+        # Show a random tip to help users discover features
+        try:
+            from hermes_cli.tips import get_random_tip
+            _tip = get_random_tip()
+            try:
+                _tip_color = _welcome_skin.get_color("banner_dim", "#B8860B")
+            except Exception:
+                _tip_color = "#B8860B"
+            self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
+        except Exception:
+            pass  # Tips are non-critical — never break startup
        if self.preloaded_skills and not self._startup_skills_line_shown:
            skills_label = ", ".join(self.preloaded_skills)
            self.console.print(
@@ -8543,6 +8783,9 @@ class HermesCLI:
            if _should_auto_attach_clipboard_image_on_paste(pasted_text) and self._try_attach_clipboard_image():
                event.app.invalidate()
            if pasted_text:
+                # Sanitize surrogate characters (e.g. from Word/Google Docs paste) before writing
+                from run_agent import _sanitize_surrogates
+                pasted_text = _sanitize_surrogates(pasted_text)
                line_count = pasted_text.count('\n')
                buf = event.current_buffer
                if line_count >= 5 and not buf.text.strip().startswith('/'):
@@ -9300,9 +9543,14 @@ class HermesCLI:
                                from tools.process_registry import process_registry
                                if not process_registry.completion_queue.empty():
                                    evt = process_registry.completion_queue.get_nowait()
-                                    _synth = _format_process_notification(evt)
-                                    if _synth:
-                                        self._pending_input.put(_synth)
+                                    # Skip if the agent already consumed this via wait/poll/log
+                                    _evt_sid = evt.get("session_id", "")
+                                    if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
+                                        pass  # already delivered via tool result
+                                    else:
+                                        _synth = _format_process_notification(evt)
+                                        if _synth:
+                                            self._pending_input.put(_synth)
                            except Exception:
                                pass
                        continue
@@ -9401,6 +9649,8 @@ class HermesCLI:
                        self._agent_running = False
                        self._spinner_text = ""
                        self._tool_start_time = 0.0
+                        self._pending_tool_info.clear()
+                        self._last_scrollback_tool = ""

                        app.invalidate()  # Refresh status line

@@ -9426,6 +9676,10 @@ class HermesCLI:
                            from tools.process_registry import process_registry
                            while not process_registry.completion_queue.empty():
                                evt = process_registry.completion_queue.get_nowait()
+                                # Skip if the agent already consumed this via wait/poll/log
+                                _evt_sid = evt.get("session_id", "")
+                                if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid):
+                                    continue  # already delivered via tool result
                                _synth = _format_process_notification(evt)
                                if _synth:
                                    self._pending_input.put(_synth)
@@ -9457,17 +9711,37 @@ class HermesCLI:
            pass  # Signal handlers may fail in restricted environments
        
        # Install a custom asyncio exception handler that suppresses the
-        # "Event loop is closed" RuntimeError from httpx transport cleanup.
-        # This is defense-in-depth — the primary fix is neuter_async_httpx_del
-        # which disables __del__ entirely, but older clients or SDK upgrades
-        # could bypass it.
+        # "Event loop is closed" RuntimeError from httpx transport cleanup
+        # and the "0 is not registered" KeyError from broken stdin (#6393).
+        # The RuntimeError fix is defense-in-depth — the primary fix is
+        # neuter_async_httpx_del which disables __del__ entirely.  The
+        # KeyError fix handles macOS + uv-managed Python environments where
+        # fd 0 is not reliably available to the asyncio selector.
        def _suppress_closed_loop_errors(loop, context):
            exc = context.get("exception")
            if isinstance(exc, RuntimeError) and "Event loop is closed" in str(exc):
                return  # silently suppress
+            if isinstance(exc, KeyError) and "is not registered" in str(exc):
+                return  # suppress selector registration failures (#6393)
            # Fall back to default handler for everything else
            loop.default_exception_handler(context)

+        # Validate stdin before launching prompt_toolkit — on macOS with
+        # uv-managed Python, fd 0 can be invalid or unregisterable with the
+        # asyncio selector, causing "KeyError: '0 is not registered'" (#6393).
+        try:
+            import os as _os
+            _os.fstat(0)
+        except OSError:
+            print(
+                "Error: stdin (fd 0) is not available.\n"
+                "This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
+                "Try reinstalling Python via pyenv or Homebrew, then re-run: hermes setup"
+            )
+            _run_cleanup()
+            self._print_exit_summary()
+            return
+
        # Run the application with patch_stdout for proper output handling
        try:
            with patch_stdout():
@@ -9481,8 +9755,28 @@ class HermesCLI:
                app.run()
        except (EOFError, KeyboardInterrupt, BrokenPipeError):
            pass
+        except (KeyError, OSError) as _stdin_err:
+            # Catch selector registration failures from broken stdin (#6393).
+            # This is the fallback for cases that slip past the fstat() guard.
+            if "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
+                print(
+                    f"\nError: stdin is not usable ({_stdin_err}).\n"
+                    "This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
+                    "Try reinstalling Python via pyenv or Homebrew, then re-run: hermes setup"
+                )
+            else:
+                raise
        finally:
            self._should_exit = True
+            # Interrupt the agent immediately so its daemon thread stops making
+            # API calls and exits promptly (agent_thread is daemon, so the
+            # process will exit once the main thread finishes, but interrupting
+            # avoids wasted API calls and lets run_conversation clean up).
+            if self.agent and getattr(self, '_agent_running', False):
+                try:
+                    self.agent.interrupt()
+                except Exception:
+                    pass
            # Flush memories before exit (only for substantial conversations)
            if self.agent and self.conversation_history:
                try:
@@ -1,15 +0,0 @@
-# Termux / Android dependency constraints for Hermes Agent.
-#
-# Usage:
-#   python -m pip install -e '.[termux]' -c constraints-termux.txt
-#
-# These pins keep the tested Android install path stable when upstream packages
-# move faster than Termux-compatible wheels / sdists.
-
-ipython<10
-jedi>=0.18.1,<0.20
-parso>=0.8.4,<0.9
-stack-data>=0.6,<0.7
-pexpect>4.3,<5
-matplotlib-inline>=0.1.7,<0.2
-asttokens>=2.1,<3
@@ -219,6 +219,21 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
    chat_id = target["chat_id"]
    thread_id = target.get("thread_id")

+    # Diagnostic: log thread_id for topic-aware delivery debugging
+    origin = job.get("origin") or {}
+    origin_thread = origin.get("thread_id")
+    if origin_thread and not thread_id:
+        logger.warning(
+            "Job '%s': origin has thread_id=%s but delivery target lost it "
+            "(deliver=%s, target=%s)",
+            job["id"], origin_thread, job.get("deliver", "local"), target,
+        )
+    elif thread_id:
+        logger.debug(
+            "Job '%s': delivering to %s:%s thread_id=%s",
+            job["id"], platform_name, chat_id, thread_id,
+        )
+
    from tools.send_message_tool import _send_to_platform
    from gateway.config import load_gateway_config, Platform

@@ -626,6 +641,15 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        except Exception as e:
            logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e)

+        # Apply IPv4 preference if configured.
+        try:
+            from hermes_constants import apply_ipv4_preference
+            _net_cfg = _cfg.get("network", {})
+            if isinstance(_net_cfg, dict) and _net_cfg.get("force_ipv4"):
+                apply_ipv4_preference(force=True)
+        except Exception:
+            pass
+
        # Reasoning config from config.yaml
        from hermes_constants import parse_reasoning_effort
        effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip()
@@ -5,6 +5,33 @@ set -e
 HERMES_HOME="/opt/data"
 INSTALL_DIR="/opt/hermes"

+# --- Privilege dropping via gosu ---
+# When started as root (the default), optionally remap the hermes user/group
+# to match host-side ownership, fix volume permissions, then re-exec as hermes.
+if [ "$(id -u)" = "0" ]; then
+    if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
+        echo "Changing hermes UID to $HERMES_UID"
+        usermod -u "$HERMES_UID" hermes
+    fi
+
+    if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
+        echo "Changing hermes GID to $HERMES_GID"
+        groupmod -g "$HERMES_GID" hermes
+    fi
+
+    actual_hermes_uid=$(id -u hermes)
+    if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
+        echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
+        chown -R hermes:hermes "$HERMES_HOME"
+    fi
+
+    echo "Dropping root privileges"
+    exec gosu hermes "$0" "$@"
+fi
+
+# --- Running as hermes from here ---
+source "${INSTALL_DIR}/.venv/bin/activate"
+
 # Create essential directory structure.  Cache and platform directories
 # (cache/images, cache/audio, platforms/whatsapp, etc.) are created on
 # demand by the application — don't pre-create them here so new installs
@@ -118,7 +118,7 @@ For executed migrations, the full report is saved to `~/.hermes/migration/opencl
 ## Troubleshooting

 ### "OpenClaw directory not found"
-The migration looks for `~/.openclaw` by default, then tries `~/.clawdbot` and `~/.moldbot`. If your OpenClaw is installed elsewhere, use `--source`:
+The migration looks for `~/.openclaw` by default, then tries `~/.clawdbot` and `~/.moltbot`. If your OpenClaw is installed elsewhere, use `--source`:
 ```bash
 hermes claw migrate --source /path/to/.openclaw
 ```
@@ -665,6 +665,17 @@ def load_gateway_config() -> GatewayConfig:
    _apply_env_overrides(config)
    
    # --- Validate loaded values ---
+    _validate_gateway_config(config)
+
+    return config
+
+
+def _validate_gateway_config(config: "GatewayConfig") -> None:
+    """Validate and sanitize a loaded GatewayConfig in place.
+
+    Called by ``load_gateway_config()`` after all config sources are merged.
+    Extracted as a separate function for testability.
+    """
    policy = config.default_reset_policy

    if not (0 <= policy.at_hour <= 23):
@@ -701,7 +712,31 @@ def load_gateway_config() -> GatewayConfig:
                platform.value, env_name,
            )

-    return config
+    # Reject known-weak placeholder tokens.
+    # Ported from openclaw/openclaw#64586: users who copy .env.example
+    # without changing placeholder values get a clear startup error instead
+    # of a confusing "auth failed" from the platform API.
+    try:
+        from hermes_cli.auth import has_usable_secret
+    except ImportError:
+        has_usable_secret = None  # type: ignore[assignment]
+
+    if has_usable_secret is not None:
+        for platform, pconfig in config.platforms.items():
+            if not pconfig.enabled:
+                continue
+            env_name = _token_env_names.get(platform)
+            if not env_name:
+                continue
+            token = pconfig.token
+            if token and token.strip() and not has_usable_secret(token, min_length=4):
+                logger.error(
+                    "%s is enabled but %s is set to a placeholder value ('%s'). "
+                    "Set a real bot token before starting the gateway. "
+                    "The adapter will NOT be started.",
+                    platform.value, env_name, token.strip()[:6] + "...",
+                )
+                pconfig.enabled = False


 def _apply_env_overrides(config: GatewayConfig) -> None:
@@ -82,7 +82,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {

    # Tier 3 — no edit support, progress messages are permanent
    "signal":          _TIER_LOW,
-    "whatsapp":        _TIER_LOW,
+    "whatsapp":        _TIER_MEDIUM,  # Baileys bridge supports /edit
    "bluebubbles":     _TIER_LOW,
    "weixin":          _TIER_LOW,
    "wecom":           _TIER_LOW,
@@ -54,6 +54,66 @@ DEFAULT_PORT = 8642
 MAX_STORED_RESPONSES = 100
 MAX_REQUEST_BYTES = 1_000_000  # 1 MB default limit for POST bodies
 CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0
+MAX_NORMALIZED_TEXT_LENGTH = 65_536  # 64 KB cap for normalized content parts
+MAX_CONTENT_LIST_SIZE = 1_000  # Max items when content is an array
+
+
+def _normalize_chat_content(
+    content: Any, *, _max_depth: int = 10, _depth: int = 0,
+) -> str:
+    """Normalize OpenAI chat message content into a plain text string.
+
+    Some clients (Open WebUI, LobeChat, etc.) send content as an array of
+    typed parts instead of a plain string::
+
+        [{"type": "text", "text": "hello"}, {"type": "input_text", "text": "..."}]
+
+    This function flattens those into a single string so the agent pipeline
+    (which expects strings) doesn't choke.
+
+    Args:
+        content: ``None``, a string, a list of parts (strings, typed dicts,
+            or nested lists), or any other value (coerced with ``str()``).
+        _max_depth: Deepest list nesting that is still expanded.
+        _depth: Current nesting level; internal, set by recursive calls.
+
+    Returns:
+        Plain text of at most ``MAX_NORMALIZED_TEXT_LENGTH`` characters.
+        List parts are joined with newlines; non-text parts (e.g.
+        ``image_url``) are dropped; nesting beyond ``_max_depth`` yields ``""``.
+    """
+    limit = MAX_NORMALIZED_TEXT_LENGTH
+    if _depth > _max_depth or content is None:
+        return ""
+    if isinstance(content, str):
+        # Slicing is a no-op for short strings, so no length test is needed
+        return content[:limit]
+
+    if isinstance(content, list):
+        parts: List[str] = []
+        total = 0  # running length of collected parts (separators excluded)
+        for item in content[:MAX_CONTENT_LIST_SIZE]:
+            piece = None
+            if isinstance(item, str):
+                piece = item[:limit] or None
+            elif isinstance(item, dict):
+                item_type = str(item.get("type") or "").strip().lower()
+                if item_type in {"text", "input_text", "output_text"}:
+                    text = item.get("text", "")
+                    if text:
+                        try:
+                            piece = str(text)[:limit]
+                        except Exception:
+                            pass  # un-stringifiable text part: skip it
+                # Silently skip image_url / other non-text parts
+            elif isinstance(item, list):
+                piece = _normalize_chat_content(
+                    item, _max_depth=_max_depth, _depth=_depth + 1,
+                ) or None
+            if piece is not None:
+                parts.append(piece)
+                total += len(piece)
+            # Stop early once the cap is reached; the final slice trims the
+            # overshoot.  A running total avoids re-summing every part.
+            if total >= limit:
+                break
+        return "\n".join(parts)[:limit]
+
+    # Fallback for unexpected types (int, float, bool, etc.)
+    try:
+        return str(content)[:limit]
+    except Exception:
+        return ""


 def check_api_server_requirements() -> bool:
@@ -553,7 +613,7 @@ class APIServerAdapter(BasePlatformAdapter):

        for msg in messages:
            role = msg.get("role", "")
-            content = msg.get("content", "")
+            content = _normalize_chat_content(msg.get("content", ""))
            if role == "system":
                # Accumulate system messages
                if system_prompt is None:
@@ -926,18 +986,7 @@ class APIServerAdapter(BasePlatformAdapter):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    content = item.get("content", "")
-                    # Handle content that may be a list of content parts
-                    if isinstance(content, list):
-                        text_parts = []
-                        for part in content:
-                            if isinstance(part, dict) and part.get("type") == "input_text":
-                                text_parts.append(part.get("text", ""))
-                            elif isinstance(part, dict) and part.get("type") == "output_text":
-                                text_parts.append(part.get("text", ""))
-                            elif isinstance(part, str):
-                                text_parts.append(part)
-                        content = "\n".join(text_parts)
+                    content = _normalize_chat_content(item.get("content", ""))
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1770,6 +1819,23 @@ class APIServerAdapter(BasePlatformAdapter):
                )
                return False

+            # Refuse to start network-accessible with a placeholder key.
+            # Ported from openclaw/openclaw#64586.
+            if is_network_accessible(self._host) and self._api_key:
+                try:
+                    from hermes_cli.auth import has_usable_secret
+                    if not has_usable_secret(self._api_key, min_length=8):
+                        logger.error(
+                            "[%s] Refusing to start: API_SERVER_KEY is set to a "
+                            "placeholder value. Generate a real secret "
+                            "(e.g. `openssl rand -hex 32`) and set API_SERVER_KEY "
+                            "before exposing the API server on %s.",
+                            self.name, self._host,
+                        )
+                        return False
+                except ImportError:
+                    pass
+
            # Port conflict detection — fail fast if port is already in use
            try:
                with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as _s:
@@ -21,6 +21,59 @@ from urllib.parse import urlsplit
 logger = logging.getLogger(__name__)


+def utf16_len(s: str) -> int:
+    """Count UTF-16 code units in *s*.
+
+    Telegram's message-length limit (4 096) is measured in UTF-16 code units,
+    **not** Unicode code-points.  Characters outside the Basic Multilingual
+    Plane (emoji like 😀, CJK Extension B, musical symbols, …) are encoded as
+    surrogate pairs and therefore consume **two** UTF-16 code units each.
+    A lone surrogate (half of a split pair) counts as one unit rather than
+    raising ``UnicodeEncodeError``.
+
+    Ported from nearai/ironclaw#2304 (same discrepancy in Rust's ``chars().count()``).
+    """
+    return len(s.encode("utf-16-le", "surrogatepass")) // 2
+
+
+def _prefix_within_utf16_limit(s: str, limit: int) -> str:
+    """Trim *s* to its longest prefix that fits in *limit* UTF-16 code units.
+
+    Slicing happens on code-point offsets, so a character that needs two
+    code units is either kept whole or dropped, never cut in half.
+    """
+    if utf16_len(s) <= limit:
+        return s
+    # Bisect on code-point offsets; ``fits`` always satisfies the limit.
+    fits, upper = 0, len(s)
+    while fits < upper:
+        probe = (fits + upper + 1) // 2
+        if utf16_len(s[:probe]) > limit:
+            upper = probe - 1
+        else:
+            fits = probe
+    return s[:fits]
+
+
+def _custom_unit_to_cp(s: str, budget: int, len_fn) -> int:
+    """Map a *budget* in ``len_fn`` units to a code-point slice length for *s*.
+
+    The result is the largest offset *n* with ``len_fn(s[:n]) <= budget``, so
+    ``s[:n]`` is the longest prefix that fits.  The whole string is tried
+    first; otherwise a binary search needs O(log n) calls to *len_fn*.
+    """
+    total = len(s)
+    if len_fn(s) <= budget:
+        return total
+    best, ceiling = 0, total
+    while best < ceiling:
+        probe = (best + ceiling + 1) // 2
+        fits = len_fn(s[:probe]) <= budget
+        best = probe if fits else best
+        ceiling = ceiling if fits else probe - 1
+    return best
+
+
 def is_network_accessible(host: str) -> bool:
    """Return True if *host* would expose the server beyond loopback.

@@ -1886,7 +1939,11 @@ class BasePlatformAdapter(ABC):
        return content
    
    @staticmethod
-    def truncate_message(content: str, max_length: int = 4096) -> List[str]:
+    def truncate_message(
+        content: str,
+        max_length: int = 4096,
+        len_fn: Optional["Callable[[str], int]"] = None,
+    ) -> List[str]:
        """
        Split a long message into chunks, preserving code block boundaries.

@@ -1898,11 +1955,16 @@ class BasePlatformAdapter(ABC):
        Args:
            content: The full message content
            max_length: Maximum length per chunk (platform-specific)
+            len_fn: Optional length function for measuring string length.
+                     Defaults to ``len`` (Unicode code-points).  Pass
+                     ``utf16_len`` for platforms that measure message
+                     length in UTF-16 code units (e.g. Telegram).

        Returns:
            List of message chunks
        """
-        if len(content) <= max_length:
+        _len = len_fn or len
+        if _len(content) <= max_length:
            return [content]

        INDICATOR_RESERVE = 10   # room for " (XX/XX)"
@@ -1921,22 +1983,33 @@ class BasePlatformAdapter(ABC):

            # How much body text we can fit after accounting for the prefix,
            # a potential closing fence, and the chunk indicator.
-            headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
+            headroom = max_length - INDICATOR_RESERVE - _len(prefix) - _len(FENCE_CLOSE)
            if headroom < 1:
                headroom = max_length // 2

            # Everything remaining fits in one final chunk
-            if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
+            if _len(prefix) + _len(remaining) <= max_length - INDICATOR_RESERVE:
                chunks.append(prefix + remaining)
                break

-            # Find a natural split point (prefer newlines, then spaces)
-            region = remaining[:headroom]
+            # Find a natural split point (prefer newlines, then spaces).
+            # When _len != len (e.g. utf16_len for Telegram), headroom is
+            # measured in the custom unit.  We need codepoint-based slice
+            # positions that stay within the custom-unit budget.
+            #
+            # _custom_unit_to_cp() maps a custom-unit budget to the largest
+            # codepoint offset whose custom length ≤ budget.
+            if _len is not len:
+                # Map headroom (custom units) → codepoint slice length
+                _cp_limit = _custom_unit_to_cp(remaining, headroom, _len)
+            else:
+                _cp_limit = headroom
+            region = remaining[:_cp_limit]
            split_at = region.rfind("\n")
-            if split_at < headroom // 2:
+            if split_at < _cp_limit // 2:
                split_at = region.rfind(" ")
            if split_at < 1:
-                split_at = headroom
+                split_at = _cp_limit

            # Avoid splitting inside an inline code span (`...`).
            # If the text before split_at has an odd number of unescaped
@@ -1956,7 +2029,7 @@ class BasePlatformAdapter(ABC):
                    safe_split = candidate.rfind(" ", 0, last_bt)
                    nl_split = candidate.rfind("\n", 0, last_bt)
                    safe_split = max(safe_split, nl_split)
-                    if safe_split > headroom // 4:
+                    if safe_split > _cp_limit // 4:
                        split_at = safe_split

            chunk_body = remaining[:split_at]
@@ -442,6 +442,7 @@ class DiscordAdapter(BasePlatformAdapter):
        self._pending_text_batches: Dict[str, MessageEvent] = {}
        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
        self._voice_text_channels: Dict[int, int] = {}  # guild_id -> text_channel_id
+        self._voice_sources: Dict[int, Dict[str, Any]] = {}  # guild_id -> linked text channel source metadata
        self._voice_timeout_tasks: Dict[int, asyncio.Task] = {}  # guild_id -> timeout task
        # Phase 2: voice listening
        self._voice_receivers: Dict[int, VoiceReceiver] = {}  # guild_id -> VoiceReceiver
@@ -456,6 +457,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # show the standard typing gateway event for bots)
        self._typing_tasks: Dict[str, asyncio.Task] = {}
        self._bot_task: Optional[asyncio.Task] = None
+        self._post_connect_task: Optional[asyncio.Task] = None
        # Dedup cache: prevents duplicate bot responses when Discord
        # RESUME replays events after reconnects.
        self._dedup = MessageDeduplicator()
@@ -545,15 +547,14 @@ class DiscordAdapter(BasePlatformAdapter):

                # Resolve any usernames in the allowed list to numeric IDs
                await adapter_self._resolve_allowed_usernames()
-
-                # Sync slash commands with Discord
-                try:
-                    synced = await adapter_self._client.tree.sync()
-                    logger.info("[%s] Synced %d slash command(s)", adapter_self.name, len(synced))
-                except Exception as e:  # pragma: no cover - defensive logging
-                    logger.warning("[%s] Slash command sync failed: %s", adapter_self.name, e, exc_info=True)
                adapter_self._ready_event.set()

+                if adapter_self._post_connect_task and not adapter_self._post_connect_task.done():
+                    adapter_self._post_connect_task.cancel()
+                adapter_self._post_connect_task = asyncio.create_task(
+                    adapter_self._run_post_connect_initialization()
+                )
+
            @self._client.event
            async def on_message(message: DiscordMessage):
                # Dedup: Discord RESUME replays events after reconnects (#4777)
@@ -686,14 +687,36 @@ class DiscordAdapter(BasePlatformAdapter):
            except Exception as e:  # pragma: no cover - defensive logging
                logger.warning("[%s] Error during disconnect: %s", self.name, e, exc_info=True)

+        if self._post_connect_task and not self._post_connect_task.done():
+            self._post_connect_task.cancel()
+            try:
+                await self._post_connect_task
+            except asyncio.CancelledError:
+                pass
+
        self._running = False
        self._client = None
        self._ready_event.clear()
+        self._post_connect_task = None

        self._release_platform_lock()

        logger.info("[%s] Disconnected", self.name)

+    async def _run_post_connect_initialization(self) -> None:
+        """Sync slash commands after connect; timeouts and errors are logged, cancellation propagates."""
+        if not self._client:
+            return
+        try:
+            synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
+            logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
+        except asyncio.TimeoutError:
+            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
+        except asyncio.CancelledError:
+            raise  # re-raise so cancelling this task (on reconnect/disconnect) is not swallowed
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
+
    async def _add_reaction(self, message: Any, emoji: str) -> bool:
        """Add an emoji reaction to a Discord message."""
        if not message or not hasattr(message, "add_reaction"):
@@ -1023,6 +1046,7 @@ class DiscordAdapter(BasePlatformAdapter):
        if task:
            task.cancel()
        self._voice_text_channels.pop(guild_id, None)
+        self._voice_sources.pop(guild_id, None)

    # Maximum seconds to wait for voice playback before giving up
    PLAYBACK_TIMEOUT = 120
@@ -2222,6 +2246,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id = str(message.channel.id)
            parent_channel_id = self._get_parent_channel_id(message.channel)

+        is_voice_linked_channel = False
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -2248,7 +2273,12 @@ class DiscordAdapter(BasePlatformAdapter):
                channel_ids.add(parent_channel_id)

            require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
-            is_free_channel = bool(channel_ids & free_channels)
+            # Voice-linked text channels act as free-response while voice is active.
+            # Only the exact bound channel gets the exemption, not sibling threads.
+            voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
+            current_channel_id = str(message.channel.id)
+            is_voice_linked_channel = current_channel_id in voice_linked_ids
+            is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
@@ -2272,7 +2302,7 @@ class DiscordAdapter(BasePlatformAdapter):
            no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
            skip_thread = bool(channel_ids & no_thread_channels)
            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
-            if auto_thread and not skip_thread:
+            if auto_thread and not skip_thread and not is_voice_linked_channel:
                thread = await self._auto_create_thread(message)
                if thread:
                    is_thread = True
@@ -34,6 +34,9 @@ from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional
+from urllib.error import HTTPError, URLError
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen

 # aiohttp/websockets are independent optional deps — import outside lark_oapi
 # so they remain available for tests and webhook mode even if lark_oapi is missing.
@@ -169,6 +172,19 @@ _FEISHU_CARD_ACTION_DEDUP_TTL_SECONDS = 15 * 60    # card action token dedup win
 _FEISHU_BOT_MSG_TRACK_SIZE = 512                   # LRU size for tracking sent message IDs
 _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003})  # reply target withdrawn/missing → create fallback
 _FEISHU_ACK_EMOJI = "OK"
+
+# QR onboarding constants
+_ONBOARD_ACCOUNTS_URLS = {
+    "feishu": "https://accounts.feishu.cn",
+    "lark": "https://accounts.larksuite.com",
+}
+_ONBOARD_OPEN_URLS = {
+    "feishu": "https://open.feishu.cn",
+    "lark": "https://open.larksuite.com",
+}
+_REGISTRATION_PATH = "/oauth/v1/app/registration"
+_ONBOARD_REQUEST_TIMEOUT_S = 10
+
 # ---------------------------------------------------------------------------
 # Fallback display strings
 # ---------------------------------------------------------------------------
@@ -3621,3 +3637,328 @@ class FeishuAdapter(BasePlatformAdapter):
            return _FEISHU_FILE_UPLOAD_TYPE, "file"

        return _FEISHU_FILE_UPLOAD_TYPE, "file"
+
+
+# =============================================================================
+# QR scan-to-create onboarding
+#
+# Device-code flow: user scans a QR code with Feishu/Lark mobile app and the
+# platform creates a fully configured bot application automatically.
+# Called by `hermes gateway setup` via _setup_feishu() in hermes_cli/gateway.py.
+# =============================================================================
+
+
+def _accounts_base_url(domain: str) -> str:
+    return _ONBOARD_ACCOUNTS_URLS.get(domain) or _ONBOARD_ACCOUNTS_URLS["feishu"]  # unknown domain -> Feishu host
+
+
+def _onboard_open_base_url(domain: str) -> str:
+    return _ONBOARD_OPEN_URLS.get(domain) or _ONBOARD_OPEN_URLS["feishu"]  # unknown domain -> Feishu host
+
+
+def _post_registration(base_url: str, body: Dict[str, str]) -> dict:
+    """POST *body* form-encoded to the registration endpoint; return its JSON.
+
+    4xx replies are part of the protocol (poll answers authorization_pending
+    with a 400), so an ``HTTPError`` carrying a JSON body is returned as data;
+    it is re-raised only when the body is empty or not valid JSON.
+    """
+    form = urlencode(body).encode("utf-8")
+    headers = {"Content-Type": "application/x-www-form-urlencoded"}
+    request = Request(f"{base_url}{_REGISTRATION_PATH}", data=form, headers=headers)
+    try:
+        with urlopen(request, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    except HTTPError as exc:
+        payload = exc.read()
+        if not payload:
+            raise
+        try:
+            return json.loads(payload.decode("utf-8"))
+        except ValueError:
+            raise exc from None
+
+
+def _init_registration(domain: str = "feishu") -> None:
+    """Check that the registration service for *domain* offers client_secret auth.
+
+    Raises RuntimeError listing the advertised methods when it does not.
+    """
+    reply = _post_registration(_accounts_base_url(domain), {"action": "init"})
+    methods = reply.get("supported_auth_methods") or []
+    if "client_secret" in methods:
+        return
+    raise RuntimeError(
+        "Feishu / Lark registration environment does not support client_secret auth. "
+        f"Supported: {methods}"
+    )
+
+
+def _begin_registration(domain: str = "feishu") -> dict:
+    """Start the device-code flow.
+
+    Returns device_code, qr_url, user_code, interval, expire_in.  Raises
+    RuntimeError when the reply lacks a device_code or a verification URL.
+    """
+    res = _post_registration(_accounts_base_url(domain), {
+        "action": "begin",
+        "archetype": "PersonalAgent",
+        "auth_method": "client_secret",
+        "request_user_info": "open_id",
+    })
+    for field in ("device_code", "verification_uri_complete"):
+        if not res.get(field):
+            raise RuntimeError(f"Feishu / Lark registration did not return a {field}")
+    qr_url = res["verification_uri_complete"]
+    qr_url += ("&" if "?" in qr_url else "?") + "from=hermes&tp=hermes"  # append the from/tp query parameters
+    return {
+        "device_code": res["device_code"],
+        "qr_url": qr_url,
+        "user_code": res.get("user_code", ""),
+        "interval": res.get("interval") or 5,
+        "expire_in": res.get("expire_in") or 600,
+    }
+
+
+def _poll_registration(
+    *,
+    device_code: str,
+    interval: int,
+    expire_in: int,
+    domain: str = "feishu",
+) -> Optional[dict]:
+    """Poll every *interval* seconds until credentials arrive, or denial/timeout.
+
+    Returns dict with app_id, app_secret, domain, open_id on success ("lark"
+    as domain once a reply reports that tenant brand).  None on failure.
+    """
+    deadline = time.time() + expire_in
+    current_domain = domain
+    domain_switched = False
+    poll_count = 0
+
+    while time.time() < deadline:
+        base_url = _accounts_base_url(current_domain)
+        try:
+            res = _post_registration(base_url, {
+                "action": "poll",
+                "device_code": device_code,
+                "tp": "ob_app",
+            })
+        except (URLError, OSError, json.JSONDecodeError):  # failed attempt: wait one interval and retry
+            time.sleep(interval)
+            continue
+
+        poll_count += 1  # counts replies that parsed, not attempts
+        if poll_count == 1:
+            print("  Fetching configuration results...", end="", flush=True)
+        elif poll_count % 6 == 0:  # one progress dot every sixth reply
+            print(".", end="", flush=True)
+
+        # Domain auto-detection: later polls use the Lark accounts host once a reply says "lark".
+        user_info = res.get("user_info") or {}
+        tenant_brand = user_info.get("tenant_brand")
+        if tenant_brand == "lark" and not domain_switched:
+            current_domain = "lark"
+            domain_switched = True
+            # Fall through — server may return credentials in this same response.
+
+        # Success: both credentials are present in the reply.
+        if res.get("client_id") and res.get("client_secret"):
+            if poll_count > 0:  # always true here: poll_count was incremented above
+                print()  # newline after "Fetching configuration results..." dots
+            return {
+                "app_id": res["client_id"],
+                "app_secret": res["client_secret"],
+                "domain": current_domain,
+                "open_id": user_info.get("open_id"),
+            }
+
+        # Terminal errors: stop polling and report failure.
+        error = res.get("error", "")
+        if error in ("access_denied", "expired_token"):
+            if poll_count > 0:
+                print()
+            logger.warning("[Feishu onboard] Registration %s", error)
+            return None
+
+        # authorization_pending or unknown — keep polling
+        time.sleep(interval)
+
+    if poll_count > 0:  # only terminate the progress line if one was started
+        print()
+    logger.warning("[Feishu onboard] Poll timed out after %ds", expire_in)
+    return None
+
+
+try:
+    import qrcode as _qrcode_mod
+except (ImportError, TypeError):
+    _qrcode_mod = None  # type: ignore[assignment]
+
+
+def _render_qr(url: str) -> bool:
+    """Print *url* as an ASCII QR code; True on success, False if qrcode is missing or fails."""
+    if _qrcode_mod is None:
+        return False
+    try:
+        code = _qrcode_mod.QRCode()
+        code.add_data(url)
+        code.make(fit=True)
+        code.print_ascii(invert=True)
+    except Exception:
+        return False
+    return True
+
+
+def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
+    """Check that the bot credentials work by fetching /open-apis/bot/v3/info.
+
+    Goes through the lark_oapi SDK when FEISHU_AVAILABLE is set, raw HTTP otherwise.
+    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
+    """
+    # Both probes share a signature, so pick one and delegate.
+    probe = _probe_bot_sdk if FEISHU_AVAILABLE else _probe_bot_http
+    return probe(app_id, app_secret, domain)
+
+
+def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:
+    """Create a lark SDK client for the given credentials.
+
+    Any *domain* other than "lark" selects FEISHU_DOMAIN.
+    """
+    sdk_domain = LARK_DOMAIN if domain == "lark" else FEISHU_DOMAIN
+    builder = lark.Client.builder()
+    builder = builder.app_id(app_id).app_secret(app_secret)
+    builder = builder.domain(sdk_domain)
+    builder = builder.log_level(lark.LogLevel.WARNING)
+    return builder.build()
+
+
+def _parse_bot_response(data: dict) -> Optional[dict]:
+    """Extract bot_name and bot_open_id from a /bot/v3/info response.
+
+    Returns None unless *data* is a dict whose ``code`` is 0.
+    """
+    if not isinstance(data, dict) or data.get("code") != 0:
+        return None
+    bot = data.get("bot") or (data.get("data") or {}).get("bot") or {}  # top-level or under "data"; either may be null
+    return {"bot_name": bot.get("bot_name"), "bot_open_id": bot.get("open_id")}
+
+
+def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
+    """Fetch /open-apis/bot/v3/info through the lark_oapi SDK client.
+
+    Best-effort: any exception is logged at debug level and turned into None."""
+    try:
+        sdk_client = _build_onboard_client(app_id, app_secret, domain)
+        raw = sdk_client.request(
+            method="GET", url="/open-apis/bot/v3/info", body=None, raw_response=True
+        )
+        payload = json.loads(raw.content)
+        return _parse_bot_response(payload)
+    except Exception as exc:
+        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
+        return None
+
+
+def _probe_bot_http(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
+    """Fallback probe using raw HTTP (when lark_oapi is not installed).
+
+    Exchanges the app credentials for a tenant access token, then fetches
+    /open-apis/bot/v3/info.  Best-effort like the SDK probe: network errors and
+    malformed replies are logged at debug level and reported as None.
+    """
+    base_url = _onboard_open_base_url(domain)
+    try:
+        token_data = json.dumps({"app_id": app_id, "app_secret": app_secret}).encode("utf-8")
+        token_req = Request(
+            f"{base_url}/open-apis/auth/v3/tenant_access_token/internal",
+            data=token_data,
+            headers={"Content-Type": "application/json"},
+        )
+        with urlopen(token_req, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
+            token_res = json.loads(resp.read().decode("utf-8"))
+        access_token = token_res.get("tenant_access_token")
+        if not access_token:
+            return None
+        bot_req = Request(
+            f"{base_url}/open-apis/bot/v3/info",
+            headers={"Authorization": f"Bearer {access_token}", "Content-Type": "application/json"},
+        )
+        with urlopen(bot_req, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
+            bot_res = json.loads(resp.read().decode("utf-8"))
+        return _parse_bot_response(bot_res)
+    # ValueError covers bad JSON and bad UTF-8; AttributeError/TypeError cover non-object JSON.
+    except (URLError, OSError, KeyError, ValueError, AttributeError, TypeError) as exc:
+        logger.debug("[Feishu onboard] HTTP probe failed: %s", exc)
+        return None
+
+
+def qr_register(
+    *,
+    initial_domain: str = "feishu",
+    timeout_seconds: int = 600,
+) -> Optional[dict]:
+    """Run the Feishu / Lark scan-to-create QR registration flow.
+
+    On success the result is a dict::
+
+        {
+            "app_id": str,
+            "app_secret": str,
+            "domain": "feishu" | "lark",
+            "open_id": str | None,
+            "bot_name": str | None,
+            "bot_open_id": str | None,
+        }
+
+    Returns None (after logging) on denial, timeout, or network/protocol errors; bugs propagate.
+    """
+    try:
+        outcome = _qr_register_inner(initial_domain=initial_domain, timeout_seconds=timeout_seconds)
+    except (RuntimeError, URLError, OSError, json.JSONDecodeError) as exc:
+        logger.warning("[Feishu onboard] Registration failed: %s", exc)
+        outcome = None
+    return outcome
+
+
+def _qr_register_inner(
+    *,
+    initial_domain: str,
+    timeout_seconds: int,
+) -> Optional[dict]:
+    """Drive the flow end to end: init → begin → show QR/URL → poll → probe.
+
+    Returns the credential dict from polling, extended with bot_name and
+    bot_open_id (None when the probe fails), or None when polling fails.
+    Exceptions raised by the individual steps are not caught here.
+    """
+    print("  Connecting to Feishu / Lark...", end="", flush=True)
+    _init_registration(initial_domain)
+    session = _begin_registration(initial_domain)
+    print(" done.")
+
+    print()
+    qr_url = session["qr_url"]
+    if not _render_qr(qr_url):
+        print(f"  Open this URL in Feishu / Lark on your phone:\n\n  {qr_url}\n")
+        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
+    else:
+        print(f"\n  Scan the QR code above, or open this URL directly:\n  {qr_url}")
+    print()
+
+    creds = _poll_registration(
+        device_code=session["device_code"],
+        interval=session["interval"],
+        expire_in=min(session["expire_in"], timeout_seconds),
+        domain=initial_domain,
+    )
+    if not creds:
+        return None
+
+    # Best-effort probe: a failed probe must not void the credentials.
+    bot_info = probe_bot(creds["app_id"], creds["app_secret"], creds["domain"]) or {}
+    creds["bot_name"] = bot_info.get("bot_name")
+    creds["bot_open_id"] = bot_info.get("bot_open_id")
+    return creds
@@ -18,6 +18,7 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
+    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """

@@ -104,7 +105,7 @@ MAX_MESSAGE_LENGTH = 4000
 # Uses get_hermes_home() so each profile gets its own Matrix store.
 from hermes_constants import get_hermes_dir as _get_hermes_dir
 _STORE_DIR = _get_hermes_dir("platforms/matrix/store", "matrix/store")
-_CRYPTO_PICKLE_PATH = _STORE_DIR / "crypto_store.pickle"
+_CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"

 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5
@@ -165,6 +166,33 @@ def check_matrix_requirements() -> bool:
    return True


+class _CryptoStateStore:
+    """Minimal state store handed to the mautrix crypto machinery.
+
+    OlmMachine requires a StateStore with ``is_encrypted``,
+    ``get_encryption_info`` and ``find_shared_rooms``; the basic
+    ``MemoryStateStore`` from ``mautrix.client`` doesn't implement these.
+    Encryption info is delegated to the wrapped client store when it has
+    that method; shared rooms are simply the adapter's joined rooms.
+    """
+
+    def __init__(self, client_state_store: Any, joined_rooms: set):
+        self._ss, self._joined_rooms = client_state_store, joined_rooms
+
+    async def is_encrypted(self, room_id: str) -> bool:
+        info = await self.get_encryption_info(room_id)
+        return info is not None
+
+    async def get_encryption_info(self, room_id: str):
+        if not hasattr(self._ss, "get_encryption_info"):
+            return None
+        return await self._ss.get_encryption_info(room_id)
+
+    async def find_shared_rooms(self, user_id: str) -> list:
+        # Every joined room is reported, whatever *user_id* is (single-user bot).
+        return list(self._joined_rooms)
+
+
 class MatrixAdapter(BasePlatformAdapter):
    """Gateway adapter for Matrix (any homeserver)."""

@@ -199,6 +227,7 @@ class MatrixAdapter(BasePlatformAdapter):
        )

        self._client: Any = None  # mautrix.client.Client
+        self._crypto_db: Any = None  # mautrix.util.async_db.Database
        self._sync_task: Optional[asyncio.Task] = None
        self._closing = False
        self._startup_ts: float = 0.0
@@ -252,6 +281,92 @@ class MatrixAdapter(BasePlatformAdapter):
        self._processed_events_set.add(event_id)
        return False

+    # ------------------------------------------------------------------
+    # E2EE helpers
+    # ------------------------------------------------------------------
+
+    async def _verify_device_keys_on_server(self, client: Any, olm: Any) -> bool:
+        """Check that the homeserver holds this device's current ed25519 identity key.
+
+        Returns True if the server key matches or ``olm.share_keys()`` re-uploaded it.
+        Returns False on query/upload failure or stale local state (caller should refuse E2EE).
+        """
+        try:
+            resp = await client.query_keys({client.mxid: [client.device_id]})
+        except Exception as exc:
+            logger.error(
+                "Matrix: cannot verify device keys on server: %s — refusing E2EE", exc,
+            )
+            return False
+
+        # query_keys returns typed objects (QueryKeysResponse, DeviceKeys
+        # with KeyID keys).  Normalise to plain strings for comparison.
+        device_keys_map = getattr(resp, "device_keys", {}) or {}
+        our_user_devices = device_keys_map.get(str(client.mxid)) or {}
+        our_keys = our_user_devices.get(str(client.device_id))
+
+        if not our_keys:
+            logger.warning("Matrix: device keys missing from server — re-uploading")
+            olm.account.shared = False  # presumably makes share_keys() upload the account keys again — confirm
+            try:
+                await olm.share_keys()
+            except Exception as exc:
+                logger.error("Matrix: failed to re-upload device keys: %s", exc)
+                return False
+            return True
+
+        # DeviceKeys.keys is a dict[KeyID, str].  Iterate to find the
+        # ed25519 key rather than constructing a KeyID for lookup.
+        server_ed25519 = None
+        keys_dict = getattr(our_keys, "keys", {}) or {}
+        for key_id, key_value in keys_dict.items():
+            if str(key_id).startswith("ed25519:"):
+                server_ed25519 = str(key_value)
+                break
+        local_ed25519 = olm.account.identity_keys.get("ed25519")
+
+        if server_ed25519 != local_ed25519:  # also true when the server lists no ed25519 key (None)
+            if olm.account.shared:
+                # Restored account from DB but server has different keys — corrupted state.
+                logger.error(
+                    "Matrix: server has different identity keys for device %s — "
+                    "local crypto state is stale. Delete %s and restart.",
+                    client.device_id,
+                    _CRYPTO_DB_PATH,
+                )
+                return False
+
+            # Fresh account (never uploaded). Server has stale keys from a
+            # previous installation. Try to delete the old device and re-upload.
+            logger.warning(
+                "Matrix: server has stale keys for device %s — attempting re-upload",
+                client.device_id,
+            )
+            try:
+                await client.api.request(
+                    client.api.Method.DELETE
+                    if hasattr(client.api, "Method")
+                    else "DELETE",
+                    f"/_matrix/client/v3/devices/{client.device_id}",
+                )
+                logger.info("Matrix: deleted stale device %s from server", client.device_id)
+            except Exception:
+                # Device deletion often requires UIA or may simply not be
+                # permitted — that's fine, share_keys will try to overwrite.
+                pass
+            try:
+                await olm.share_keys()
+            except Exception as exc:
+                logger.error(
+                    "Matrix: cannot upload device keys for %s: %s. "
+                    "Try generating a new access token to get a fresh device.",
+                    client.device_id,
+                    exc,
+                )
+                return False
+
+        return True
+
    # ------------------------------------------------------------------
    # Required overrides
    # ------------------------------------------------------------------
@@ -350,54 +465,67 @@ class MatrixAdapter(BasePlatformAdapter):
                return False
            try:
                from mautrix.crypto import OlmMachine
-                from mautrix.crypto.store import MemoryCryptoStore
+                from mautrix.crypto.store.asyncpg import PgCryptoStore
+                from mautrix.util.async_db import Database
+
+                _STORE_DIR.mkdir(parents=True, exist_ok=True)
+
+                # Remove legacy pickle file from pre-SQLite era.
+                legacy_pickle = _STORE_DIR / "crypto_store.pickle"
+                if legacy_pickle.exists():
+                    logger.info("Matrix: removing legacy crypto_store.pickle (migrated to SQLite)")
+                    legacy_pickle.unlink()
+
+                # Open SQLite-backed crypto store.
+                crypto_db = Database.create(
+                    f"sqlite:///{_CRYPTO_DB_PATH}",
+                    upgrade_table=PgCryptoStore.upgrade_table,
+                )
+                await crypto_db.start()
+                self._crypto_db = crypto_db

-                # account_id and pickle_key are required by mautrix ≥0.21.
-                # Use the Matrix user ID as account_id for stable identity.
-                # pickle_key secures in-memory serialisation; derive from
-                # the same user_id:device_id pair used for the on-disk HMAC.
                _acct_id = self._user_id or "hermes"
-                _pickle_key = f"{_acct_id}:{self._device_id}"
-                crypto_store = MemoryCryptoStore(
+                _pickle_key = f"{_acct_id}:{self._device_id or 'default'}"
+                crypto_store = PgCryptoStore(
                    account_id=_acct_id,
                    pickle_key=_pickle_key,
+                    db=crypto_db,
                )
+                await crypto_store.open()

-                # Restore persisted crypto state from a previous run.
-                # Uses HMAC to verify integrity before unpickling.
-                pickle_path = _CRYPTO_PICKLE_PATH
-                if pickle_path.exists():
-                    try:
-                        import hashlib, hmac, pickle
-                        raw = pickle_path.read_bytes()
-                        # Format: 32-byte HMAC-SHA256 signature + pickle data.
-                        if len(raw) > 32:
-                            sig, payload = raw[:32], raw[32:]
-                            # Key is derived from the device_id + user_id (stable per install).
-                            hmac_key = f"{self._user_id}:{self._device_id}".encode()
-                            expected = hmac.new(hmac_key, payload, hashlib.sha256).digest()
-                            if hmac.compare_digest(sig, expected):
-                                saved = pickle.loads(payload)  # noqa: S301
-                                if isinstance(saved, MemoryCryptoStore):
-                                    crypto_store = saved
-                                    logger.info("Matrix: restored E2EE crypto store from %s", pickle_path)
-                            else:
-                                logger.warning("Matrix: crypto store HMAC mismatch — ignoring stale/tampered file")
-                    except Exception as exc:
-                        logger.warning("Matrix: could not restore crypto store: %s", exc)
+                crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
+                olm = OlmMachine(client, crypto_store, crypto_state)

-                olm = OlmMachine(client, crypto_store, state_store)
-
-                # Set trust policy: accept unverified devices so senders
-                # share Megolm session keys with us automatically.
+                # Accept unverified devices so senders share Megolm
+                # session keys with us automatically.
                olm.share_keys_min_trust = TrustState.UNVERIFIED
                olm.send_keys_min_trust = TrustState.UNVERIFIED

                await olm.load()
+
+                # Verify our device keys are still on the homeserver.
+                if not await self._verify_device_keys_on_server(client, olm):
+                    await crypto_db.stop()
+                    await api.session.close()
+                    return False
+
+                # Import cross-signing private keys from SSSS and self-sign
+                # the current device. Required after any device-key rotation
+                # (fresh crypto.db, share_keys re-upload) — otherwise the
+                # device's self-signing signature is stale and peers refuse
+                # to share Megolm sessions with the rotated device.
+                recovery_key = os.getenv("MATRIX_RECOVERY_KEY", "").strip()
+                if recovery_key:
+                    try:
+                        await olm.verify_with_recovery_key(recovery_key)
+                        logger.info("Matrix: cross-signing verified via recovery key")
+                    except Exception as exc:
+                        logger.warning("Matrix: recovery key verification failed: %s", exc)
+
                client.crypto = olm
                logger.info(
                    "Matrix: E2EE enabled (store: %s%s)",
-                    str(_STORE_DIR),
+                    str(_CRYPTO_DB_PATH),
                    f", device_id={client.device_id}" if client.device_id else "",
                )
            except Exception as exc:
@@ -438,6 +566,15 @@ class MatrixAdapter(BasePlatformAdapter):
                )
                # Build DM room cache from m.direct account data.
                await self._refresh_dm_cache()
+
+                # Dispatch events from the initial sync so the OlmMachine
+                # receives to-device key shares queued while we were offline.
+                try:
+                    tasks = client.handle_sync(sync_data)
+                    if tasks:
+                        await asyncio.gather(*tasks)
+                except Exception as exc:
+                    logger.warning("Matrix: initial sync event dispatch error: %s", exc)
            else:
                logger.warning("Matrix: initial sync returned unexpected type %s", type(sync_data).__name__)
        except Exception as exc:
@@ -466,21 +603,12 @@ class MatrixAdapter(BasePlatformAdapter):
            except (asyncio.CancelledError, Exception):
                pass

-        # Persist E2EE crypto store before closing so the next restart
-        # can decrypt events using sessions from this run.
-        if self._client and self._encryption and getattr(self._client, "crypto", None):
+        # Close the SQLite crypto store database.
+        if hasattr(self, "_crypto_db") and self._crypto_db:
            try:
-                import hashlib, hmac, pickle
-                crypto_store = self._client.crypto.crypto_store
-                _STORE_DIR.mkdir(parents=True, exist_ok=True)
-                pickle_path = _CRYPTO_PICKLE_PATH
-                payload = pickle.dumps(crypto_store)
-                hmac_key = f"{self._user_id}:{self._device_id}".encode()
-                sig = hmac.new(hmac_key, payload, hashlib.sha256).digest()
-                pickle_path.write_bytes(sig + payload)
-                logger.info("Matrix: persisted E2EE crypto store to %s", pickle_path)
+                await self._crypto_db.stop()
            except Exception as exc:
-                logger.debug("Matrix: could not persist crypto store on disconnect: %s", exc)
+                logger.debug("Matrix: could not close crypto DB on disconnect: %s", exc)

        if self._client:
            try:
@@ -654,7 +782,7 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-                async with _aiohttp.ClientSession() as http:
+                async with _aiohttp.ClientSession(trust_env=True) as http:
                    async with http.get(image_url, timeout=_aiohttp.ClientTimeout(total=30)) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -853,13 +981,6 @@ class MatrixAdapter(BasePlatformAdapter):
                    except Exception as exc:
                        logger.warning("Matrix: sync event dispatch error: %s", exc)

-                # Share keys periodically if E2EE is enabled.
-                if self._encryption and getattr(client, "crypto", None):
-                    try:
-                        await client.crypto.share_keys()
-                    except Exception as exc:
-                        logger.warning("Matrix: E2EE key share failed: %s", exc)
-
                # Retry any buffered undecrypted events.
                if self._pending_megolm:
                    await self._retry_pending_decryptions()
@@ -1014,7 +1135,10 @@ class MatrixAdapter(BasePlatformAdapter):
            thread_id = relates_to.get("event_id")

        formatted_body = source_content.get("formatted_body")
-        is_mentioned = self._is_bot_mentioned(body, formatted_body)
+        # m.mentions.user_ids (MSC3952 / Matrix v1.7) — authoritative mention signal.
+        mentions_block = source_content.get("m.mentions") or {}
+        mention_user_ids = mentions_block.get("user_ids") if isinstance(mentions_block, dict) else None
+        is_mentioned = self._is_bot_mentioned(body, formatted_body, mention_user_ids)

        # Require-mention gating.
        if not is_dm:
@@ -1701,8 +1825,24 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

-    def _is_bot_mentioned(self, body: str, formatted_body: Optional[str] = None) -> bool:
-        """Return True if the bot is mentioned in the message."""
+    def _is_bot_mentioned(
+        self,
+        body: str,
+        formatted_body: Optional[str] = None,
+        mention_user_ids: Optional[list] = None,
+    ) -> bool:
+        """Return True if the bot is mentioned in the message.
+
+        Per MSC3952, ``m.mentions.user_ids`` is the authoritative mention
+        signal in the Matrix spec.  When the sender's client populates that
+        field with the bot's user-id, we trust it — even when the visible
+        body text does not contain an explicit ``@bot`` string (some clients
+        only render mention "pills" in ``formatted_body`` or use display
+        names).
+        """
+        # m.mentions.user_ids — authoritative per MSC3952 / Matrix v1.7.
+        if mention_user_ids and self._user_id and self._user_id in mention_user_ids:
+            return True
        if not body and not formatted_body:
            return False
        if self._user_id and self._user_id in body:
@@ -65,7 +65,10 @@ from gateway.platforms.base import (
    cache_image_from_bytes,
    cache_audio_from_bytes,
    cache_document_from_bytes,
+    resolve_proxy_url,
    SUPPORTED_DOCUMENT_TYPES,
+    utf16_len,
+    _prefix_within_utf16_limit,
 )
 from gateway.platforms.telegram_network import (
    TelegramFallbackTransport,
@@ -537,10 +540,7 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

-            proxy_configured = any(
-                (os.getenv(k) or "").strip()
-                for k in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy")
-            )
+            proxy_url = resolve_proxy_url()
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -551,7 +551,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )

-            if fallback_ips and not proxy_configured and not disable_fallback:
+            if fallback_ips and not proxy_url and not disable_fallback:
                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
                    self.name,
@@ -567,10 +567,12 @@ class TelegramAdapter(BasePlatformAdapter):
                    **request_kwargs,
                    httpx_kwargs={"transport": TelegramFallbackTransport(fallback_ips)},
                )
+            elif proxy_url:
+                logger.info("[%s] Proxy detected; passing explicitly to HTTPXRequest: %s", self.name, proxy_url)
+                request = HTTPXRequest(**request_kwargs, proxy=proxy_url)
+                get_updates_request = HTTPXRequest(**request_kwargs, proxy=proxy_url)
            else:
-                if proxy_configured:
-                    logger.info("[%s] Proxy configured; skipping Telegram fallback-IP transport", self.name)
-                elif disable_fallback:
+                if disable_fallback:
                    logger.info("[%s] Telegram fallback-IP transport disabled via env", self.name)
                request = HTTPXRequest(**request_kwargs)
                get_updates_request = HTTPXRequest(**request_kwargs)
@@ -799,7 +801,9 @@ class TelegramAdapter(BasePlatformAdapter):
        try:
            # Format and split message if needed
            formatted = self.format_message(content)
-            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+            chunks = self.truncate_message(
+                formatted, self.MAX_MESSAGE_LENGTH, len_fn=utf16_len,
+            )
            if len(chunks) > 1:
                # truncate_message appends a raw " (1/2)" suffix. Escape the
                # MarkdownV2-special parentheses so Telegram doesn't reject the
@@ -970,7 +974,9 @@ class TelegramAdapter(BasePlatformAdapter):
            # streaming).  Truncate and succeed so the stream consumer can
            # split the overflow into a new message instead of dying.
            if "message_too_long" in err_str or "too long" in err_str:
-                truncated = content[: self.MAX_MESSAGE_LENGTH - 20] + "…"
+                truncated = _prefix_within_utf16_limit(
+                    content, self.MAX_MESSAGE_LENGTH - 20
+                ) + "…"
                try:
                    await self._bot.edit_message_text(
                        chat_id=int(chat_id),
@@ -266,7 +266,7 @@ class WeComAdapter(BasePlatformAdapter):
    async def _open_connection(self) -> None:
        """Open and authenticate a websocket connection."""
        await self._cleanup_ws()
-        self._session = aiohttp.ClientSession()
+        self._session = aiohttp.ClientSession(trust_env=True)
        self._ws = await self._session.ws_connect(
            self._ws_url,
            heartbeat=HEARTBEAT_INTERVAL_SECONDS * 2,
@@ -112,6 +112,7 @@ TYPING_STOP = 2
 _HEADER_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$")
 _TABLE_RULE_RE = re.compile(r"^\s*\|?(?:\s*:?-{3,}:?\s*\|)+\s*:?-{3,}:?\s*\|?\s*$")
 _FENCE_RE = re.compile(r"^```([^\n`]*)\s*$")
+_MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")


 def check_weixin_requirements() -> bool:
@@ -398,15 +399,16 @@ async def _send_message(
    context_token: Optional[str],
    client_id: str,
 ) -> None:
+    if not text or not text.strip():
+        raise ValueError("_send_message: text must not be empty")
    message: Dict[str, Any] = {
        "from_user_id": "",
        "to_user_id": to,
        "client_id": client_id,
        "message_type": MSG_TYPE_BOT,
        "message_state": MSG_STATE_FINISH,
+        "item_list": [{"type": ITEM_TEXT, "text_item": {"text": text}}],
    }
-    if text:
-        message["item_list"] = [{"type": ITEM_TEXT, "text_item": {"text": text}}]
    if context_token:
        message["context_token"] = context_token
    await _api_post(
@@ -499,13 +501,15 @@ async def _upload_ciphertext(
    session: "aiohttp.ClientSession",
    *,
    ciphertext: bytes,
-    cdn_base_url: str,
-    upload_param: str,
-    filekey: str,
+    upload_url: str,
 ) -> str:
-    url = _cdn_upload_url(cdn_base_url, upload_param, filekey)
+    """Upload encrypted media to the CDN.
+
+    Accepts either a constructed CDN URL (from upload_param) or a direct
+    upload_full_url — both use POST with the raw ciphertext as the body.
+    """
    timeout = aiohttp.ClientTimeout(total=120)
-    async with session.post(url, data=ciphertext, headers={"Content-Type": "application/octet-stream"}, timeout=timeout) as response:
+    async with session.post(upload_url, data=ciphertext, headers={"Content-Type": "application/octet-stream"}, timeout=timeout) as response:
        if response.status == 200:
            encrypted_param = response.headers.get("x-encrypted-param")
            if encrypted_param:
@@ -649,7 +653,7 @@ def _normalize_markdown_blocks(content: str) -> str:
            result.append(_rewrite_table_block_for_weixin(table_lines))
            continue

-        result.append(_rewrite_headers_for_weixin(line))
+        result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line)))
        i += 1

    normalized = "\n".join(item.rstrip() for item in result)
@@ -734,6 +738,42 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]:
    return [unit for unit in units if unit]


+def _looks_like_chatty_line_for_weixin(line: str) -> bool:
+    """Return True when a line looks like a standalone chat utterance."""
+    stripped = line.strip()
+    if not stripped:
+        return False
+    if len(stripped) > 48:
+        return False
+    if line.startswith((" ", "\t")):
+        return False
+    if stripped.startswith((">", "-", "*", "【")):
+        return False
+    if re.match(r"^\*\*[^*]+\*\*$", stripped):
+        return False
+    if re.match(r"^\d+\.\s", stripped):
+        return False
+    return True
+
+
+def _looks_like_heading_line_for_weixin(line: str) -> bool:
+    """Return True when a short line behaves like a plain-text heading."""
+    stripped = line.strip()
+    if not stripped:
+        return False
+    return len(stripped) <= 24 and stripped.endswith((":", "："))
+
+
+def _should_split_short_chat_block_for_weixin(block: str) -> bool:
+    """Split only chat-like multiline blocks into separate bubbles."""
+    lines = [line for line in block.splitlines() if line.strip()]
+    if not 2 <= len(lines) <= 6:
+        return False
+    if _looks_like_heading_line_for_weixin(lines[0]):
+        return False
+    return all(_looks_like_chatty_line_for_weixin(line) for line in lines)
+
+
 def _pack_markdown_blocks_for_weixin(content: str, max_length: int) -> List[str]:
    if len(content) <= max_length:
        return [content]
@@ -775,6 +815,8 @@ def _split_text_for_weixin_delivery(
    ``platforms.weixin.extra.split_multiline_messages`` (``true`` / ``false``)
    or the env var ``WEIXIN_SPLIT_MULTILINE_MESSAGES``.
    """
+    if not content:
+        return []
    if split_per_line:
        # Legacy: one message per top-level delivery unit.
        if len(content) <= max_length and "\n" not in content:
@@ -785,11 +827,17 @@ def _split_text_for_weixin_delivery(
                chunks.append(unit)
                continue
            chunks.extend(_pack_markdown_blocks_for_weixin(unit, max_length))
-        return chunks or [content]
+        return [c for c in chunks if c] or [content]

-    # Compact (default): single message when under the limit.
+    # Compact (default): single message when under the limit — unless the
+    # content looks like a short chatty exchange, in which case split into
+    # separate bubbles for a more natural chat feel.
    if len(content) <= max_length:
-        return [content]
+        return (
+            [u for u in _split_delivery_units_for_weixin(content) if u]
+            if _should_split_short_chat_block_for_weixin(content)
+            else [content]
+        )
    return _pack_markdown_blocks_for_weixin(content, max_length) or [content]


@@ -887,7 +935,7 @@ async def qr_login(
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError("aiohttp is required for Weixin QR login")

-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(trust_env=True) as session:
        try:
            qr_resp = await _api_get(
                session,
@@ -1000,6 +1048,10 @@ class WeixinAdapter(BasePlatformAdapter):

    MAX_MESSAGE_LENGTH = 4000

+    # WeChat does not support editing sent messages — streaming must use the
+    # fallback "send-final-only" path so the cursor (▉) is never left visible.
+    SUPPORTS_MESSAGE_EDITING = False
+
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.WEIXIN)
        extra = config.extra or {}
@@ -1082,7 +1134,7 @@ class WeixinAdapter(BasePlatformAdapter):
        except Exception as exc:
            logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc)

-        self._session = aiohttp.ClientSession()
+        self._session = aiohttp.ClientSession(trust_env=True)
        self._token_store.restore(self._account_id)
        self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll")
        self._mark_connected()
@@ -1409,7 +1461,7 @@ class WeixinAdapter(BasePlatformAdapter):
        context_token = self._token_store.get(self._account_id, chat_id)
        last_message_id: Optional[str] = None
        try:
-            chunks = self._split_text(self.format_message(content))
+            chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()]
            for idx, chunk in enumerate(chunks):
                client_id = f"hermes-weixin-{uuid.uuid4().hex}"
                await self._send_text_chunk(
@@ -1495,24 +1547,51 @@ class WeixinAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        return await self.send_document(chat_id, path, caption=caption, metadata=metadata)
+        return await self.send_document(chat_id, file_path=path, caption=caption, metadata=metadata)

    async def send_document(
        self,
        chat_id: str,
-        path: str,
+        file_path: str,
        caption: str = "",
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        if not self._session or not self._token:
            return SendResult(success=False, error="Not connected")
        try:
-            message_id = await self._send_file(chat_id, path, caption)
+            message_id = await self._send_file(chat_id, file_path, caption)
            return SendResult(success=True, message_id=message_id)
        except Exception as exc:
            logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
            return SendResult(success=False, error=str(exc))

+    async def send_video(
+        self,
+        chat_id: str,
+        video_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        if not self._session or not self._token:
+            return SendResult(success=False, error="Not connected")
+        try:
+            message_id = await self._send_file(chat_id, video_path, caption or "")
+            return SendResult(success=True, message_id=message_id)
+        except Exception as exc:
+            logger.error("[%s] send_video failed to=%s: %s", self.name, _safe_id(chat_id), exc)
+            return SendResult(success=False, error=str(exc))
+
+    async def send_voice(
+        self,
+        chat_id: str,
+        audio_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
+
    async def _download_remote_media(self, url: str) -> str:
        from tools.url_safety import is_safe_url

@@ -1535,6 +1614,7 @@ class WeixinAdapter(BasePlatformAdapter):
        filekey = secrets.token_hex(16)
        aes_key = secrets.token_bytes(16)
        rawsize = len(plaintext)
+        rawfilemd5 = hashlib.md5(plaintext).hexdigest()
        upload_response = await _get_upload_url(
            self._session,
            base_url=self._base_url,
@@ -1543,41 +1623,42 @@ class WeixinAdapter(BasePlatformAdapter):
            media_type=media_type,
            filekey=filekey,
            rawsize=rawsize,
-            rawfilemd5=hashlib.md5(plaintext).hexdigest(),
+            rawfilemd5=rawfilemd5,
            filesize=_aes_padded_size(rawsize),
            aeskey_hex=aes_key.hex(),
        )
        upload_param = str(upload_response.get("upload_param") or "")
        upload_full_url = str(upload_response.get("upload_full_url") or "")
        ciphertext = _aes128_ecb_encrypt(plaintext, aes_key)
-        if upload_param:
-            encrypted_query_param = await _upload_ciphertext(
-                self._session,
-                ciphertext=ciphertext,
-                cdn_base_url=self._cdn_base_url,
-                upload_param=upload_param,
-                filekey=filekey,
-            )
-        elif upload_full_url:
-            timeout = aiohttp.ClientTimeout(total=120)
-            async with self._session.put(
-                upload_full_url,
-                data=ciphertext,
-                headers={"Content-Type": "application/octet-stream"},
-                timeout=timeout,
-            ) as response:
-                response.raise_for_status()
-                encrypted_query_param = response.headers.get("x-encrypted-param") or filekey
+
+        # Prefer upload_full_url (direct CDN), fall back to constructed CDN URL
+        # from upload_param.  Both paths use POST — the old PUT for
+        # upload_full_url caused 404s on the WeChat CDN.
+        if upload_full_url:
+            upload_url = upload_full_url
+        elif upload_param:
+            upload_url = _cdn_upload_url(self._cdn_base_url, upload_param, filekey)
        else:
            raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}")

+        encrypted_query_param = await _upload_ciphertext(
+            self._session,
+            ciphertext=ciphertext,
+            upload_url=upload_url,
+        )
+
        context_token = self._token_store.get(self._account_id, chat_id)
+        # The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes).
+        # Sending base64(raw_bytes) causes images to show as grey boxes on the
+        # receiver side because the decryption key doesn't match.
+        aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
        media_item = item_builder(
            encrypt_query_param=encrypted_query_param,
-            aes_key_b64=base64.b64encode(aes_key).decode("ascii"),
+            aes_key_for_api=aes_key_for_api,
            ciphertext_size=len(ciphertext),
            plaintext_size=rawsize,
            filename=Path(path).name,
+            rawfilemd5=rawfilemd5,
        )

        last_message_id = None
@@ -1617,39 +1698,53 @@ class WeixinAdapter(BasePlatformAdapter):
    def _outbound_media_builder(self, path: str):
        mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
        if mime.startswith("image/"):
-            return MEDIA_IMAGE, lambda **kwargs: {
+            return MEDIA_IMAGE, lambda **kw: {
                "type": ITEM_IMAGE,
                "image_item": {
                    "media": {
-                        "encrypt_query_param": kwargs["encrypt_query_param"],
-                        "aes_key": kwargs["aes_key_b64"],
+                        "encrypt_query_param": kw["encrypt_query_param"],
+                        "aes_key": kw["aes_key_for_api"],
                        "encrypt_type": 1,
                    },
-                    "mid_size": kwargs["ciphertext_size"],
+                    "mid_size": kw["ciphertext_size"],
                },
            }
        if mime.startswith("video/"):
-            return MEDIA_VIDEO, lambda **kwargs: {
+            return MEDIA_VIDEO, lambda **kw: {
                "type": ITEM_VIDEO,
                "video_item": {
                    "media": {
-                        "encrypt_query_param": kwargs["encrypt_query_param"],
-                        "aes_key": kwargs["aes_key_b64"],
+                        "encrypt_query_param": kw["encrypt_query_param"],
+                        "aes_key": kw["aes_key_for_api"],
                        "encrypt_type": 1,
                    },
-                    "video_size": kwargs["ciphertext_size"],
+                    "video_size": kw["ciphertext_size"],
+                    "play_length": kw.get("play_length", 0),
+                    "video_md5": kw.get("rawfilemd5", ""),
                },
            }
-        return MEDIA_FILE, lambda **kwargs: {
+        if mime.startswith("audio/") or path.endswith(".silk"):
+            return MEDIA_VOICE, lambda **kw: {
+                "type": ITEM_VOICE,
+                "voice_item": {
+                    "media": {
+                        "encrypt_query_param": kw["encrypt_query_param"],
+                        "aes_key": kw["aes_key_for_api"],
+                        "encrypt_type": 1,
+                    },
+                    "playtime": kw.get("playtime", 0),
+                },
+            }
+        return MEDIA_FILE, lambda **kw: {
            "type": ITEM_FILE,
            "file_item": {
                "media": {
-                    "encrypt_query_param": kwargs["encrypt_query_param"],
-                    "aes_key": kwargs["aes_key_b64"],
+                    "encrypt_query_param": kw["encrypt_query_param"],
+                    "aes_key": kw["aes_key_for_api"],
                    "encrypt_type": 1,
                },
-                "file_name": kwargs["filename"],
-                "len": str(kwargs["plaintext_size"]),
+                "file_name": kw["filename"],
+                "len": str(kw["plaintext_size"]),
            },
        }

@@ -1689,7 +1784,7 @@ async def send_weixin_direct(
    token_store.restore(account_id)
    context_token = token_store.get(account_id, chat_id)

-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(trust_env=True) as session:
        adapter = WeixinAdapter(
            PlatformConfig(
                enabled=True,
@@ -120,8 +120,9 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - session_path: Path to store WhatsApp session data
    """
    
-    # WhatsApp message limits
-    MAX_MESSAGE_LENGTH = 65536  # WhatsApp allows longer messages
+    # WhatsApp message limits — practical UX limit, not protocol max.
+    # WhatsApp allows ~65K but long messages are unreadable on mobile.
+    MAX_MESSAGE_LENGTH = 4096
    
    # Default bridge location relative to the hermes-agent install
    _DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
@@ -531,6 +532,63 @@ class WhatsAppAdapter(BasePlatformAdapter):
        self._close_bridge_log()
        print(f"[{self.name}] Disconnected")
    
+    def format_message(self, content: str) -> str:
+        """Convert standard markdown to WhatsApp-compatible formatting.
+
+        WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```,
+        and monospaced `inline`. Markdown *italic* would render as bold
+        there, so it is rewritten to _italic_ before bold is converted.
+
+        Code blocks (``` fenced) and inline code (`) are protected from
+        conversion via placeholder substitution.
+        """
+        if not content:
+            return content
+
+        # --- 1. Protect fenced code blocks from formatting changes ---
+        _FENCE_PH = "\x00FENCE"
+        fences: list[str] = []
+
+        def _save_fence(m: re.Match) -> str:
+            fences.append(m.group(0))
+            return f"{_FENCE_PH}{len(fences) - 1}\x00"
+
+        result = re.sub(r"```[\s\S]*?```", _save_fence, content)
+
+        # --- 2. Protect inline code ---
+        _CODE_PH = "\x00CODE"
+        codes: list[str] = []
+
+        def _save_code(m: re.Match) -> str:
+            codes.append(m.group(0))
+            return f"{_CODE_PH}{len(codes) - 1}\x00"
+
+        result = re.sub(r"`[^`\n]+`", _save_code, result)
+
+        # --- 3. Italic: *text* → _text_ (skips **bold**, "* item" bullets, 2*3*4) ---
+        result = re.sub(r"(?<![\w*])\*(?![\s*])(.+?)(?<![\s*])\*(?![\w*])", r"_\1_", result)
+
+        # --- 4. Headers → *Header*; inner **/__ dropped so "# **H**" isn't "**H**" ---
+        def _bold_header(m: re.Match) -> str:
+            return "*" + re.sub(r"\*\*|__", "", m.group(1)) + "*"
+        result = re.sub(r"^#{1,6}\s+(.+)$", _bold_header, result, flags=re.MULTILINE)
+
+        # --- 5. Bold: **text** or __text__ → *text*; strikethrough: ~~text~~ → ~text~ ---
+        result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
+        result = re.sub(r"__(.+?)__", r"*\1*", result)
+        result = re.sub(r"~~(.+?)~~", r"~\1~", result)
+
+        # --- 6. Convert markdown links: [text](url) → text (url) ---
+        result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)
+
+        # --- 7. Restore protected sections ---
+        for i, fence in enumerate(fences):
+            result = result.replace(f"{_FENCE_PH}{i}\x00", fence)
+        for i, code in enumerate(codes):
+            result = result.replace(f"{_CODE_PH}{i}\x00", code)
+
+        return result
+
    async def send(
        self,
        chat_id: str,
@@ -538,38 +596,57 @@ class WhatsAppAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> SendResult:
-        """Send a message via the WhatsApp bridge."""
+        """Send a message via the WhatsApp bridge.
+
+        Formats markdown for WhatsApp, splits long messages into chunks
+        that preserve code block boundaries, and sends each chunk sequentially.
+        """
        if not self._running or not self._http_session:
            return SendResult(success=False, error="Not connected")
        bridge_exit = await self._check_managed_bridge_exit()
        if bridge_exit:
            return SendResult(success=False, error=bridge_exit)
-        
+
+        if not content or not content.strip():
+            return SendResult(success=True, message_id=None)
+
        try:
            import aiohttp

-            payload = {
-                "chatId": chat_id,
-                "message": content,
-            }
-            if reply_to:
-                payload["replyTo"] = reply_to
-            
-            async with self._http_session.post(
-                f"http://127.0.0.1:{self._bridge_port}/send",
-                json=payload,
-                timeout=aiohttp.ClientTimeout(total=30)
-            ) as resp:
-                if resp.status == 200:
-                    data = await resp.json()
-                    return SendResult(
-                        success=True,
-                        message_id=data.get("messageId"),
-                        raw_response=data
-                    )
-                else:
-                    error = await resp.text()
-                    return SendResult(success=False, error=error)
+            # Format and chunk the message
+            formatted = self.format_message(content)
+            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+
+            last_message_id = None
+            for chunk in chunks:
+                payload: Dict[str, Any] = {
+                    "chatId": chat_id,
+                    "message": chunk,
+                }
+                if reply_to and last_message_id is None:
+                    # Only reply-to on the first chunk
+                    payload["replyTo"] = reply_to
+
+                async with self._http_session.post(
+                    f"http://127.0.0.1:{self._bridge_port}/send",
+                    json=payload,
+                    timeout=aiohttp.ClientTimeout(total=30)
+                ) as resp:
+                    if resp.status == 200:
+                        data = await resp.json()
+                        last_message_id = data.get("messageId")
+                    else:
+                        error = await resp.text()
+                        return SendResult(success=False, error=error)
+
+                # Small delay between chunks to avoid rate limiting
+                if len(chunks) > 1:
+                    await asyncio.sleep(0.3)
+
+            return SendResult(
+                success=True,
+                message_id=last_message_id,
+            )
        except Exception as e:
            return SendResult(success=False, error=str(e))

@@ -186,6 +186,8 @@ if _config_path.exists():
                os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
            if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
                os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
+            if "gateway_notify_interval" in _agent_cfg and "HERMES_AGENT_NOTIFY_INTERVAL" not in os.environ:
+                os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
            if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
                os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
        _display_cfg = _cfg.get("display", {})
@@ -206,6 +208,15 @@ if _config_path.exists():
    except Exception:
        pass  # Non-fatal; gateway can still run with .env values

+# Apply IPv4 preference if configured (before any HTTP clients are created).
+try:
+    from hermes_constants import apply_ipv4_preference
+    _network_cfg = (_cfg if '_cfg' in dir() else {}).get("network", {})
+    if isinstance(_network_cfg, dict) and _network_cfg.get("force_ipv4"):
+        apply_ipv4_preference(force=True)
+except Exception:
+    pass
+
 # Validate config structure early — log warnings so gateway operators see problems
 try:
    from hermes_cli.config import print_config_warnings
@@ -867,13 +878,47 @@ class GatewayRunner:
                "api_mode": override.get("api_mode"),
            }
            if override_runtime.get("api_key"):
+                logger.debug(
+                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
+                    (resolved_session_key or "")[:30], model, override_model,
+                    override_runtime.get("provider"),
+                )
                return override_model, override_runtime
+            # Override exists but has no api_key — fall through to env-based
+            # resolution and apply model/provider from the override on top.
+            logger.debug(
+                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
+                (resolved_session_key or "")[:30], model, override_model,
+            )
+        else:
+            logger.debug(
+                "No session model override: session=%s config_model=%s override_keys=%s",
+                (resolved_session_key or "")[:30], model,
+                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
+            )

        runtime_kwargs = _resolve_runtime_agent_kwargs()
        if override and resolved_session_key:
            model, runtime_kwargs = self._apply_session_model_override(
                resolved_session_key, model, runtime_kwargs
            )
+
+        # When the config has no model.default but a provider was resolved
+        # (e.g. user ran `hermes auth add openai-codex` without `hermes model`),
+        # fall back to the provider's first catalog model so the API call
+        # doesn't fail with "model must be a non-empty string".
+        if not model and runtime_kwargs.get("provider"):
+            try:
+                from hermes_cli.models import get_default_model_for_provider
+                model = get_default_model_for_provider(runtime_kwargs["provider"])
+                if model:
+                    logger.info(
+                        "No model configured — defaulting to %s for provider %s",
+                        model, runtime_kwargs["provider"],
+                    )
+            except Exception:
+                pass
+
        return model, runtime_kwargs

    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
@@ -916,6 +961,12 @@ class GatewayRunner:
            adapter.fatal_error_code or "unknown",
            adapter.fatal_error_message or "unknown error",
        )
+        self._update_platform_runtime_status(
+            adapter.platform.value,
+            platform_state="retrying" if adapter.fatal_error_retryable else "fatal",
+            error_code=adapter.fatal_error_code,
+            error_message=adapter.fatal_error_message,
+        )

        existing = self.adapters.get(adapter.platform)
        if existing is adapter:
@@ -993,6 +1044,25 @@ class GatewayRunner:
            )
        except Exception:
            pass
+
+    def _update_platform_runtime_status(
+        self,
+        platform: str,
+        *,
+        platform_state: Optional[str] = None,
+        error_code: Optional[str] = None,
+        error_message: Optional[str] = None,
+    ) -> None:
+        """Best-effort: record a platform's state/error via gateway.status.write_runtime_status."""
+        try:
+            from gateway.status import write_runtime_status
+            write_runtime_status(
+                platform=platform, platform_state=platform_state,
+                error_code=error_code, error_message=error_message,
+            )
+        except Exception as exc:
+            # Never let status reporting break the caller; leave a debug trace instead.
+            logger.debug("Failed to write runtime status for %s: %s", platform, exc)
    
    @staticmethod
    def _load_prefill_messages() -> List[Dict[str, Any]]:
@@ -1467,12 +1537,25 @@ class GatewayRunner:
        # This prevents stuck sessions from being blindly resumed on restart,
        # which can create an unrecoverable loop (#7536).  Suspended sessions
        # auto-reset on the next incoming message, giving the user a clean start.
-        try:
-            suspended = self.session_store.suspend_recently_active()
-            if suspended:
-                logger.info("Suspended %d in-flight session(s) from previous run", suspended)
-        except Exception as e:
-            logger.warning("Session suspension on startup failed: %s", e)
+        #
+        # SKIP suspension after a clean (graceful) shutdown — the previous
+        # process already drained active agents, so sessions aren't stuck.
+        # This prevents unwanted auto-resets after `hermes update`,
+        # `hermes gateway restart`, or `/restart`.
+        _clean_marker = _hermes_home / ".clean_shutdown"
+        if _clean_marker.exists():
+            logger.info("Previous gateway exited cleanly — skipping session suspension")
+            try:
+                _clean_marker.unlink()
+            except Exception:
+                pass
+        else:
+            try:
+                suspended = self.session_store.suspend_recently_active()
+                if suspended:
+                    logger.info("Suspended %d in-flight session(s) from previous run", suspended)
+            except Exception as e:
+                logger.warning("Session suspension on startup failed: %s", e)

        connected_count = 0
        enabled_platform_count = 0
@@ -1498,16 +1581,34 @@ class GatewayRunner:
            
            # Try to connect
            logger.info("Connecting to %s...", platform.value)
+            self._update_platform_runtime_status(
+                platform.value,
+                platform_state="connecting",
+                error_code=None,
+                error_message=None,
+            )
            try:
                success = await adapter.connect()
                if success:
                    self.adapters[platform] = adapter
                    self._sync_voice_mode_state_to_adapter(adapter)
                    connected_count += 1
+                    self._update_platform_runtime_status(
+                        platform.value,
+                        platform_state="connected",
+                        error_code=None,
+                        error_message=None,
+                    )
                    logger.info("✓ %s connected", platform.value)
                else:
                    logger.warning("✗ %s failed to connect", platform.value)
                    if adapter.has_fatal_error:
+                        self._update_platform_runtime_status(
+                            platform.value,
+                            platform_state="retrying" if adapter.fatal_error_retryable else "fatal",
+                            error_code=adapter.fatal_error_code,
+                            error_message=adapter.fatal_error_message,
+                        )
                        target = (
                            startup_retryable_errors
                            if adapter.fatal_error_retryable
@@ -1524,6 +1625,12 @@ class GatewayRunner:
                                "next_retry": time.monotonic() + 30,
                            }
                    else:
+                        self._update_platform_runtime_status(
+                            platform.value,
+                            platform_state="retrying",
+                            error_code=None,
+                            error_message="failed to connect",
+                        )
                        startup_retryable_errors.append(
                            f"{platform.value}: failed to connect"
                        )
@@ -1535,6 +1642,12 @@ class GatewayRunner:
                        }
            except Exception as e:
                logger.error("✗ %s error: %s", platform.value, e)
+                self._update_platform_runtime_status(
+                    platform.value,
+                    platform_state="retrying",
+                    error_code=None,
+                    error_message=str(e),
+                )
                startup_retryable_errors.append(f"{platform.value}: {e}")
                # Unexpected exceptions are typically transient — queue for retry
                self._failed_platforms[platform] = {
@@ -1604,6 +1717,9 @@ class GatewayRunner:
        ):
            self._schedule_update_notification_watch()

+        # Notify the chat that initiated /restart that the gateway is back.
+        await self._send_restart_notification()
+
        # Drain any recovered process watchers (from crash recovery checkpoint)
        try:
            from tools.process_registry import process_registry
@@ -1813,6 +1929,12 @@ class GatewayRunner:
                        self._sync_voice_mode_state_to_adapter(adapter)
                        self.delivery_router.adapters = self.adapters
                        del self._failed_platforms[platform]
+                        self._update_platform_runtime_status(
+                            platform.value,
+                            platform_state="connected",
+                            error_code=None,
+                            error_message=None,
+                        )
                        logger.info("✓ %s reconnected successfully", platform.value)

                        # Rebuild channel directory with the new adapter
@@ -1824,12 +1946,24 @@ class GatewayRunner:
                    else:
                        # Check if the failure is non-retryable
                        if adapter.has_fatal_error and not adapter.fatal_error_retryable:
+                            self._update_platform_runtime_status(
+                                platform.value,
+                                platform_state="fatal",
+                                error_code=adapter.fatal_error_code,
+                                error_message=adapter.fatal_error_message,
+                            )
                            logger.warning(
                                "Reconnect %s: non-retryable error (%s), removing from retry queue",
                                platform.value, adapter.fatal_error_message,
                            )
                            del self._failed_platforms[platform]
                        else:
+                            self._update_platform_runtime_status(
+                                platform.value,
+                                platform_state="retrying",
+                                error_code=adapter.fatal_error_code,
+                                error_message=adapter.fatal_error_message or "failed to reconnect",
+                            )
                            backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
                            info["attempts"] = attempt
                            info["next_retry"] = time.monotonic() + backoff
@@ -1838,6 +1972,12 @@ class GatewayRunner:
                                platform.value, backoff,
                            )
                except Exception as e:
+                    self._update_platform_runtime_status(
+                        platform.value,
+                        platform_state="retrying",
+                        error_code=None,
+                        error_message=str(e),
+                    )
                    backoff = min(30 * (2 ** (attempt - 1)), _BACKOFF_CAP)
                    info["attempts"] = attempt
                    info["next_retry"] = time.monotonic() + backoff
@@ -1944,6 +2084,15 @@ class GatewayRunner:
            from gateway.status import remove_pid_file
            remove_pid_file()

+            # Write a clean-shutdown marker so the next startup knows this
+            # wasn't a crash.  suspend_recently_active() only needs to run
+            # after unexpected exits — graceful shutdowns already drain
+            # active agents, so there's no stuck-session risk.
+            try:
+                (_hermes_home / ".clean_shutdown").touch()
+            except Exception:
+                pass
+
            if self._restart_requested and self._restart_via_service:
                self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE
                self._exit_reason = self._exit_reason or "Gateway restart requested"
@@ -2611,6 +2760,9 @@ class GatewayRunner:
        if canonical == "update":
            return await self._handle_update_command(event)

+        if canonical == "debug":
+            return await self._handle_debug_command(event)
+
        if canonical == "title":
            return await self._handle_title_command(event)

@@ -3877,9 +4029,16 @@ class GatewayRunner:
        except Exception:
            pass

+        # Append a random tip to the reset message
+        try:
+            from hermes_cli.tips import get_random_tip
+            _tip_line = f"\n✦ Tip: {get_random_tip()}"
+        except Exception:
+            _tip_line = ""
+
        if session_info:
-            return f"{header}\n\n{session_info}"
-        return header
+            return f"{header}\n\n{session_info}{_tip_line}"
+        return f"{header}{_tip_line}"
    
    async def _handle_profile_command(self, event: MessageEvent) -> str:
        """Handle /profile — show active profile name and home directory."""
@@ -3991,11 +4150,36 @@ class GatewayRunner:
                return f"⏳ Draining {count} active agent(s) before restart..."
            return "⏳ Gateway restart already in progress..."

+        # Save the requester's routing info so the new gateway process can
+        # notify them once it comes back online.
+        try:
+            import json as _json
+            notify_data = {
+                "platform": event.source.platform.value if event.source.platform else None,
+                "chat_id": event.source.chat_id,
+            }
+            if event.source.thread_id:
+                notify_data["thread_id"] = event.source.thread_id
+            (_hermes_home / ".restart_notify.json").write_text(
+                _json.dumps(notify_data)
+            )
+        except Exception as e:
+            logger.debug("Failed to write restart notify file: %s", e)
+
        active_agents = self._running_agent_count()
-        self.request_restart(detached=True, via_service=False)
+        # When running under a service manager (systemd/launchd), use the
+        # service restart path: exit with code 75 so the service manager
+        # restarts us.  The detached subprocess approach (setsid + bash)
+        # doesn't work under systemd because KillMode=mixed kills all
+        # processes in the cgroup, including the detached helper.
+        _under_service = bool(os.environ.get("INVOCATION_ID"))  # systemd sets this
+        if _under_service:
+            self.request_restart(detached=False, via_service=True)
+        else:
+            self.request_restart(detached=True, via_service=False)
        if active_agents:
            return f"⏳ Draining {active_agents} active agent(s) before restart..."
-        return "♻ Restarting gateway..."
+        return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."

    async def _handle_help_command(self, event: MessageEvent) -> str:
        """Handle /help command - list available commands."""
@@ -4209,6 +4393,11 @@ class GatewayRunner:
                            "api_mode": result.api_mode,
                        }

+                        # Evict cached agent so the next turn creates a fresh
+                        # agent from the override rather than relying on the
+                        # stale cache signature to trigger a rebuild.
+                        _self._evict_cached_agent(_session_key)
+
                        # Build confirmation text
                        plabel = result.provider_label or result.target_provider
                        lines = [f"Model switched to `{result.new_model}`"]
@@ -4322,6 +4511,10 @@ class GatewayRunner:
            "api_mode": result.api_mode,
        }

+        # Evict cached agent so the next turn creates a fresh agent from the
+        # override rather than relying on cache signature mismatch detection.
+        self._evict_cached_agent(session_key)
+
        # Persist to config if --global
        if persist_global:
            try:
@@ -4734,6 +4927,8 @@ class GatewayRunner:

        if success:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
+            if hasattr(adapter, "_voice_sources"):
+                adapter._voice_sources[guild_id] = event.source.to_dict()
            self._voice_mode[event.source.chat_id] = "all"
            self._save_voice_modes()
            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
@@ -4794,14 +4989,23 @@ class GatewayRunner:
        if not text_ch_id:
            return

+        # Build source — reuse the linked text channel's metadata when available
+        # so voice input shares the same session as the bound text conversation.
+        source_data = getattr(adapter, "_voice_sources", {}).get(guild_id)
+        if source_data:
+            source = SessionSource.from_dict(source_data)
+            source.user_id = str(user_id)
+            source.user_name = str(user_id)
+        else:
+            source = SessionSource(
+                platform=Platform.DISCORD,
+                chat_id=str(text_ch_id),
+                user_id=str(user_id),
+                user_name=str(user_id),
+                chat_type="channel",
+            )
+
        # Check authorization before processing voice input
-        source = SessionSource(
-            platform=Platform.DISCORD,
-            chat_id=str(text_ch_id),
-            user_id=str(user_id),
-            user_name=str(user_id),
-            chat_type="channel",
-        )
        if not self._is_user_authorized(source):
            logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
            return
@@ -5637,7 +5841,12 @@ class GatewayRunner:
            return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"

    async def _handle_compress_command(self, event: MessageEvent) -> str:
-        """Handle /compress command -- manually compress conversation context."""
+        """Handle /compress command -- manually compress conversation context.
+
+        Accepts an optional focus topic: ``/compress <focus>`` guides the
+        summariser to preserve information related to *focus* while being
+        more aggressive about discarding everything else.
+        """
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
        history = self.session_store.load_transcript(session_entry.session_id)
@@ -5645,6 +5854,9 @@ class GatewayRunner:
        if not history or len(history) < 4:
            return "Not enough conversation to compress (need at least 4 messages)."

+        # Extract optional focus topic from command args
+        focus_topic = (event.get_command_args() or "").strip() or None
+
        try:
            from run_agent import AIAgent
            from agent.manual_compression_feedback import summarize_manual_compression
@@ -5686,7 +5898,7 @@ class GatewayRunner:
            loop = asyncio.get_event_loop()
            compressed, _ = await loop.run_in_executor(
                None,
-                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens)
+                lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
            )

            # _compress_context already calls end_session() on the old session
@@ -5710,7 +5922,10 @@ class GatewayRunner:
                approx_tokens,
                new_tokens,
            )
-            lines = [f"🗜️ {summary['headline']}", summary["token_line"]]
+            lines = [f"🗜️ {summary['headline']}"]
+            if focus_topic:
+                lines.append(f"Focus: \"{focus_topic}\"")
+            lines.append(summary["token_line"])
            if summary["note"]:
                lines.append(summary["note"])
            return "\n".join(lines)
@@ -6255,6 +6470,61 @@ class GatewayRunner:
        Platform.FEISHU, Platform.WECOM, Platform.WECOM_CALLBACK, Platform.WEIXIN, Platform.BLUEBUBBLES, Platform.LOCAL,
    })

+    async def _handle_debug_command(self, event: MessageEvent) -> str:
+        """Handle /debug — upload debug report + logs and return paste URLs.
+
+        The report upload is mandatory (its failure is returned as the
+        reply); full agent/gateway log uploads are best-effort.
+        """
+        import asyncio
+        from hermes_cli.debug import (
+            _capture_dump, collect_debug_report, _read_full_log,
+            upload_to_pastebin,
+        )
+
+        def _collect_and_upload():
+            # Blocking I/O (dump capture, log reads, uploads) — runs in a thread.
+            dump_text = _capture_dump()
+            report = collect_debug_report(log_lines=200, dump_text=dump_text)
+
+            # Read both logs up front; prefix non-empty ones with the dump.
+            log_payloads = {}
+            for log_name in ("agent", "gateway"):
+                body = _read_full_log(log_name)
+                if body:
+                    log_payloads[f"{log_name}.log"] = (
+                        dump_text + f"\n\n--- full {log_name}.log ---\n" + body
+                    )
+
+            # The report itself is mandatory: bail out with the error if it fails.
+            try:
+                urls = {"Report": upload_to_pastebin(report)}
+            except Exception as exc:
+                return f"✗ Failed to upload debug report: {exc}"
+
+            # Full logs are optional; remember which ones could not be uploaded.
+            failures = []
+            for label, payload in log_payloads.items():
+                try:
+                    urls[label] = upload_to_pastebin(payload)
+                except Exception:
+                    failures.append(label)
+
+            label_width = max(map(len, urls))
+            lines = ["**Debug report uploaded:**", ""]
+            lines.extend(
+                f"`{label:<{label_width}}`  {url}" for label, url in urls.items()
+            )
+
+            if failures:
+                lines.append(f"\n_(failed to upload: {', '.join(failures)})_")
+            lines.append("\nShare these links with the Hermes team for support.")
+            return "\n".join(lines)
+
+        return await asyncio.get_running_loop().run_in_executor(
+            None, _collect_and_upload
+        )
+
    async def _handle_update_command(self, event: MessageEvent) -> str:
        """Handle /update command — update Hermes Agent to the latest version.

@@ -6497,8 +6767,12 @@ class GatewayRunner:
            if buffer.strip() and (loop.time() - last_stream_time) >= stream_interval:
                await _flush_buffer()

-            # Check for prompts
-            if prompt_path.exists() and session_key:
+            # Check for prompts — only forward if we haven't already sent
+            # one that's still awaiting a response.  Without this guard the
+            # watcher would re-read the same .update_prompt.json every poll
+            # cycle and spam the user with duplicate prompt messages.
+            if (prompt_path.exists() and session_key
+                    and not self._update_prompt_pending.get(session_key)):
                try:
                    prompt_data = json.loads(prompt_path.read_text())
                    prompt_text = prompt_data.get("prompt", "")
@@ -6530,6 +6804,11 @@ class GatewayRunner:
                                f"or type your answer directly."
                            )
                        self._update_prompt_pending[session_key] = True
+                        # Remove the prompt file so it isn't re-read on the
+                        # next poll cycle.  The update process only needs
+                        # .update_response to continue — it doesn't re-check
+                        # .update_prompt.json while waiting.
+                        prompt_path.unlink(missing_ok=True)
                        logger.info("Forwarded update prompt to %s: %s", session_key, prompt_text[:80])
                except (json.JSONDecodeError, OSError) as e:
                    logger.debug("Failed to read update prompt: %s", e)
@@ -6640,6 +6919,48 @@ class GatewayRunner:

        return True

+    async def _send_restart_notification(self) -> None:
+        """Tell the chat that issued /restart that the gateway is running again.
+
+        Reads ``.restart_notify.json`` from the Hermes home directory, sends a
+        confirmation through the adapter for the recorded platform, and removes
+        the marker file afterwards regardless of the outcome.  Errors raised
+        while reading the marker or sending the message are logged as warnings.
+        """
+        import json as _json
+
+        marker = _hermes_home / ".restart_notify.json"
+        if not marker.exists():
+            return
+
+        try:
+            payload = _json.loads(marker.read_text())
+            platform_str = payload.get("platform")
+            chat_id = payload.get("chat_id")
+            thread_id = payload.get("thread_id")
+
+            # Both a platform and a chat are needed to address the message.
+            if platform_str and chat_id:
+                adapter = self.adapters.get(Platform(platform_str))
+                if adapter:
+                    await adapter.send(
+                        chat_id,
+                        "♻ Gateway restarted successfully. Your session continues.",
+                        metadata={"thread_id": thread_id} if thread_id else None,
+                    )
+                    logger.info(
+                        "Sent restart notification to %s:%s",
+                        platform_str,
+                        chat_id,
+                    )
+                else:
+                    logger.debug(
+                        "Restart notification skipped: %s adapter not connected",
+                        platform_str,
+                    )
+        except Exception as e:
+            logger.warning("Restart notification failed: %s", e)
+        finally:
+            # One-shot marker: never replay the notification on a later start.
+            marker.unlink(missing_ok=True)
+
    def _set_session_env(self, context: SessionContext) -> list:
        """Set session context variables for the current async task.

@@ -6903,7 +7224,9 @@ class GatewayRunner:

            if session.exited:
                # --- Agent-triggered completion: inject synthetic message ---
-                if agent_notify:
+                # Skip if the agent already consumed the result via wait/poll/log
+                from tools.process_registry import process_registry as _pr_check
+                if agent_notify and not _pr_check.is_completion_consumed(session_id):
                    from tools.ansi_strip import strip_ansi
                    _out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
                    synth_text = (
@@ -7169,9 +7492,11 @@ class GatewayRunner:
                    _pl = get_tool_preview_max_len()
                    import json as _json
                    args_str = _json.dumps(args, ensure_ascii=False, default=str)
-                    _cap = _pl if _pl > 0 else 200
-                    if len(args_str) > _cap:
-                        args_str = args_str[:_cap - 3] + "..."
+                    # When tool_preview_length is 0 (default), don't truncate
+                    # in verbose mode — the user explicitly asked for full
+                    # detail.  Platform message-length limits handle the rest.
+                    if _pl > 0 and len(args_str) > _pl:
+                        args_str = args_str[:_pl - 3] + "..."
                    msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
                elif preview:
                    msg = f"{emoji} {tool_name}: \"{preview}\""
@@ -7437,6 +7762,10 @@ class GatewayRunner:
                    session_key=session_key,
                    user_config=user_config,
                )
+                logger.debug(
+                    "run_agent resolved: model=%s provider=%s session=%s",
+                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
+                )
            except Exception as exc:
                return {
                    "final_response": f"⚠️ Provider authentication failed: {exc}",
@@ -7477,10 +7806,18 @@ class GatewayRunner:
                    from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
                    _adapter = self.adapters.get(source.platform)
                    if _adapter:
+                        # Platforms that don't support editing sent messages
+                        # (e.g. WeChat) must not show a cursor in intermediate
+                        # sends — the cursor would be permanently visible because
+                        # it can never be edited away.  Use an empty cursor for
+                        # such platforms so streaming still delivers the final
+                        # response, just without the typing indicator.
+                        _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
+                        _effective_cursor = _scfg.cursor if _adapter_supports_edit else ""
                        _consumer_cfg = StreamConsumerConfig(
                            edit_interval=_scfg.edit_interval,
                            buffer_threshold=_scfg.buffer_threshold,
-                            cursor=_scfg.cursor,
+                            cursor=_effective_cursor,
                        )
                        _stream_consumer = GatewayStreamConsumer(
                            adapter=_adapter,
@@ -7911,35 +8248,66 @@ class GatewayRunner:
        
        tracking_task = asyncio.create_task(track_agent())
        
-        # Monitor for interrupts from the adapter (new messages arriving)
+        # Monitor for interrupts from the adapter (new messages arriving).
+        # This is the PRIMARY interrupt path for regular text messages —
+        # Level 1 (base.py) catches them before _handle_message() is reached,
+        # so the Level 2 running_agent.interrupt() path never fires.
+        # The inactivity poll loop below has a BACKUP check in case this
+        # task dies (no error handling = silent death = lost interrupts).
+        _interrupt_detected = asyncio.Event()  # shared with backup check
+
        async def monitor_for_interrupt():
-            adapter = self.adapters.get(source.platform)
-            if not adapter or not session_key:
+            if not session_key:
                return
-            
+
            while True:
                await asyncio.sleep(0.2)  # Check every 200ms
-                # Check if adapter has a pending interrupt for this session.
-                # Must use session_key (build_session_key output) — NOT
-                # source.chat_id — because the adapter stores interrupt events
-                # under the full session key.
-                if hasattr(adapter, 'has_pending_interrupt') and adapter.has_pending_interrupt(session_key):
-                    agent = agent_holder[0]
-                    if agent:
-                        pending_event = adapter.get_pending_message(session_key)
-                        pending_text = pending_event.text if pending_event else None
-                        logger.debug("Interrupt detected from adapter, signaling agent...")
-                        agent.interrupt(pending_text)
-                        break
+                try:
+                    # Re-resolve adapter each iteration so reconnects don't
+                    # leave us holding a stale reference.
+                    _adapter = self.adapters.get(source.platform)
+                    if not _adapter:
+                        continue
+                    # Check if adapter has a pending interrupt for this session.
+                    # Must use session_key (build_session_key output) — NOT
+                    # source.chat_id — because the adapter stores interrupt events
+                    # under the full session key.
+                    if hasattr(_adapter, 'has_pending_interrupt') and _adapter.has_pending_interrupt(session_key):
+                        agent = agent_holder[0]
+                        if agent:
+                            # Peek at the pending message text WITHOUT consuming it.
+                            # The message must remain in _pending_messages so the
+                            # post-run dequeue at _dequeue_pending_event() can
+                            # retrieve the full MessageEvent (with media metadata).
+                            # If we pop here, a race exists: the agent may finish
+                            # before checking _interrupt_requested, and the message
+                            # is lost — neither the interrupt path nor the dequeue
+                            # path finds it.
+                            _peek_event = _adapter._pending_messages.get(session_key)
+                            pending_text = _peek_event.text if _peek_event else None
+                            logger.debug("Interrupt detected from adapter, signaling agent...")
+                            agent.interrupt(pending_text)
+                            _interrupt_detected.set()
+                            break
+                except asyncio.CancelledError:
+                    raise
+                except Exception as _mon_err:
+                    logger.debug("monitor_for_interrupt error (will retry): %s", _mon_err)
        
        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())

        # Periodic "still working" notifications for long-running tasks.
-        # Fires every 10 minutes so the user knows the agent hasn't died.
-        _NOTIFY_INTERVAL = 600  # 10 minutes
+        # Fires every N seconds so the user knows the agent hasn't died.
+        # Config: agent.gateway_notify_interval in config.yaml, or
+        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 600s (10 min).
+        # 0 = disable notifications.
+        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
+        _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
        _notify_start = time.time()

        async def _notify_long_running():
+            if _NOTIFY_INTERVAL is None:
+                return  # Notifications disabled (gateway_notify_interval: 0)
            _notify_adapter = self.adapters.get(source.platform)
            if not _notify_adapter:
                return
@@ -7995,8 +8363,34 @@ class GatewayRunner:
            _POLL_INTERVAL = 5.0

            if _agent_timeout is None:
-                # Unlimited — just await the result.
-                response = await _executor_task
+                # Unlimited — still poll periodically for backup interrupt
+                # detection in case monitor_for_interrupt() silently died.
+                response = None
+                while True:
+                    done, _ = await asyncio.wait(
+                        {_executor_task}, timeout=_POLL_INTERVAL
+                    )
+                    if done:
+                        response = _executor_task.result()
+                        break
+                    # Backup interrupt check: if the monitor task died or
+                    # missed the interrupt, catch it here.
+                    if not _interrupt_detected.is_set() and session_key:
+                        _backup_adapter = self.adapters.get(source.platform)
+                        _backup_agent = agent_holder[0]
+                        if (_backup_adapter and _backup_agent
+                                and hasattr(_backup_adapter, 'has_pending_interrupt')
+                                and _backup_adapter.has_pending_interrupt(session_key)):
+                            _bp_event = _backup_adapter._pending_messages.get(session_key)
+                            _bp_text = _bp_event.text if _bp_event else None
+                            logger.info(
+                                "Backup interrupt detected for session %s "
+                                "(monitor task state: %s)",
+                                session_key[:20],
+                                "done" if interrupt_monitor.done() else "running",
+                            )
+                            _backup_agent.interrupt(_bp_text)
+                            _interrupt_detected.set()
            else:
                # Poll loop: check the agent's built-in activity tracker
                # (updated by _touch_activity() on every tool call, API
@@ -8040,6 +8434,23 @@ class GatewayRunner:
                    if _idle_secs >= _agent_timeout:
                        _inactivity_timeout = True
                        break
+                    # Backup interrupt check (same as unlimited path).
+                    if not _interrupt_detected.is_set() and session_key:
+                        _backup_adapter = self.adapters.get(source.platform)
+                        _backup_agent = agent_holder[0]
+                        if (_backup_adapter and _backup_agent
+                                and hasattr(_backup_adapter, 'has_pending_interrupt')
+                                and _backup_adapter.has_pending_interrupt(session_key)):
+                            _bp_event = _backup_adapter._pending_messages.get(session_key)
+                            _bp_text = _bp_event.text if _bp_event else None
+                            logger.info(
+                                "Backup interrupt detected for session %s "
+                                "(monitor task state: %s)",
+                                session_key[:20],
+                                "done" if interrupt_monitor.done() else "running",
+                            )
+                            _backup_agent.interrupt(_bp_text)
+                            _interrupt_detected.set()

            if _inactivity_timeout:
                # Build a diagnostic summary from the agent's activity tracker.
@@ -8458,23 +8869,11 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    except Exception:
        pass

-    # Centralized logging — agent.log (INFO+) and errors.log (WARNING+).
+    # Centralized logging — agent.log (INFO+), errors.log (WARNING+),
+    # and gateway.log (INFO+, gateway-component records only).
    # Idempotent, so repeated calls from AIAgent.__init__ won't duplicate.
    from hermes_logging import setup_logging
-    log_dir = setup_logging(hermes_home=_hermes_home, mode="gateway")
-
-    # Gateway-specific rotating log — captures all gateway-level messages
-    # (session management, platform adapters, slash commands, etc.).
-    from agent.redact import RedactingFormatter
-    from hermes_logging import _add_rotating_handler
-    _add_rotating_handler(
-        logging.getLogger(),
-        log_dir / 'gateway.log',
-        level=logging.INFO,
-        max_bytes=5 * 1024 * 1024,
-        backup_count=3,
-        formatter=RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'),
-    )
+    setup_logging(hermes_home=_hermes_home, mode="gateway")

    # Optional stderr handler — level driven by -v/-q flags on the CLI.
    # verbosity=None (-q/--quiet): no stderr output
@@ -8482,6 +8881,8 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # verbosity=1    (-v):         INFO and above
    # verbosity=2+   (-vv/-vvv):   DEBUG
    if verbosity is not None:
+        from agent.redact import RedactingFormatter
+
        _stderr_level = {0: logging.WARNING, 1: logging.INFO}.get(verbosity, logging.DEBUG)
        _stderr_handler = logging.StreamHandler()
        _stderr_handler.setLevel(_stderr_level)
@@ -8501,16 +8902,19 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
        runner.request_restart(detached=False, via_service=True)
    
    loop = asyncio.get_event_loop()
-    for sig in (signal.SIGINT, signal.SIGTERM):
-        try:
-            loop.add_signal_handler(sig, shutdown_signal_handler)
-        except NotImplementedError:
-            pass
-    if hasattr(signal, "SIGUSR1"):
-        try:
-            loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler)
-        except NotImplementedError:
-            pass
+    if threading.current_thread() is threading.main_thread():
+        for sig in (signal.SIGINT, signal.SIGTERM):
+            try:
+                loop.add_signal_handler(sig, shutdown_signal_handler)
+            except NotImplementedError:
+                pass
+        if hasattr(signal, "SIGUSR1"):
+            try:
+                loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler)
+            except NotImplementedError:
+                pass
+    else:
+        logger.info("Skipping signal handlers (not running in main thread).")
    
    # Start the gateway
    success = await runner.start()
@@ -807,9 +807,9 @@ class SessionStore:
        to avoid resetting long-idle sessions that are harmless to resume.
        Returns the number of sessions that were suspended.
        """
-        import time as _time
+        from datetime import timedelta

-        cutoff = _time.time() - max_age_seconds
+        cutoff = _now() - timedelta(seconds=max_age_seconds)
        count = 0
        with self._lock:
            self._ensure_loaded_locked()
@@ -878,7 +878,8 @@ class SessionStore:
        Used by ``/resume`` to restore a previously-named session.
        Ends the current session in SQLite (like reset), but instead of
        generating a fresh session ID, re-uses ``target_session_id`` so the
-        old transcript is loaded on the next message.
+        old transcript is loaded on the next message. If the target session was
+        previously ended, re-open it so gateway resume semantics match the CLI.
        """
        db_end_session_id = None
        new_entry = None
@@ -918,6 +919,12 @@ class SessionStore:
            except Exception as e:
                logger.debug("Session DB end_session failed: %s", e)

+        if self._db:
+            try:
+                self._db.reopen_session(target_session_id)
+            except Exception as e:
+                logger.debug("Session DB reopen_session failed: %s", e)
+
        return new_entry

    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
@@ -26,6 +26,7 @@ _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
+_UNSET = object()


 def _get_pid_path() -> Path:
@@ -218,14 +219,14 @@ def write_pid_file() -> None:

 def write_runtime_status(
    *,
-    gateway_state: Optional[str] = None,
-    exit_reason: Optional[str] = None,
-    restart_requested: Optional[bool] = None,
-    active_agents: Optional[int] = None,
-    platform: Optional[str] = None,
-    platform_state: Optional[str] = None,
-    error_code: Optional[str] = None,
-    error_message: Optional[str] = None,
+    gateway_state: Any = _UNSET,
+    exit_reason: Any = _UNSET,
+    restart_requested: Any = _UNSET,
+    active_agents: Any = _UNSET,
+    platform: Any = _UNSET,
+    platform_state: Any = _UNSET,
+    error_code: Any = _UNSET,
+    error_message: Any = _UNSET,
 ) -> None:
    """Persist gateway runtime health information for diagnostics/status."""
    path = _get_runtime_status_path()
@@ -236,22 +237,22 @@ def write_runtime_status(
    payload["start_time"] = _get_process_start_time(os.getpid())
    payload["updated_at"] = _utc_now_iso()

-    if gateway_state is not None:
+    if gateway_state is not _UNSET:
        payload["gateway_state"] = gateway_state
-    if exit_reason is not None:
+    if exit_reason is not _UNSET:
        payload["exit_reason"] = exit_reason
-    if restart_requested is not None:
+    if restart_requested is not _UNSET:
        payload["restart_requested"] = bool(restart_requested)
-    if active_agents is not None:
+    if active_agents is not _UNSET:
        payload["active_agents"] = max(0, int(active_agents))

-    if platform is not None:
+    if platform is not _UNSET:
        platform_payload = payload["platforms"].get(platform, {})
-        if platform_state is not None:
+        if platform_state is not _UNSET:
            platform_payload["state"] = platform_state
-        if error_code is not None:
+        if error_code is not _UNSET:
            platform_payload["error_code"] = error_code
-        if error_message is not None:
+        if error_message is not _UNSET:
            platform_payload["error_message"] = error_message
        platform_payload["updated_at"] = _utc_now_iso()
        payload["platforms"][platform] = platform_payload
@@ -289,6 +290,15 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
    }

    existing = _read_json_file(lock_path)
+    if existing is None and lock_path.exists():
+        # Lock file exists but is empty or contains invalid JSON — treat as
+        # stale.  This happens when a previous process was killed between
+        # O_CREAT|O_EXCL and the subsequent json.dump() (e.g. DNS failure
+        # during rapid Slack reconnect retries).
+        try:
+            lock_path.unlink(missing_ok=True)
+        except OSError:
+            pass
    if existing:
        try:
            existing_pid = int(existing["pid"])
@@ -127,6 +127,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="api_key",
        inference_base_url=DEFAULT_GITHUB_MODELS_BASE_URL,
        api_key_env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
+        base_url_env_var="COPILOT_API_BASE_URL",
    ),
    "copilot-acp": ProviderConfig(
        id="copilot-acp",
@@ -307,44 +308,6 @@ def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) ->
    return default_url


-def _gh_cli_candidates() -> list[str]:
-    """Return candidate ``gh`` binary paths, including common Homebrew installs."""
-    candidates: list[str] = []
-
-    resolved = shutil.which("gh")
-    if resolved:
-        candidates.append(resolved)
-
-    for candidate in (
-        "/opt/homebrew/bin/gh",
-        "/usr/local/bin/gh",
-        str(Path.home() / ".local" / "bin" / "gh"),
-    ):
-        if candidate in candidates:
-            continue
-        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
-            candidates.append(candidate)
-
-    return candidates
-
-
-def _try_gh_cli_token() -> Optional[str]:
-    """Return a token from ``gh auth token`` when the GitHub CLI is available."""
-    for gh_path in _gh_cli_candidates():
-        try:
-            result = subprocess.run(
-                [gh_path, "auth", "token"],
-                capture_output=True,
-                text=True,
-                timeout=5,
-            )
-        except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
-            logger.debug("gh CLI token lookup failed (%s): %s", gh_path, exc)
-            continue
-        if result.returncode == 0 and result.stdout.strip():
-            return result.stdout.strip()
-    return None
-

 _PLACEHOLDER_SECRET_VALUES = {
    "*",
@@ -1303,6 +1266,49 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    }


+def _write_codex_cli_tokens(
+    access_token: str,
+    refresh_token: str,
+    *,
+    last_refresh: Optional[str] = None,
+) -> None:
+    """Write refreshed tokens back to ~/.codex/auth.json.
+
+    OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
+    When Hermes refreshes a token it consumes the old refresh_token; if we
+    don't write the new pair back, the Codex CLI (or VS Code extension) will
+    fail with ``refresh_token_reused`` on its next refresh attempt.
+
+    This mirrors the Anthropic write-back to ~/.claude/.credentials.json
+    via ``_write_claude_code_credentials()``.
+
+    The write-back is best-effort: filesystem errors and an unparseable
+    existing file are logged at debug level and never raised, so a broken
+    Codex CLI install cannot fail a Hermes token refresh.
+
+    Args:
+        access_token: New access token to store under ``tokens.access_token``.
+        refresh_token: New refresh token to store under ``tokens.refresh_token``.
+        last_refresh: Optional value for the top-level ``last_refresh`` key;
+            the key is left untouched when ``None``.
+    """
+    codex_home = os.getenv("CODEX_HOME", "").strip()
+    if not codex_home:
+        codex_home = str(Path.home() / ".codex")
+    auth_path = Path(codex_home).expanduser() / "auth.json"
+    try:
+        existing: Dict[str, Any] = {}
+        if auth_path.is_file():
+            existing = json.loads(auth_path.read_text(encoding="utf-8"))
+        if not isinstance(existing, dict):
+            existing = {}
+
+        # Merge into the existing "tokens" mapping so unrelated fields
+        # already stored there are preserved.
+        tokens_dict = existing.get("tokens")
+        if not isinstance(tokens_dict, dict):
+            tokens_dict = {}
+        tokens_dict["access_token"] = access_token
+        tokens_dict["refresh_token"] = refresh_token
+        existing["tokens"] = tokens_dict
+        if last_refresh is not None:
+            existing["last_refresh"] = last_refresh
+
+        serialized = json.dumps(existing, indent=2)
+        auth_path.parent.mkdir(parents=True, exist_ok=True)
+        # Create the file owner-only from the start (the mode argument only
+        # applies to newly created files) and tighten a pre-existing file
+        # before any secret is written, so the tokens are never readable
+        # through umask-default permissions.
+        fd = os.open(str(auth_path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            auth_path.chmod(0o600)
+            fh.write(serialized)
+    except (OSError, ValueError) as exc:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError from
+        # a corrupt auth.json; leave such a file alone rather than crash.
+        logger.debug("Failed to write refreshed tokens to %s: %s", auth_path, exc)
+
+
 def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None:
    """Save Codex OAuth tokens to Hermes auth store (~/.hermes/auth.json)."""
    if last_refresh is None:
@@ -1425,6 +1431,12 @@ def _refresh_codex_auth_tokens(
    updated_tokens["refresh_token"] = refreshed["refresh_token"]

    _save_codex_tokens(updated_tokens)
+    # Write back to ~/.codex/auth.json so Codex CLI / VS Code stay in sync.
+    _write_codex_cli_tokens(
+        refreshed["access_token"],
+        refreshed["refresh_token"],
+        last_refresh=refreshed.get("last_refresh"),
+    )
    return updated_tokens


@@ -0,0 +1,655 @@
+"""
+Backup and import commands for hermes CLI.
+
+`hermes backup` creates a zip archive of the entire ~/.hermes/ directory
+(excluding the hermes-agent repo and transient files).
+
+`hermes import` restores from a backup zip, overlaying onto the current
+HERMES_HOME root.
+"""
+
+import json
+import logging
+import os
+import shutil
+import sqlite3
+import sys
+import tempfile
+import time
+import zipfile
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from hermes_constants import get_default_hermes_root, get_hermes_home, display_hermes_home
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Exclusion rules
+# ---------------------------------------------------------------------------
+
+# Directory names to skip entirely (matched against each path component)
+_EXCLUDED_DIRS = {
+    "hermes-agent",     # the codebase repo — re-clone instead
+    "__pycache__",      # bytecode caches — regenerated on import
+    ".git",             # nested git dirs (profiles shouldn't have these, but safety)
+    "node_modules",     # js deps if website/ somehow leaks in
+}
+
+# File-name suffixes to skip
+_EXCLUDED_SUFFIXES = (
+    ".pyc",
+    ".pyo",
+)
+
+# File names to skip (runtime state that's meaningless on another machine)
+_EXCLUDED_NAMES = {
+    "gateway.pid",
+    "cron.pid",
+}
+
+
+def _should_exclude(rel_path: Path) -> bool:
+    """Decide whether *rel_path* (relative to hermes root) is left out of a backup."""
+    # Any component of the path (including the final one) naming an
+    # excluded directory rules the whole path out.
+    if not _EXCLUDED_DIRS.isdisjoint(rel_path.parts):
+        return True
+
+    # Otherwise only the file name itself matters: exact runtime-state
+    # names or an excluded suffix.
+    filename = rel_path.name
+    return filename in _EXCLUDED_NAMES or filename.endswith(_EXCLUDED_SUFFIXES)
+
+
+# ---------------------------------------------------------------------------
+# SQLite safe copy
+# ---------------------------------------------------------------------------
+
+def _safe_copy_db(src: Path, dst: Path) -> bool:
+    """Copy a SQLite database safely using the backup() API.
+
+    Handles WAL mode — produces a consistent snapshot even while
+    the DB is being written to.  Falls back to raw copy on failure.
+
+    Args:
+        src: Path of the database to snapshot (opened read-only).
+        dst: Path the snapshot is written to.
+
+    Returns:
+        True when either the SQLite backup or the raw-copy fallback
+        succeeded, False when both failed.
+    """
+    src_conn = None
+    dst_conn = None
+    try:
+        src_conn = sqlite3.connect(f"file:{src}?mode=ro", uri=True)
+        dst_conn = sqlite3.connect(str(dst))
+        src_conn.backup(dst_conn)
+        return True
+    except Exception as exc:
+        logger.warning("SQLite safe copy failed for %s: %s", src, exc)
+    finally:
+        # Always release both handles — also on failure — so the fallback
+        # copy below (and the caller's cleanup of dst) is not blocked by a
+        # still-open connection.
+        for conn in (dst_conn, src_conn):
+            if conn is not None:
+                conn.close()
+
+    # Backup API failed: fall back to a plain file copy.
+    try:
+        shutil.copy2(src, dst)
+        return True
+    except Exception as exc2:
+        logger.error("Raw copy also failed for %s: %s", src, exc2)
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Backup
+# ---------------------------------------------------------------------------
+
+def _format_size(nbytes: int) -> str:
+    """Render a byte count as a short human-readable string (B, KB, MB, GB, TB)."""
+    # Plain bytes are shown as-is, without a decimal part.
+    if nbytes < 1024:
+        return f"{nbytes} B"
+
+    scaled = nbytes / 1024
+    for unit in ("KB", "MB", "GB"):
+        if scaled < 1024:
+            return f"{scaled:.1f} {unit}"
+        scaled /= 1024
+    # Anything beyond the GB range is expressed in TB, however large.
+    return f"{scaled:.1f} TB"
+
+
+def run_backup(args) -> None:
+    """Create a zip backup of the Hermes home directory.
+
+    Walks the Hermes root, prunes excluded directories and files, and
+    writes everything else into a deflate-compressed zip.  ``*.db`` files
+    are snapshotted through ``_safe_copy_db`` into a temporary file first.
+    Files that cannot be read are skipped and listed as warnings.
+
+    Args:
+        args: Object exposing an ``output`` attribute.  When falsy, the
+            archive goes to ``~/hermes-backup-<timestamp>.zip``; when it
+            names an existing directory, the timestamped archive is placed
+            inside it; otherwise it is used as the archive path (``.zip``
+            is appended when missing).
+
+    Exits the process with status 1 when the Hermes root does not exist.
+    """
+    hermes_root = get_default_hermes_root()
+
+    if not hermes_root.is_dir():
+        print(f"Error: Hermes home directory not found at {hermes_root}")
+        sys.exit(1)
+
+    # Determine output path
+    if args.output:
+        out_path = Path(args.output).expanduser().resolve()
+        # If user gave a directory, put the zip inside it
+        if out_path.is_dir():
+            stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+            out_path = out_path / f"hermes-backup-{stamp}.zip"
+    else:
+        stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+        out_path = Path.home() / f"hermes-backup-{stamp}.zip"
+
+    # Ensure the suffix is .zip
+    if out_path.suffix.lower() != ".zip":
+        out_path = out_path.with_suffix(out_path.suffix + ".zip")
+
+    # Ensure parent directory exists
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Collect files
+    print(f"Scanning {display_hermes_home()} ...")
+    files_to_add: list[tuple[Path, Path]] = []  # (absolute, relative)
+    skipped_dirs = set()
+
+    for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
+        dp = Path(dirpath)
+        rel_dir = dp.relative_to(hermes_root)
+
+        # Prune excluded directories in-place so os.walk doesn't descend
+        orig_dirnames = dirnames[:]
+        dirnames[:] = [
+            d for d in dirnames
+            if d not in _EXCLUDED_DIRS
+        ]
+        for removed in set(orig_dirnames) - set(dirnames):
+            skipped_dirs.add(str(rel_dir / removed))
+
+        for fname in filenames:
+            fpath = dp / fname
+            rel = fpath.relative_to(hermes_root)
+
+            if _should_exclude(rel):
+                continue
+
+            # Skip the output zip itself if it happens to be inside hermes root
+            try:
+                if fpath.resolve() == out_path.resolve():
+                    continue
+            except (OSError, ValueError):
+                pass
+
+            files_to_add.append((fpath, rel))
+
+    if not files_to_add:
+        print("No files to back up.")
+        return
+
+    # Create the zip
+    file_count = len(files_to_add)
+    print(f"Backing up {file_count} files ...")
+
+    total_bytes = 0
+    added_count = 0  # files actually written (file_count minus skipped)
+    errors = []
+    t0 = time.monotonic()
+
+    # strict_timestamps=False: files whose mtime predates 1980 are clamped
+    # to the zip epoch instead of raising ValueError and aborting the backup.
+    with zipfile.ZipFile(
+        out_path,
+        "w",
+        zipfile.ZIP_DEFLATED,
+        compresslevel=6,
+        strict_timestamps=False,
+    ) as zf:
+        for i, (abs_path, rel_path) in enumerate(files_to_add, 1):
+            try:
+                # Safe copy for SQLite databases (handles WAL mode)
+                if abs_path.suffix == ".db":
+                    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                        tmp_db = Path(tmp.name)
+                    try:
+                        if not _safe_copy_db(abs_path, tmp_db):
+                            errors.append(f"  {rel_path}: SQLite safe copy failed")
+                            continue
+                        zf.write(tmp_db, arcname=str(rel_path))
+                        total_bytes += tmp_db.stat().st_size
+                    finally:
+                        # Remove the snapshot even when writing it to the
+                        # archive raised, so failed runs leave no temp files.
+                        tmp_db.unlink(missing_ok=True)
+                else:
+                    zf.write(abs_path, arcname=str(rel_path))
+                    total_bytes += abs_path.stat().st_size
+            except (PermissionError, OSError) as exc:
+                errors.append(f"  {rel_path}: {exc}")
+                continue
+
+            added_count += 1
+
+            # Progress every 500 files
+            if i % 500 == 0:
+                print(f"  {i}/{file_count} files ...")
+
+    elapsed = time.monotonic() - t0
+    zip_size = out_path.stat().st_size
+
+    # Summary
+    print()
+    print(f"Backup complete: {out_path}")
+    print(f"  Files:       {added_count}")
+    print(f"  Original:    {_format_size(total_bytes)}")
+    print(f"  Compressed:  {_format_size(zip_size)}")
+    print(f"  Time:        {elapsed:.1f}s")
+
+    if skipped_dirs:
+        print("\n  Excluded directories:")
+        for d in sorted(skipped_dirs):
+            print(f"    {d}/")
+
+    if errors:
+        print(f"\n  Warnings ({len(errors)} files skipped):")
+        for e in errors[:10]:
+            print(e)
+        if len(errors) > 10:
+            print(f"  ... and {len(errors) - 10} more")
+
+    print(f"\nRestore with: hermes import {out_path.name}")
+
+
+# ---------------------------------------------------------------------------
+# Import
+# ---------------------------------------------------------------------------
+
+def _validate_backup_zip(zf: zipfile.ZipFile) -> tuple[bool, str]:
+    """Sanity-check that an archive plausibly holds a Hermes home directory.
+
+    The archive is accepted when at least one entry, at any depth, has the
+    basename ``config.yaml``, ``.env`` or ``state.db``.
+
+    Returns:
+        ``(True, "")`` when the archive looks like a backup, otherwise
+        ``(False, reason)`` with a human-readable explanation.
+    """
+    entries = zf.namelist()
+    if not entries:
+        return False, "zip archive is empty"
+
+    # Only basenames are compared, so a backup wrapped in a parent directory
+    # (e.g. ".hermes/config.yaml") is still recognised.
+    telltale = {"config.yaml", ".env", "state.db"}
+    if any(Path(entry).name in telltale for entry in entries):
+        return True, ""
+
+    return False, (
+        "zip does not appear to be a Hermes backup "
+        "(no config.yaml, .env, or state databases found)"
+    )
+
+
+def _detect_prefix(zf: zipfile.ZipFile) -> str:
+    """Detect a single wrapper directory that contains *every* file entry.
+
+    Some tools zip as `.hermes/config.yaml` instead of `config.yaml`.
+
+    A prefix is only reported when all file entries live under the same
+    top-level directory and that directory is named ``.hermes`` or
+    ``hermes``.  If any file sits at the archive root there is no wrapper,
+    so nothing must be stripped (otherwise an ordinary ``hermes/``
+    subdirectory inside a flat backup would be flattened on import).
+
+    Returns:
+        The prefix to strip, including the trailing ``/``, or an empty
+        string when there is none.
+    """
+    names = [n for n in zf.namelist() if not n.endswith("/")]
+    if not names:
+        return ""
+
+    top_level = set()
+    for name in names:
+        parts = Path(name).parts
+        if len(parts) < 2:
+            # A root-level file means the entries are not all wrapped.
+            return ""
+        top_level.add(parts[0])
+
+    if len(top_level) == 1:
+        prefix = next(iter(top_level))
+        # Only strip if it looks like a hermes dir name
+        if prefix in (".hermes", "hermes"):
+            return prefix + "/"
+
+    return ""
+
+
+def run_import(args) -> None:
+    """Restore a Hermes backup from a zip file.
+
+    Reads ``args.zipfile`` (path to the archive) and ``args.force`` (skip the
+    overwrite confirmation).  The archive is validated, an optional
+    ``.hermes/`` / ``hermes/`` wrapper prefix is stripped, and every file
+    member is extracted below the default Hermes root.  Members whose
+    resolved target would fall outside that root are rejected.  Afterwards,
+    wrapper scripts for restored profiles are recreated when the profiles
+    module can be imported.
+
+    Exits with status 1 when the file is missing, is not a zip, fails
+    validation, or the confirmation prompt is interrupted.  Returns without
+    importing when the user declines the confirmation.
+    """
+    zip_path = Path(args.zipfile).expanduser().resolve()
+
+    if not zip_path.is_file():
+        print(f"Error: File not found: {zip_path}")
+        sys.exit(1)
+
+    if not zipfile.is_zipfile(zip_path):
+        print(f"Error: Not a valid zip file: {zip_path}")
+        sys.exit(1)
+
+    hermes_root = get_default_hermes_root()
+
+    with zipfile.ZipFile(zip_path, "r") as zf:
+        # Validate
+        ok, reason = _validate_backup_zip(zf)
+        if not ok:
+            print(f"Error: {reason}")
+            sys.exit(1)
+
+        prefix = _detect_prefix(zf)
+        members = [n for n in zf.namelist() if not n.endswith("/")]
+        file_count = len(members)
+
+        print(f"Backup contains {file_count} files")
+        print(f"Target: {display_hermes_home()}")
+
+        if prefix:
+            print(f"Detected archive prefix: {prefix!r} (will be stripped)")
+
+        # Check for existing installation
+        has_config = (hermes_root / "config.yaml").exists()
+        has_env = (hermes_root / ".env").exists()
+
+        if (has_config or has_env) and not args.force:
+            print()
+            print("Warning: Target directory already has Hermes configuration.")
+            print("Importing will overwrite existing files with backup contents.")
+            print()
+            try:
+                answer = input("Continue? [y/N] ").strip().lower()
+            except (EOFError, KeyboardInterrupt):
+                print("\nAborted.")
+                sys.exit(1)
+            if answer not in ("y", "yes"):
+                print("Aborted.")
+                return
+
+        # Extract
+        print(f"\nImporting {file_count} files ...")
+        hermes_root.mkdir(parents=True, exist_ok=True)
+
+        errors = []
+        restored = 0
+        t0 = time.monotonic()
+
+        for member in members:
+            # Strip prefix if detected
+            if prefix and member.startswith(prefix):
+                rel = member[len(prefix):]
+            else:
+                rel = member
+
+            if not rel:
+                continue
+
+            target = hermes_root / rel
+
+            # Security: reject absolute paths and traversals
+            try:
+                target.resolve().relative_to(hermes_root.resolve())
+            except ValueError:
+                errors.append(f"  {rel}: path traversal blocked")
+                continue
+
+            try:
+                target.parent.mkdir(parents=True, exist_ok=True)
+                # Stream the member to disk in chunks so a large entry is
+                # never held fully in memory.
+                with zf.open(member) as src, open(target, "wb") as dst:
+                    shutil.copyfileobj(src, dst)
+            except (PermissionError, OSError) as exc:
+                errors.append(f"  {rel}: {exc}")
+                continue
+
+            restored += 1
+            # Report progress only right after a successful restore; checking
+            # on every iteration would print "0/N" (or repeat the same line)
+            # whenever a member is skipped with an error.
+            if restored % 500 == 0:
+                print(f"  {restored}/{file_count} files ...")
+
+        elapsed = time.monotonic() - t0
+
+        # Summary
+        print()
+        print(f"Import complete: {restored} files restored in {elapsed:.1f}s")
+        print(f"  Target: {display_hermes_home()}")
+
+        if errors:
+            print(f"\n  Warnings ({len(errors)} files skipped):")
+            for e in errors[:10]:
+                print(e)
+            if len(errors) > 10:
+                print(f"  ... and {len(errors) - 10} more")
+
+        # Post-import: restore profile wrapper scripts
+        profiles_dir = hermes_root / "profiles"
+        restored_profiles = []
+        if profiles_dir.is_dir():
+            try:
+                from hermes_cli.profiles import (
+                    create_wrapper_script, check_alias_collision,
+                    _is_wrapper_dir_in_path, _get_wrapper_dir,
+                )
+                for entry in sorted(profiles_dir.iterdir()):
+                    if not entry.is_dir():
+                        continue
+                    profile_name = entry.name
+                    # Only create wrappers for directories with config
+                    if not (entry / "config.yaml").exists() and not (entry / ".env").exists():
+                        continue
+                    collision = check_alias_collision(profile_name)
+                    if collision:
+                        print(f"  Skipped alias '{profile_name}': {collision}")
+                        restored_profiles.append((profile_name, False))
+                    else:
+                        wrapper = create_wrapper_script(profile_name)
+                        restored_profiles.append((profile_name, wrapper is not None))
+
+                if restored_profiles:
+                    created = [n for n, ok in restored_profiles if ok]
+                    skipped = [n for n, ok in restored_profiles if not ok]
+                    if created:
+                        print(f"\n  Profile aliases restored: {', '.join(created)}")
+                    if skipped:
+                        print(f"  Profile aliases skipped:  {', '.join(skipped)}")
+                    if not _is_wrapper_dir_in_path():
+                        print(f"\n  Note: {_get_wrapper_dir()} is not in your PATH.")
+                        print('  Add to your shell config (~/.bashrc or ~/.zshrc):')
+                        print('    export PATH="$HOME/.local/bin:$PATH"')
+            except ImportError:
+                # hermes_cli.profiles might not be available (fresh install)
+                if any(profiles_dir.iterdir()):
+                    print(f"\n  Profiles detected but aliases could not be created.")
+                    print(f"  Run: hermes profile list  (after installing hermes)")
+
+        # Guidance
+        print()
+        if not (hermes_root / "hermes-agent").is_dir():
+            print("Note: The hermes-agent codebase was not included in the backup.")
+            print("  If this is a fresh install, run: hermes update")
+
+        if restored_profiles:
+            gw_profiles = [n for n, _ in restored_profiles]
+            print("\nTo re-enable gateway services for profiles:")
+            for pname in gw_profiles:
+                print(f"  hermes -p {pname} gateway install")
+
+        print("Done. Your Hermes configuration has been restored.")
+
+
+# ---------------------------------------------------------------------------
+# Quick state snapshots (used by /snapshot slash command and hermes backup --quick)
+# ---------------------------------------------------------------------------
+
+# Critical state files to include in quick snapshots (relative to HERMES_HOME).
+# Everything else is either regeneratable (logs, cache) or managed separately
+# (skills, repo, sessions/).
+_QUICK_STATE_FILES = (
+    "state.db",
+    "config.yaml",
+    ".env",
+    "auth.json",
+    "cron/jobs.json",
+    "gateway_state.json",
+    "channel_directory.json",
+    "processes.json",
+)
+
+# Name of the directory, directly under the Hermes home, that holds snapshots.
+_QUICK_SNAPSHOTS_DIR = "state-snapshots"
+# Default number of most recent snapshots kept when pruning.
+_QUICK_DEFAULT_KEEP = 20
+
+
+def _quick_snapshot_root(hermes_home: Optional[Path] = None) -> Path:
+    """Return the directory that holds quick state snapshots.
+
+    Uses ``hermes_home`` when given, otherwise the result of
+    ``get_hermes_home()``.
+    """
+    base = hermes_home if hermes_home else get_hermes_home()
+    return base / _QUICK_SNAPSHOTS_DIR
+
+
+def create_quick_snapshot(
+    label: Optional[str] = None,
+    hermes_home: Optional[Path] = None,
+) -> Optional[str]:
+    """Create a quick state snapshot of critical files.
+
+    Copies every existing file listed in ``_QUICK_STATE_FILES`` to a
+    timestamped directory under state-snapshots/ and writes a
+    ``manifest.json`` describing what was captured.  ``.db`` files are
+    copied via ``_safe_copy_db``; everything else with ``shutil.copy2``.
+    Auto-prunes old snapshots beyond the keep limit.
+
+    Args:
+        label: Optional suffix for the snapshot ID.  Path separators and
+            whitespace are collapsed to ``-`` so the ID is always a single
+            directory name.  The original label is stored in the manifest.
+        hermes_home: Hermes home to snapshot; defaults to
+            ``get_hermes_home()``.
+
+    Returns:
+        Snapshot ID (timestamp-based), or None if no files found.
+    """
+    home = hermes_home or get_hermes_home()
+    root = _quick_snapshot_root(home)
+
+    ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+    # The label becomes part of a directory name.  Left as-is, a "/" would
+    # nest the snapshot one level deeper (invisible to list/prune) and ".."
+    # segments could place it outside the snapshots directory.
+    safe_label = (
+        "-".join(label.replace("\\", "/").replace("/", " ").split()) if label else ""
+    )
+    snap_id = f"{ts}-{safe_label}" if safe_label else ts
+    snap_dir = root / snap_id
+    snap_dir.mkdir(parents=True, exist_ok=True)
+
+    manifest: Dict[str, int] = {}  # rel_path -> file size
+
+    for rel in _QUICK_STATE_FILES:
+        src = home / rel
+        if not src.exists() or not src.is_file():
+            continue
+
+        dst = snap_dir / rel
+        dst.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            if src.suffix == ".db":
+                if not _safe_copy_db(src, dst):
+                    # Leave a trace instead of silently omitting the database.
+                    logger.warning("Could not snapshot %s: SQLite safe copy failed", rel)
+                    continue
+            else:
+                shutil.copy2(src, dst)
+            manifest[rel] = dst.stat().st_size
+        except (OSError, PermissionError) as exc:
+            logger.warning("Could not snapshot %s: %s", rel, exc)
+
+    if not manifest:
+        # Nothing was captured — do not leave an empty snapshot behind.
+        shutil.rmtree(snap_dir, ignore_errors=True)
+        return None
+
+    # Write manifest
+    meta = {
+        "id": snap_id,
+        "timestamp": ts,
+        "label": label,
+        "file_count": len(manifest),
+        "total_size": sum(manifest.values()),
+        "files": manifest,
+    }
+    with open(snap_dir / "manifest.json", "w") as f:
+        json.dump(meta, f, indent=2)
+
+    # Auto-prune
+    _prune_quick_snapshots(root, keep=_QUICK_DEFAULT_KEEP)
+
+    logger.info("State snapshot created: %s (%d files)", snap_id, len(manifest))
+    return snap_id
+
+
+def list_quick_snapshots(
+    limit: int = 20,
+    hermes_home: Optional[Path] = None,
+) -> List[Dict[str, Any]]:
+    """Return manifests of stored quick snapshots, newest name first.
+
+    Snapshot directories are visited in descending name order.  A directory
+    without ``manifest.json`` is ignored; one whose manifest cannot be read
+    or parsed yields a stub entry carrying only ``id``, ``file_count`` and
+    ``total_size``.  Iteration stops once ``limit`` entries are collected.
+    """
+    root = _quick_snapshot_root(hermes_home)
+    if not root.exists():
+        return []
+
+    snapshots = []
+    for snap_dir in sorted(root.iterdir(), reverse=True):
+        if not snap_dir.is_dir():
+            continue
+
+        manifest_file = snap_dir / "manifest.json"
+        if manifest_file.exists():
+            try:
+                with open(manifest_file) as fh:
+                    entry = json.load(fh)
+            except (json.JSONDecodeError, OSError):
+                # Unreadable manifest: still surface the snapshot by name.
+                entry = {"id": snap_dir.name, "file_count": 0, "total_size": 0}
+            snapshots.append(entry)
+
+        if len(snapshots) >= limit:
+            break
+
+    return snapshots
+
+
+def restore_quick_snapshot(
+    snapshot_id: str,
+    hermes_home: Optional[Path] = None,
+) -> bool:
+    """Restore state from a quick snapshot.
+
+    Overwrites current state files with the snapshot's copies, as listed
+    under ``files`` in the snapshot's ``manifest.json``.
+
+    Args:
+        snapshot_id: Name of the snapshot directory.  Must be a single path
+            component; IDs containing separators, ``.`` or ``..`` are
+            rejected.
+        hermes_home: Hermes home to restore into; defaults to
+            ``get_hermes_home()``.
+
+    Returns:
+        True if at least one file was restored.  False when the ID is
+        invalid, the snapshot or its manifest is missing or unreadable, or
+        no file could be restored.
+    """
+    # The ID is joined onto the snapshots directory, so anything other than
+    # a plain name could resolve to a location outside of it.
+    if (
+        not snapshot_id
+        or snapshot_id in (".", "..")
+        or Path(snapshot_id).name != snapshot_id
+    ):
+        logger.error("Invalid snapshot id: %r", snapshot_id)
+        return False
+
+    home = hermes_home or get_hermes_home()
+    root = _quick_snapshot_root(home)
+    snap_dir = root / snapshot_id
+
+    if not snap_dir.is_dir():
+        return False
+
+    manifest_path = snap_dir / "manifest.json"
+    if not manifest_path.exists():
+        return False
+
+    # A damaged manifest is reported as a failed restore rather than raised,
+    # matching how list_quick_snapshots tolerates unreadable manifests.
+    try:
+        with open(manifest_path) as f:
+            meta = json.load(f)
+    except (json.JSONDecodeError, OSError) as exc:
+        logger.error("Could not read manifest for snapshot %s: %s", snapshot_id, exc)
+        return False
+
+    files = meta.get("files") if isinstance(meta, dict) else None
+    if not isinstance(files, dict):
+        files = {}
+
+    restored = 0
+    for rel in files:
+        src = snap_dir / rel
+        if not src.exists():
+            continue
+
+        dst = home / rel
+        dst.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            if dst.suffix == ".db":
+                # Copy next to the destination, then swap it in with a single
+                # rename.  The live database is never removed first, so a
+                # failed copy or rename leaves the existing file in place.
+                tmp = dst.parent / f".{dst.name}.snap_restore"
+                try:
+                    shutil.copy2(src, tmp)
+                    tmp.replace(dst)
+                finally:
+                    # No-op after a successful rename; removes a partial
+                    # temp file otherwise.
+                    tmp.unlink(missing_ok=True)
+            else:
+                shutil.copy2(src, dst)
+            restored += 1
+        except (OSError, PermissionError) as exc:
+            logger.error("Failed to restore %s: %s", rel, exc)
+
+    logger.info("Restored %d files from snapshot %s", restored, snapshot_id)
+    return restored > 0
+
+
+def _prune_quick_snapshots(root: Path, keep: int = _QUICK_DEFAULT_KEEP) -> int:
+    """Delete all but the ``keep`` highest-named directories under ``root``.
+
+    Directories are ordered by name, descending; everything past the first
+    ``keep`` entries is removed.  A directory that cannot be removed is
+    logged and left in place.
+
+    Returns:
+        Number of directories actually deleted (0 if ``root`` is missing).
+    """
+    if not root.exists():
+        return 0
+
+    snapshot_dirs = [entry for entry in root.iterdir() if entry.is_dir()]
+    snapshot_dirs.sort(key=lambda entry: entry.name, reverse=True)
+
+    removed = 0
+    for stale in snapshot_dirs[keep:]:
+        try:
+            shutil.rmtree(stale)
+        except OSError as exc:
+            logger.warning("Failed to prune snapshot %s: %s", stale.name, exc)
+        else:
+            removed += 1
+
+    return removed
+
+
+def prune_quick_snapshots(
+    keep: int = _QUICK_DEFAULT_KEEP,
+    hermes_home: Optional[Path] = None,
+) -> int:
+    """Prune quick snapshots on demand.
+
+    Resolves the snapshots directory for ``hermes_home`` and removes
+    everything beyond the ``keep`` limit.  Returns the number deleted.
+    """
+    snapshots_root = _quick_snapshot_root(hermes_home)
+    return _prune_quick_snapshots(snapshots_root, keep=keep)
+
+
+def run_quick_backup(args) -> None:
+    """CLI entry point for ``hermes backup --quick``.
+
+    Creates a snapshot (labelled with ``args.label`` when present) and
+    prints its ID, the number of stored snapshots and the restore command.
+    """
+    snap_id = create_quick_snapshot(label=getattr(args, "label", None))
+    if not snap_id:
+        print("No state files found to snapshot.")
+        return
+
+    print(f"State snapshot created: {snap_id}")
+    stored = len(list_quick_snapshots())
+    print(f"  {stored} snapshot(s) stored in {display_hermes_home()}/state-snapshots/")
+    print(f"  Restore with: /snapshot restore {snap_id}")
@@ -11,6 +11,7 @@ Usage:

 import importlib.util
 import logging
+import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
@@ -50,7 +51,100 @@ _OPENCLAW_SCRIPT_INSTALLED = (
 )

 # Known OpenClaw directory names (current + legacy)
-_OPENCLAW_DIR_NAMES = (".openclaw", ".clawdbot", ".moldbot")
+_OPENCLAW_DIR_NAMES = (".openclaw", ".clawdbot", ".moltbot")
+
+def _detect_openclaw_processes() -> list[str]:
+    """Detect running OpenClaw processes and services.
+
+    Probes are best-effort: a missing tool or a timeout simply means that
+    probe contributes nothing.
+
+    * Non-Windows: ``systemctl --user is-active openclaw-gateway.service``
+      and ``pgrep -f openclaw`` (excluding the current process).
+    * Windows: ``tasklist`` for ``openclaw.exe`` / ``clawd.exe`` and a
+      PowerShell query for ``node.exe`` with a matching command line.
+
+    Returns a list of human-readable descriptions of what was found.
+    An empty list means nothing was detected.
+    """
+    import os
+
+    found: list[str] = []
+
+    # -- systemd service (Linux) ------------------------------------------
+    if sys.platform != "win32":
+        try:
+            result = subprocess.run(
+                ["systemctl", "--user", "is-active", "openclaw-gateway.service"],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip() == "active":
+                found.append("systemd service: openclaw-gateway.service")
+        except (OSError, subprocess.TimeoutExpired):
+            # OSError covers a missing binary as well as e.g. a
+            # non-executable one; detection must never abort the caller.
+            pass
+
+    # -- process scan ------------------------------------------------------
+    if sys.platform == "win32":
+        try:
+            for exe in ("openclaw.exe", "clawd.exe"):
+                result = subprocess.run(
+                    ["tasklist", "/FI", f"IMAGENAME eq {exe}"],
+                    capture_output=True, text=True, timeout=5,
+                )
+                if exe in result.stdout.lower():
+                    found.append(f"process: {exe}")
+
+            # Node.js-hosted OpenClaw — tasklist doesn't show command lines,
+            # so fall back to PowerShell.  -ExpandProperty emits the bare PID
+            # instead of a formatted table with a "ProcessId" header.
+            ps_cmd = (
+                'Get-CimInstance Win32_Process -Filter "Name = \'node.exe\'" | '
+                'Where-Object { $_.CommandLine -match "openclaw|clawd" } | '
+                'Select-Object -First 1 -ExpandProperty ProcessId'
+            )
+            result = subprocess.run(
+                ["powershell", "-NoProfile", "-Command", ps_cmd],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip():
+                found.append(f"node.exe process with openclaw in command line (PID {result.stdout.strip()})")
+        except Exception:
+            # Deliberately broad: this whole branch is best-effort.
+            pass
+    else:
+        try:
+            result = subprocess.run(
+                ["pgrep", "-f", "openclaw"],
+                capture_output=True, text=True, timeout=3,
+            )
+            if result.returncode == 0:
+                # `pgrep -f` matches full command lines, so this very process
+                # is listed when "openclaw" appears in its own arguments
+                # (e.g. a --source path).  Drop it to avoid a false alarm.
+                own_pid = str(os.getpid())
+                pids = [p for p in result.stdout.split() if p != own_pid]
+                if pids:
+                    found.append(f"openclaw process(es) (PIDs: {', '.join(pids)})")
+        except (OSError, subprocess.TimeoutExpired):
+            pass
+
+    return found
+
+
+def _warn_if_openclaw_running(auto_yes: bool) -> None:
+    """Warn before migrating if OpenClaw still appears to be running.
+
+    Telegram, Discord, and Slack only allow one active connection per bot
+    token, so running OpenClaw and Hermes side by side makes them fight for
+    the same token.
+
+    Prints the detected processes and a recommendation.  With ``auto_yes``
+    or a non-interactive stdin the function returns after the warning;
+    otherwise the user is asked to confirm and the process exits with
+    status 0 if they decline.
+    """
+    detections = _detect_openclaw_processes()
+    if not detections:
+        return
+
+    print()
+    print_error("OpenClaw appears to be running:")
+    for item in detections:
+        print_info(f"  * {item}")
+    explanation = (
+        "Messaging platforms (Telegram, Discord, Slack) only allow one "
+        "active session per bot token. If you continue, both OpenClaw and "
+        "Hermes may try to use the same token, causing disconnects."
+    )
+    print_info(explanation)
+    print_info("Recommendation: stop OpenClaw before migrating.")
+    print()
+
+    if not auto_yes:
+        if not sys.stdin.isatty():
+            # Nobody to ask — warn and carry on.
+            print_info("Non-interactive session — continuing to preview only.")
+        elif not prompt_yes_no("Continue anyway?", default=False):
+            print_info("Migration cancelled. Stop OpenClaw and try again.")
+            sys.exit(0)
+

 def _warn_if_gateway_running(auto_yes: bool) -> None:
    """Check if a Hermes gateway is running with connected platforms.
@@ -87,8 +181,8 @@ def _warn_if_gateway_running(auto_yes: bool) -> None:
        print_info("Migration cancelled. Stop the gateway and try again.")
        sys.exit(0)

-# State files commonly found in OpenClaw workspace directories that cause
-# confusion after migration (the agent discovers them and writes to them)
+# State files commonly found in OpenClaw workspace directories — listed
+# during cleanup to help the user decide whether to archive
 _WORKSPACE_STATE_GLOBS = (
    "*/todo.json",
    "*/sessions/*",
@@ -133,7 +227,7 @@ def _find_openclaw_dirs() -> list[Path]:


 def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
-    """Scan an OpenClaw directory for workspace state files that cause confusion.
+    """Scan an OpenClaw directory for workspace state files.

    Returns a list of (path, description) tuples.
    """
@@ -216,7 +310,7 @@ def _cmd_migrate(args):
        source_dir = Path.home() / ".openclaw"
        if not source_dir.is_dir():
            # Try legacy directory names
-            for legacy in (".clawdbot", ".moldbot"):
+            for legacy in (".clawdbot", ".moltbot"):
                candidate = Path.home() / legacy
                if candidate.is_dir():
                    source_dir = candidate
@@ -287,8 +381,11 @@ def _cmd_migrate(args):
        print_info(f"Workspace:   {workspace_target}")
    print()

-    # Check if a gateway is running with connected platforms — migrating tokens
-    # while the gateway is active will cause conflicts (e.g. Telegram 409).
+    # Check if OpenClaw is still running — migrating tokens while both are
+    # active will cause conflicts (e.g. Telegram 409).
+    _warn_if_openclaw_running(auto_yes)
+
+    # Check if a Hermes gateway is running with connected platforms.
    _warn_if_gateway_running(auto_yes)

    # Ensure config.yaml exists before migration tries to read it
@@ -384,65 +481,16 @@ def _cmd_migrate(args):
    # Print results
    _print_migration_report(report, dry_run=False)

-    # After successful migration, offer to archive the source directory
-    if report.get("summary", {}).get("migrated", 0) > 0:
-        _offer_source_archival(source_dir, auto_yes)
-
-
-def _offer_source_archival(source_dir: Path, auto_yes: bool = False):
-    """After migration, offer to rename the source directory to prevent state fragmentation.
-
-    OpenClaw workspace directories contain state files (todo.json, sessions, etc.)
-    that the agent may discover and write to, causing confusion. Renaming the
-    directory prevents this.
-    """
-    if not source_dir.is_dir():
-        return
-
-    # Scan for state files that could cause problems
-    state_files = _scan_workspace_state(source_dir)
-
-    print()
-    print_header("Post-Migration Cleanup")
-    print_info("The OpenClaw directory still exists and contains workspace state files")
-    print_info("that can confuse the agent (todo lists, sessions, logs).")
-    if state_files:
-        print()
-        print(color("  Found state files:", Colors.YELLOW))
-        # Show up to 10 most relevant findings
-        for path, desc in state_files[:10]:
-            print(f"      {desc}")
-        if len(state_files) > 10:
-            print(f"      ... and {len(state_files) - 10} more")
-    print()
-    print_info(f"Recommend: rename {source_dir.name}/ to {source_dir.name}.pre-migration/")
-    print_info("This prevents the agent from discovering old workspace directories.")
-    print_info("You can always rename it back if needed.")
-    print()
-
-    if not auto_yes and not sys.stdin.isatty():
-        print_info("Non-interactive session — skipping archival.")
-        print_info("Run later with: hermes claw cleanup")
-        return
-
-    if auto_yes or prompt_yes_no(f"Archive {source_dir} now?", default=True):
-        try:
-            archive_path = _archive_directory(source_dir)
-            print_success(f"Archived: {source_dir} → {archive_path}")
-            print_info("The original directory has been renamed, not deleted.")
-            print_info(f"To undo: mv {archive_path} {source_dir}")
-        except OSError as e:
-            print_error(f"Could not archive: {e}")
-            print_info(f"You can do it manually: mv {source_dir} {source_dir}.pre-migration")
-    else:
-        print_info("Skipped. You can archive later with: hermes claw cleanup")
+    # Source directory is left untouched — archiving is not the migration
+    # tool's responsibility.  Users who want to clean up can run
+    # 'hermes claw cleanup' separately.


 def _cmd_cleanup(args):
    """Archive leftover OpenClaw directories after migration.

    Scans for OpenClaw directories that still exist after migration and offers
-    to rename them to .pre-migration to prevent state fragmentation.
+    to rename them to .pre-migration to free disk space.
    """
    dry_run = getattr(args, "dry_run", False)
    auto_yes = getattr(args, "yes", False)
@@ -479,6 +527,28 @@ def _cmd_cleanup(args):
        print_success("No OpenClaw directories found. Nothing to clean up.")
        return

+    # Warn if OpenClaw is still running — archiving while the service is
+    # active causes it to recreate an empty skeleton directory (#8502).
+    running = _detect_openclaw_processes()
+    if running:
+        print()
+        print_error("OpenClaw appears to be still running:")
+        for detail in running:
+            print_info(f"  * {detail}")
+        print_info(
+            "Archiving .openclaw/ while the service is active may cause it to "
+            "immediately recreate an empty skeleton directory, destroying your config."
+        )
+        print_info("Stop OpenClaw first: systemctl --user stop openclaw-gateway.service")
+        print()
+        if not auto_yes:
+            if not sys.stdin.isatty():
+                print_info("Non-interactive session — aborting. Stop OpenClaw and re-run.")
+                return
+            if not prompt_yes_no("Proceed anyway?", default=False):
+                print_info("Aborted. Stop OpenClaw first, then re-run: hermes claw cleanup")
+                return
+
    total_archived = 0

    for source_dir in dirs_to_check:
@@ -517,7 +587,7 @@ def _cmd_cleanup(args):

        if state_files:
            print()
-            print(color(f"  {len(state_files)} state file(s) that could cause confusion:", Colors.YELLOW))
+            print(color(f"  {len(state_files)} state file(s) found:", Colors.YELLOW))
            for path, desc in state_files[:8]:
                print(f"      {desc}")
            if len(state_files) > 8:
@@ -69,9 +69,12 @@ COMMAND_REGISTRY: list[CommandDef] = [
               args_hint="[name]"),
    CommandDef("branch", "Branch the current session (explore a different path)", "Session",
               aliases=("fork",), args_hint="[name]"),
-    CommandDef("compress", "Manually compress conversation context", "Session"),
+    CommandDef("compress", "Manually compress conversation context", "Session",
+               args_hint="[focus topic]"),
    CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
               args_hint="[number]"),
+    CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session",
+               aliases=("snap",), args_hint="[create|restore <id>|prune]"),
    CommandDef("stop", "Kill all running background processes", "Session"),
    CommandDef("approve", "Approve a pending dangerous command", "Session",
               gateway_only=True, args_hint="[session|always]"),
@@ -128,6 +131,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
@@ -153,6 +157,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True, args_hint="<path>"),
    CommandDef("update", "Update Hermes Agent to the latest version", "Info",
               gateway_only=True),
+    CommandDef("debug", "Upload debug report (system info + logs) and get shareable links", "Info"),

    # Exit
    CommandDef("quit", "Exit the CLI", "Exit",
@@ -50,6 +50,7 @@ _EXTRA_ENV_KEYS = frozenset({
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
+    "MATRIX_RECOVERY_KEY",
 })
 import yaml

@@ -147,25 +148,6 @@ def managed_error(action: str = "modify configuration"):
 # Container-aware CLI (NixOS container mode)
 # =============================================================================

-def _is_inside_container() -> bool:
-    """Detect if we're already running inside a Docker/Podman container."""
-    # Standard Docker/Podman indicators
-    if os.path.exists("/.dockerenv"):
-        return True
-    # Podman uses /run/.containerenv
-    if os.path.exists("/run/.containerenv"):
-        return True
-    # Check cgroup for container runtime evidence (works for both Docker & Podman)
-    try:
-        with open("/proc/1/cgroup", "r") as f:
-            cgroup = f.read()
-            if "docker" in cgroup or "podman" in cgroup or "/lxc/" in cgroup:
-                return True
-    except OSError:
-        pass
-    return False
-
-
 def get_container_exec_info() -> Optional[dict]:
    """Read container mode metadata from HERMES_HOME/.container-mode.

@@ -180,7 +162,8 @@ def get_container_exec_info() -> Optional[dict]:
    if os.environ.get("HERMES_DEV") == "1":
        return None

-    if _is_inside_container():
+    from hermes_constants import is_container
+    if is_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"
@@ -354,6 +337,10 @@ DEFAULT_CONFIG = {
        # threshold before escalating to a full timeout.  The warning fires
        # once per run and does not interrupt the agent.  0 = disable warning.
        "gateway_timeout_warning": 900,
+        # Periodic "still working" notification interval (seconds).
+        # Sends a status message every N seconds so the user knows the
+        # agent hasn't died during long tasks.  0 = disable notifications.
+        "gateway_notify_interval": 600,
    },
    
    "terminal": {
@@ -427,9 +414,7 @@ DEFAULT_CONFIG = {
        "threshold": 0.50,            # compress when context usage exceeds this ratio
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
-        "summary_model": "",          # empty = use main configured model
-        "summary_provider": "auto",
-        "summary_base_url": None,
+
    },
    "smart_model_routing": {
        "enabled": False,
@@ -706,8 +691,16 @@ DEFAULT_CONFIG = {
        "backup_count": 3,     # Number of rotated backup files to keep
    },

+    # Network settings — workarounds for connectivity issues.
+    "network": {
+        # Force IPv4 connections.  On servers with broken or unreachable IPv6,
+        # Python tries AAAA records first and hangs for the full TCP timeout
+        # before falling back to IPv4.  Set to true to skip IPv6 entirely.
+        "force_ipv4": False,
+    },
+
    # Config schema version - bump this when adding new required fields
-    "_config_version": 16,
+    "_config_version": 17,
 }

 # =============================================================================
@@ -1285,6 +1278,14 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
+    "MATRIX_RECOVERY_KEY": {
+        "description": "Matrix recovery key for cross-signing verification after device key rotation (from Element: Settings → Security → Recovery Key)",
+        "prompt": "Matrix recovery key",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+        "advanced": True,
+    },
    "BLUEBUBBLES_SERVER_URL": {
        "description": "BlueBubbles server URL for iMessage integration (e.g. http://192.168.1.10:1234)",
        "prompt": "BlueBubbles server URL",
@@ -1972,6 +1973,43 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                print(f"  ✓ Migrated tool_progress_overrides → display.platforms: {migrated}")
            results["config_added"].append("display.platforms (migrated from tool_progress_overrides)")

+    # ── Version 16 → 17: remove legacy compression.summary_* keys ──
+    if current_ver < 17:
+        config = read_raw_config()
+        comp = config.get("compression", {})
+        if isinstance(comp, dict):
+            s_model = comp.pop("summary_model", None)
+            s_provider = comp.pop("summary_provider", None)
+            s_base_url = comp.pop("summary_base_url", None)
+            migrated_keys = []
+            # Migrate non-empty, non-default values to auxiliary.compression
+            if s_model and str(s_model).strip():
+                aux = config.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                if not aux_comp.get("model"):
+                    aux_comp["model"] = str(s_model).strip()
+                    migrated_keys.append(f"model={s_model}")
+            if s_provider and str(s_provider).strip() not in ("", "auto"):
+                aux = config.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                if not aux_comp.get("provider") or aux_comp.get("provider") == "auto":
+                    aux_comp["provider"] = str(s_provider).strip()
+                    migrated_keys.append(f"provider={s_provider}")
+            if s_base_url and str(s_base_url).strip():
+                aux = config.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                if not aux_comp.get("base_url"):
+                    aux_comp["base_url"] = str(s_base_url).strip()
+                    migrated_keys.append(f"base_url={s_base_url}")
+            if migrated_keys or s_model is not None or s_provider is not None or s_base_url is not None:
+                config["compression"] = comp
+                save_config(config)
+                if not quiet:
+                    if migrated_keys:
+                        print(f"  ✓ Migrated compression.summary_* → auxiliary.compression: {', '.join(migrated_keys)}")
+                    else:
+                        print("  ✓ Removed unused compression.summary_* keys")
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -2381,7 +2419,13 @@ def save_config(config: Dict[str, Any]):


 def load_env() -> Dict[str, str]:
-    """Load environment variables from ~/.hermes/.env."""
+    """Load environment variables from ~/.hermes/.env.
+
+    Sanitizes lines before parsing so that corrupted files (e.g.
+    concatenated KEY=VALUE pairs on a single line) are handled
+    gracefully instead of producing mangled values such as duplicated
+    bot tokens.  See #8908.
+    """
    env_path = get_env_path()
    env_vars = {}
    
@@ -2390,17 +2434,21 @@ def load_env() -> Dict[str, str]:
        # fail on UTF-8 .env files. Use explicit UTF-8 only on Windows.
        open_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
        with open(env_path, **open_kw) as f:
-            for line in f:
-                line = line.strip()
-                if line and not line.startswith('#') and '=' in line:
-                    key, _, value = line.partition('=')
-                    env_vars[key.strip()] = value.strip().strip('"\'')
+            raw_lines = f.readlines()
+        # Sanitize before parsing: split concatenated lines & drop stale
+        # placeholders so corrupted .env files don't produce invalid tokens.
+        lines = _sanitize_env_lines(raw_lines)
+        for line in lines:
+            line = line.strip()
+            if line and not line.startswith('#') and '=' in line:
+                key, _, value = line.partition('=')
+                env_vars[key.strip()] = value.strip().strip('"\'')
    
    return env_vars


 def _sanitize_env_lines(lines: list) -> list:
-    """Fix corrupted .env lines before writing.
+    """Fix corrupted .env lines before reading or writing.

    Handles two known corruption patterns:
    1. Concatenated KEY=VALUE pairs on a single line (missing newline between
@@ -2633,6 +2681,28 @@ def save_env_value_secure(key: str, value: str) -> Dict[str, Any]:



+def reload_env() -> int:
+    """Sync ``os.environ`` with the current contents of ~/.hermes/.env.
+
+    Every key found in the file is written into the process environment
+    when its value differs from what is already there.  Keys that Hermes
+    knows about (``OPTIONAL_ENV_VARS`` plus ``_EXTRA_ENV_KEYS``) but that
+    are no longer present in the file are removed from the environment;
+    variables outside that known set are never deleted.
+
+    Returns:
+        The number of variables that were set, changed, or removed.
+    """
+    file_vars = load_env()
+    changed = 0
+
+    # Apply additions and modifications coming from the file.
+    for name in file_vars:
+        new_value = file_vars[name]
+        if os.environ.get(name) == new_value:
+            continue
+        os.environ[name] = new_value
+        changed += 1
+
+    # Drop Hermes-owned variables that have disappeared from the file.
+    hermes_keys = set(OPTIONAL_ENV_VARS.keys()) | _EXTRA_ENV_KEYS
+    stale = [name for name in hermes_keys
+             if name not in file_vars and name in os.environ]
+    for name in stale:
+        del os.environ[name]
+        changed += 1
+
+    return changed
+
+
 def get_env_value(key: str) -> Optional[str]:
    """Get a value from ~/.hermes/.env or environment."""
    # Check environment first
@@ -2755,10 +2825,11 @@ def show_config():
        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
        print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
-        _sm = compression.get('summary_model', '') or '(main model)'
+        _aux_comp = config.get('auxiliary', {}).get('compression', {})
+        _sm = _aux_comp.get('model', '') or '(auto)'
        print(f"  Model:        {_sm}")
-        comp_provider = compression.get('summary_provider', 'auto')
-        if comp_provider != 'auto':
+        comp_provider = _aux_comp.get('provider', 'auto')
+        if comp_provider and comp_provider != 'auto':
            print(f"  Provider:     {comp_provider}")
    
    # Auxiliary models
@@ -117,14 +117,30 @@ def _gh_cli_candidates() -> list[str]:


 def _try_gh_cli_token() -> Optional[str]:
-    """Return a token from ``gh auth token`` when the GitHub CLI is available."""
+    """Return a token from ``gh auth token`` when the GitHub CLI is available.
+
+    When COPILOT_GH_HOST is set, passes ``--hostname`` so gh returns the
+    correct host's token.  Also strips GITHUB_TOKEN / GH_TOKEN from the
+    subprocess environment so ``gh`` reads from its own credential store
+    (hosts.yml) instead of just echoing the env var back.
+    """
+    hostname = os.getenv("COPILOT_GH_HOST", "").strip()
+
+    # Build a clean env so gh doesn't short-circuit on GITHUB_TOKEN / GH_TOKEN
+    clean_env = {k: v for k, v in os.environ.items()
+                 if k not in ("GITHUB_TOKEN", "GH_TOKEN")}
+
    for gh_path in _gh_cli_candidates():
+        cmd = [gh_path, "auth", "token"]
+        if hostname:
+            cmd += ["--hostname", hostname]
        try:
            result = subprocess.run(
-                [gh_path, "auth", "token"],
+                cmd,
                capture_output=True,
                text=True,
                timeout=5,
+                env=clean_env,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
            logger.debug("gh CLI token lookup failed (%s): %s", gh_path, exc)
@@ -0,0 +1,336 @@
+"""``hermes debug`` — debug tools for Hermes Agent.
+
+Currently supports:
+    hermes debug share    Upload debug report (system info + logs) to a
+                          paste service and print a shareable URL.
+"""
+
+import io
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Optional
+
+from hermes_constants import get_hermes_home
+
+
+# ---------------------------------------------------------------------------
+# Paste services — try paste.rs first, dpaste.com as fallback.
+# ---------------------------------------------------------------------------
+
+_PASTE_RS_URL = "https://paste.rs/"
+_DPASTE_COM_URL = "https://dpaste.com/api/"
+
+# Maximum bytes to read from a single log file for upload.
+# paste.rs caps at ~1 MB; we stay under that with headroom.
+_MAX_LOG_BYTES = 512_000
+
+
+def _upload_paste_rs(content: str) -> str:
+    """POST *content* to paste.rs and return the URL of the new paste.
+
+    The request body is the raw UTF-8 text; the response body is expected
+    to be the paste URL itself.
+
+    Raises:
+        ValueError: if the response body does not start with ``http``.
+    """
+    request = urllib.request.Request(
+        _PASTE_RS_URL,
+        data=content.encode("utf-8"),
+        method="POST",
+        headers={
+            "Content-Type": "text/plain; charset=utf-8",
+            "User-Agent": "hermes-agent/debug-share",
+        },
+    )
+    with urllib.request.urlopen(request, timeout=30) as response:
+        raw = response.read()
+
+    paste_url = raw.decode("utf-8").strip()
+    if paste_url.startswith("http"):
+        return paste_url
+    raise ValueError(f"Unexpected response from paste.rs: {paste_url[:200]}")
+
+
+def _upload_dpaste_com(content: str, expiry_days: int = 7) -> str:
+    """POST *content* to dpaste.com and return the URL of the new paste.
+
+    The body is a hand-assembled ``multipart/form-data`` payload with the
+    fields ``content``, ``syntax`` (always ``text``) and ``expiry_days``.
+
+    Raises:
+        ValueError: if the response body does not start with ``http``.
+    """
+    boundary = "----HermesDebugBoundary9f3c"
+    form_fields = (
+        ("content", content),
+        ("syntax", "text"),
+        ("expiry_days", str(expiry_days)),
+    )
+
+    # One multipart section per field, then the closing delimiter.
+    chunks = []
+    for name, value in form_fields:
+        chunks.append(f"--{boundary}\r\n")
+        chunks.append(f'Content-Disposition: form-data; name="{name}"\r\n')
+        chunks.append("\r\n")
+        chunks.append(f"{value}\r\n")
+    chunks.append(f"--{boundary}--\r\n")
+    payload = "".join(chunks).encode("utf-8")
+
+    request = urllib.request.Request(
+        _DPASTE_COM_URL,
+        data=payload,
+        method="POST",
+        headers={
+            "Content-Type": f"multipart/form-data; boundary={boundary}",
+            "User-Agent": "hermes-agent/debug-share",
+        },
+    )
+    with urllib.request.urlopen(request, timeout=30) as response:
+        raw = response.read()
+
+    paste_url = raw.decode("utf-8").strip()
+    if paste_url.startswith("http"):
+        return paste_url
+    raise ValueError(f"Unexpected response from dpaste.com: {paste_url[:200]}")
+
+
+def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
+    """Upload *content* to the first paste service that accepts it.
+
+    Services are tried in order: paste.rs, then dpaste.com (the only one
+    that receives *expiry_days*).  The first successful URL is returned.
+
+    Raises:
+        RuntimeError: if every service fails; the message lists each
+            service's error on its own line.
+    """
+    attempts = (
+        ("paste.rs", lambda: _upload_paste_rs(content)),
+        ("dpaste.com", lambda: _upload_dpaste_com(content, expiry_days=expiry_days)),
+    )
+
+    problems: list[str] = []
+    for service, attempt in attempts:
+        try:
+            return attempt()
+        except Exception as exc:
+            # Remember why this service failed and move on to the next.
+            problems.append(f"{service}: {exc}")
+
+    raise RuntimeError(
+        "Failed to upload to any paste service:\n  " + "\n  ".join(problems)
+    )
+
+
+# ---------------------------------------------------------------------------
+# Log file reading
+# ---------------------------------------------------------------------------
+
+def _resolve_log_path(log_name: str) -> Optional[Path]:
+    """Locate the on-disk file backing the log called *log_name*.
+
+    The current log file is preferred; when it is missing or empty the
+    first rotation (``<filename>.1``) is used instead.
+
+    Returns the path of the first non-empty candidate, or None when
+    *log_name* is not a key of ``LOG_FILES`` or no candidate has content.
+    """
+    from hermes_cli.logs import LOG_FILES
+
+    filename = LOG_FILES.get(log_name)
+    if not filename:
+        return None
+
+    logs_root = get_hermes_home() / "logs"
+    # Candidates in priority order: live file, then most recent rotation.
+    for candidate in (logs_root / filename, logs_root / f"{filename}.1"):
+        if candidate.exists() and candidate.stat().st_size > 0:
+            return candidate
+
+    return None
+
+
+def _read_log_tail(log_name: str, num_lines: int) -> str:
+    """Return the final *num_lines* lines of a log as one string.
+
+    Never raises: a missing log yields ``"(file not found)"`` and a read
+    failure yields ``"(error reading: ...)"`` so the caller can embed the
+    result in a report unconditionally.
+    """
+    from hermes_cli.logs import _read_last_n_lines
+
+    source = _resolve_log_path(log_name)
+    if source is None:
+        return "(file not found)"
+
+    try:
+        tail = "".join(_read_last_n_lines(source, num_lines))
+    except Exception as exc:
+        return f"(error reading: {exc})"
+    return tail.rstrip("\n")
+
+
+def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
+    """Load a whole log file (or its tail) for a standalone paste.
+
+    Files up to *max_bytes* are returned in full.  Larger files are cut
+    to roughly their last *max_bytes* bytes, starting at the first line
+    boundary after the cut, and prefixed with a truncation banner.
+
+    Returns None when the log cannot be resolved, is empty, or any error
+    occurs while reading it.
+    """
+    target = _resolve_log_path(log_name)
+    if target is None:
+        return None
+
+    try:
+        total = target.stat().st_size
+        if total == 0:
+            return None
+
+        if total > max_bytes:
+            with open(target, "rb") as handle:
+                handle.seek(total - max_bytes)
+                # The seek almost certainly landed mid-line; discard the fragment.
+                handle.readline()
+                tail = handle.read().decode("utf-8", errors="replace")
+            return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{tail}"
+
+        return target.read_text(encoding="utf-8", errors="replace")
+    except Exception:
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Debug report collection
+# ---------------------------------------------------------------------------
+
+def _capture_dump() -> str:
+    """Run ``hermes dump`` in-process and return everything it printed.
+
+    ``run_dump`` is invoked with a stand-in args object whose
+    ``show_keys`` is False.  A ``SystemExit`` raised by ``run_dump`` is
+    swallowed so that whatever was printed before the exit is still
+    returned.
+    """
+    from contextlib import redirect_stdout
+
+    from hermes_cli.dump import run_dump
+
+    class _FakeArgs:
+        show_keys = False
+
+    capture = io.StringIO()
+    # redirect_stdout restores the previous sys.stdout on exit, including
+    # when run_dump raises.
+    with redirect_stdout(capture):
+        try:
+            run_dump(_FakeArgs())
+        except SystemExit:
+            pass
+
+    return capture.getvalue()
+
+
+def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
+    """Assemble the summary report: system dump followed by log tails.
+
+    Parameters
+    ----------
+    log_lines
+        How many trailing lines of agent.log to include.  errors.log and
+        gateway.log are capped at ``min(log_lines, 100)`` lines each.
+    dump_text
+        Already-captured ``hermes dump`` output.  When empty, the dump is
+        captured here.
+
+    Returns the report as plain text ready for upload.
+    """
+    if not dump_text:
+        dump_text = _capture_dump()
+
+    errors_lines = min(log_lines, 100)
+    sections = (
+        ("agent", log_lines),
+        ("errors", errors_lines),
+        ("gateway", errors_lines),
+    )
+
+    # ── Recent log tails (summary only) ──────────────────────────────────
+    pieces = [dump_text, "\n\n"]
+    last_index = len(sections) - 1
+    for index, (log_name, count) in enumerate(sections):
+        pieces.append(f"--- {log_name}.log (last {count} lines) ---\n")
+        pieces.append(_read_log_tail(log_name, count))
+        # Sections are separated by a blank line; the report ends with one newline.
+        pieces.append("\n" if index == last_index else "\n\n")
+
+    return "".join(pieces)
+
+
+# ---------------------------------------------------------------------------
+# CLI entry points
+# ---------------------------------------------------------------------------
+
+def run_debug_share(args):
+    """Build the debug report and full logs, then upload or print them.
+
+    Reads three optional attributes from *args*: ``lines`` (log lines in
+    the summary, default 200), ``expire`` (paste expiry in days, default
+    7) and ``local`` (print instead of upload, default False).
+
+    Exits the process with status 1 when the summary report cannot be
+    uploaded; failures to upload the optional full logs are only listed.
+    """
+    log_lines = getattr(args, "lines", 200)
+    expiry = getattr(args, "expire", 7)
+    local_only = getattr(args, "local", False)
+
+    print("Collecting debug report...")
+
+    # Capture the dump once; it heads the summary and every full-log paste.
+    dump_text = _capture_dump()
+    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
+
+    # Full logs keyed by display label, each made self-contained by
+    # prepending the dump.  Logs that are missing or empty are left out.
+    full_logs: dict[str, str] = {}
+    for log_name in ("agent", "gateway"):
+        body = _read_full_log(log_name)
+        if body:
+            full_logs[f"{log_name}.log"] = (
+                dump_text + f"\n\n--- full {log_name}.log ---\n" + body
+            )
+
+    if local_only:
+        print(report)
+        rule = "=" * 60
+        for label, text in full_logs.items():
+            print(f"\n\n{rule}")
+            print(f"FULL {label}")
+            print(f"{rule}\n")
+            print(text)
+        return
+
+    print("Uploading...")
+
+    # 1. Summary report (required)
+    try:
+        report_url = upload_to_pastebin(report, expiry_days=expiry)
+    except RuntimeError as exc:
+        print(f"\nUpload failed: {exc}", file=sys.stderr)
+        print("\nFull report printed below — copy-paste it manually:\n")
+        print(report)
+        sys.exit(1)
+
+    urls: dict[str, str] = {"Report": report_url}
+    failures: list[str] = []
+
+    # 2. Full logs (optional) — a failure here is reported, not fatal.
+    for label, text in full_logs.items():
+        try:
+            urls[label] = upload_to_pastebin(text, expiry_days=expiry)
+        except Exception as exc:
+            failures.append(f"{label}: {exc}")
+
+    # Print results
+    label_width = max(map(len, urls))
+    print("\nDebug report uploaded:")
+    for label, url in urls.items():
+        print(f"  {label:<{label_width}}  {url}")
+
+    if failures:
+        print(f"\n  (failed to upload: {', '.join(failures)})")
+
+    print("\nShare these links with the Hermes team for support.")
+
+
+def run_debug(args):
+    """Dispatch ``hermes debug <subcommand>``.
+
+    ``share`` is forwarded to ``run_debug_share``; anything else
+    (including no subcommand) prints the usage text.
+    """
+    if getattr(args, "debug_command", None) == "share":
+        run_debug_share(args)
+        return
+
+    # Default: show help
+    usage_lines = (
+        "Usage: hermes debug share [--lines N] [--expire N] [--local]",
+        "",
+        "Commands:",
+        "  share    Upload debug report to a paste service and print URL",
+        "",
+        "Options:",
+        "  --lines N    Number of log lines to include (default: 200)",
+        "  --expire N   Paste expiry in days (default: 7)",
+        "  --local      Print report locally instead of uploading",
+    )
+    for text in usage_lines:
+        print(text)
@@ -44,6 +44,16 @@ def _redact(value: str) -> str:
 def _gateway_status() -> str:
    """Return a short gateway status string."""
    if sys.platform.startswith("linux"):
+        from hermes_constants import is_container
+        if is_container():
+            try:
+                from hermes_cli.gateway import find_gateway_pids
+                pids = find_gateway_pids()
+                if pids:
+                    return f"running (docker, pid {pids[0]})"
+                return "stopped (docker)"
+            except Exception:
+                return "stopped (docker)"
        try:
            from hermes_cli.gateway import get_service_name
            svc = get_service_name()
@@ -15,6 +15,51 @@ def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")


+def _sanitize_env_file_if_needed(path: Path) -> None:
+    """Pre-sanitize a .env file before python-dotenv reads it.
+
+    python-dotenv does not handle corrupted lines where multiple
+    KEY=VALUE pairs are concatenated on a single line (missing newline).
+    This produces mangled values — e.g. a bot token duplicated 8×
+    (see #8908).
+
+    We delegate to ``hermes_cli.config._sanitize_env_lines`` which
+    already knows all valid Hermes env-var names and can split
+    concatenated lines correctly.
+
+    The file is rewritten (atomically, via a temp file in the same
+    directory) only when sanitizing changed something.  It is decoded as
+    strict UTF-8 first and as latin-1 when that fails, and written back
+    in the encoding that was used to read it, so bytes that are not valid
+    UTF-8 survive the rewrite unchanged instead of being replaced with
+    U+FFFD.
+
+    Best-effort: every error is swallowed so startup is never blocked.
+    """
+    if not path.exists():
+        return
+    try:
+        from hermes_cli.config import _sanitize_env_lines
+    except ImportError:
+        return  # early bootstrap — config module not available yet
+
+    try:
+        # latin-1 maps every byte to a code point, so the fallback cannot
+        # fail and round-trips losslessly; it matches the latin-1 retry
+        # this module uses when loading the file with python-dotenv.
+        encoding = "utf-8"
+        try:
+            with open(path, encoding=encoding) as f:
+                original = f.readlines()
+        except UnicodeDecodeError:
+            encoding = "latin-1"
+            with open(path, encoding=encoding) as f:
+                original = f.readlines()
+
+        sanitized = _sanitize_env_lines(original)
+        if sanitized != original:
+            import tempfile
+            fd, tmp = tempfile.mkstemp(
+                dir=str(path.parent), suffix=".tmp", prefix=".env_"
+            )
+            try:
+                with os.fdopen(fd, "w", encoding=encoding) as f:
+                    f.writelines(sanitized)
+                    f.flush()
+                    os.fsync(f.fileno())
+                os.replace(tmp, path)
+            except BaseException:
+                # Never leave a stray temp file next to the real .env.
+                try:
+                    os.unlink(tmp)
+                except OSError:
+                    pass
+                raise
+    except Exception:
+        pass  # best-effort — don't block gateway startup
+
+
 def load_hermes_dotenv(
    *,
    hermes_home: str | os.PathLike | None = None,
@@ -34,6 +79,10 @@ def load_hermes_dotenv(
    user_env = home_path / ".env"
    project_env_path = Path(project_env) if project_env else None

+    # Fix corrupted .env files before python-dotenv parses them (#8908).
+    if user_env.exists():
+        _sanitize_env_file_if_needed(user_env)
+
    if user_env.exists():
        _load_dotenv_with_fallback(user_env, override=True)
        loaded.append(user_env)
@@ -331,7 +331,7 @@ def is_linux() -> bool:
    return sys.platform.startswith('linux')


-from hermes_constants import is_termux, is_wsl
+from hermes_constants import is_container, is_termux, is_wsl


 def _wsl_systemd_operational() -> bool:
@@ -353,7 +353,9 @@ def _wsl_systemd_operational() -> bool:


 def supports_systemd_services() -> bool:
-    if not is_linux() or is_termux():
+    if not is_linux() or is_termux() or is_container():
+        return False
+    if shutil.which("systemctl") is None:
        return False
    if is_wsl():
        return _wsl_systemd_operational()
@@ -483,6 +485,21 @@ def _journalctl_cmd(system: bool = False) -> list[str]:
    return ["journalctl"] if system else ["journalctl", "--user"]


+def _run_systemctl(args: list[str], *, system: bool = False, **kwargs) -> subprocess.CompletedProcess:
+    """Invoke ``systemctl`` with *args* and return the completed process.
+
+    The command prefix comes from ``_systemctl_cmd(system)``; all extra
+    keyword arguments are passed straight to ``subprocess.run``.
+
+    Callers are expected to be gated by ``supports_systemd_services()``.
+    As defense-in-depth, a missing ``systemctl`` binary surfaces as a
+    ``RuntimeError`` with a clear message rather than a raw
+    ``FileNotFoundError`` traceback.
+    """
+    try:
+        command = _systemctl_cmd(system) + args
+        completed = subprocess.run(command, **kwargs)
+    except FileNotFoundError:
+        # "from None" hides the original exception from the traceback.
+        raise RuntimeError(
+            "systemctl is not available on this system"
+        ) from None
+    else:
+        return completed
+
+
 def _service_scope_label(system: bool = False) -> str:
    return "system" if system else "user"

@@ -751,14 +768,22 @@ def _remap_path_for_user(path: str, target_home_dir: str) -> str:

      /root/.hermes/hermes-agent  -> /home/alice/.hermes/hermes-agent
      /opt/hermes                 -> /opt/hermes  (kept as-is)
+
+    Note: this function intentionally does NOT resolve symlinks. A venv's
+    ``bin/python`` is typically a symlink to the base interpreter (e.g. a
+    uv-managed CPython at ``~/.local/share/uv/python/.../python3.11``);
+    resolving that symlink swaps the unit's ``ExecStart`` to a bare Python
+    that has none of the venv's site-packages, so the service crashes on
+    the first ``import``. Keep the symlinked path so the venv activates
+    its own environment. Lexical expansion only via ``expanduser``.
    """
-    current_home = Path.home().resolve()
-    resolved = Path(path).resolve()
+    current_home = Path.home()
+    p = Path(path).expanduser()
    try:
-        relative = resolved.relative_to(current_home)
+        relative = p.relative_to(current_home)
        return str(Path(target_home_dir) / relative)
    except ValueError:
-        return str(resolved)
+        return str(p)


 def _hermes_home_for_target_user(target_home_dir: str) -> str:
@@ -929,7 +954,7 @@ def refresh_systemd_unit_if_needed(system: bool = False) -> bool:

    expected_user = _read_systemd_user_from_unit(unit_path) if system else None
    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=expected_user), encoding="utf-8")
-    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
+    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
    print(f"↻ Updated gateway {_service_scope_label(system)} service definition to match the current Hermes install")
    return True

@@ -1025,7 +1050,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
        if not systemd_unit_is_current(system=system):
            print(f"↻ Repairing outdated {_service_scope_label(system)} systemd service at: {unit_path}")
            refresh_systemd_unit_if_needed(system=system)
-            subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True, timeout=30)
+            _run_systemctl(["enable", get_service_name()], system=system, check=True, timeout=30)
            print(f"✓ {_service_scope_label(system).capitalize()} service definition updated")
            return
        print(f"Service already installed at: {unit_path}")
@@ -1036,8 +1061,8 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
    print(f"Installing {_service_scope_label(system)} systemd service to: {unit_path}")
    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8")

-    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
-    subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True, timeout=30)
+    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
+    _run_systemctl(["enable", get_service_name()], system=system, check=True, timeout=30)

    print()
    print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!")
@@ -1063,15 +1088,15 @@ def systemd_uninstall(system: bool = False):
    if system:
        _require_root_for_system_service("uninstall")

-    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=False, timeout=90)
-    subprocess.run(_systemctl_cmd(system) + ["disable", get_service_name()], check=False, timeout=30)
+    _run_systemctl(["stop", get_service_name()], system=system, check=False, timeout=90)
+    _run_systemctl(["disable", get_service_name()], system=system, check=False, timeout=30)

    unit_path = get_systemd_unit_path(system=system)
    if unit_path.exists():
        unit_path.unlink()
        print(f"✓ Removed {unit_path}")

-    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
+    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled")


@@ -1080,7 +1105,7 @@ def systemd_start(system: bool = False):
    if system:
        _require_root_for_system_service("start")
    refresh_systemd_unit_if_needed(system=system)
-    subprocess.run(_systemctl_cmd(system) + ["start", get_service_name()], check=True, timeout=30)
+    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")


@@ -1089,7 +1114,7 @@ def systemd_stop(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("stop")
-    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=True, timeout=90)
+    _run_systemctl(["stop", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service stopped")


@@ -1105,7 +1130,7 @@ def systemd_restart(system: bool = False):
    if pid is not None and _request_gateway_self_restart(pid):
        print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
        return
-    subprocess.run(_systemctl_cmd(system) + ["reload-or-restart", get_service_name()], check=True, timeout=90)
+    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")


@@ -1129,14 +1154,16 @@ def systemd_status(deep: bool = False, system: bool = False):
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

-    subprocess.run(
-        _systemctl_cmd(system) + ["status", get_service_name(), "--no-pager"],
+    _run_systemctl(
+        ["status", get_service_name(), "--no-pager"],
+        system=system,
        capture_output=False,
        timeout=10,
    )

-    result = subprocess.run(
-        _systemctl_cmd(system) + ["is-active", get_service_name()],
+    result = _run_systemctl(
+        ["is-active", get_service_name()],
+        system=system,
        capture_output=True,
        text=True,
        timeout=10,
@@ -2100,12 +2127,6 @@ def _setup_dingtalk():
    _setup_standard_platform(dingtalk_platform)


-def _setup_feishu():
-    """Configure Feishu / Lark via the standard platform setup."""
-    feishu_platform = next(p for p in _PLATFORMS if p["key"] == "feishu")
-    _setup_standard_platform(feishu_platform)
-
-
 def _setup_wecom():
    """Configure WeCom (Enterprise WeChat) via the standard platform setup."""
    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
@@ -2129,24 +2150,24 @@ def _is_service_running() -> bool:

        if user_unit_exists:
            try:
-                result = subprocess.run(
-                    _systemctl_cmd(False) + ["is-active", get_service_name()],
-                    capture_output=True, text=True, timeout=10,
+                result = _run_systemctl(
+                    ["is-active", get_service_name()],
+                    system=False, capture_output=True, text=True, timeout=10,
                )
                if result.stdout.strip() == "active":
                    return True
-            except subprocess.TimeoutExpired:
+            except (RuntimeError, subprocess.TimeoutExpired):
                pass

        if system_unit_exists:
            try:
-                result = subprocess.run(
-                    _systemctl_cmd(True) + ["is-active", get_service_name()],
-                    capture_output=True, text=True, timeout=10,
+                result = _run_systemctl(
+                    ["is-active", get_service_name()],
+                    system=True, capture_output=True, text=True, timeout=10,
                )
                if result.stdout.strip() == "active":
                    return True
-            except subprocess.TimeoutExpired:
+            except (RuntimeError, subprocess.TimeoutExpired):
                pass

        return False
@@ -2290,6 +2311,178 @@ def _setup_weixin():
        print_info(f"  User ID: {user_id}")


+def _setup_feishu():
+    """Interactive setup for Feishu / Lark — scan-to-create or manual credentials.
+
+    Walks the user through, in order: choosing a setup method (QR
+    registration or manual App ID / App Secret entry), the connection mode,
+    the direct-message authorization policy, the group-chat policy, and an
+    optional home chat.  Each answer is persisted with ``save_env_value``
+    under a ``FEISHU_*`` key.
+
+    Returns early without saving anything when the user declines to
+    reconfigure an existing setup, interrupts the QR flow, or leaves the
+    App ID / App Secret prompt empty.
+    """
+    print()
+    print(color("  ─── 🪽 Feishu / Lark Setup ───", Colors.CYAN))
+
+    existing_app_id = get_env_value("FEISHU_APP_ID")
+    existing_secret = get_env_value("FEISHU_APP_SECRET")
+    if existing_app_id and existing_secret:
+        print()
+        print_success("Feishu / Lark is already configured.")
+        if not prompt_yes_no("  Reconfigure Feishu / Lark?", False):
+            return
+
+    # ── Choose setup method ──
+    print()
+    method_choices = [
+        "Scan QR code to create a new bot automatically (recommended)",
+        "Enter existing App ID and App Secret manually",
+    ]
+    method_idx = prompt_choice("  How would you like to set up Feishu / Lark?", method_choices, 0)
+
+    credentials = None
+    used_qr = False
+
+    if method_idx == 0:
+        # ── QR scan-to-create ──
+        try:
+            from gateway.platforms.feishu import qr_register
+        except Exception as exc:
+            print_error(f"  Feishu / Lark onboard import failed: {exc}")
+            qr_register = None
+
+        if qr_register is not None:
+            try:
+                credentials = qr_register()
+            except KeyboardInterrupt:
+                print()
+                print_warning("  Feishu / Lark setup cancelled.")
+                return
+            except Exception as exc:
+                print_warning(f"  QR registration failed: {exc}")
+        if credentials:
+            used_qr = True
+        else:
+            # Any QR failure falls through to the manual prompts below.
+            print_info("  QR setup did not complete. Continuing with manual input.")
+
+    # ── Manual credential input ──
+    if not credentials:
+        print()
+        print_info("  Go to https://open.feishu.cn/ (or https://open.larksuite.com/ for Lark)")
+        print_info("  Create an app, enable the Bot capability, and copy the credentials.")
+        print()
+        app_id = prompt("  App ID", password=False)
+        if not app_id:
+            print_warning("  Skipped — Feishu / Lark won't work without an App ID.")
+            return
+        app_secret = prompt("  App Secret", password=True)
+        if not app_secret:
+            print_warning("  Skipped — Feishu / Lark won't work without an App Secret.")
+            return
+
+        domain_choices = ["feishu (China)", "lark (International)"]
+        domain_idx = prompt_choice("  Domain", domain_choices, 0)
+        domain = "lark" if domain_idx == 1 else "feishu"
+
+        # Try to probe the bot with manual credentials.  Verification is
+        # best-effort: the credentials are kept even when the probe fails.
+        bot_name = None
+        try:
+            from gateway.platforms.feishu import probe_bot
+            bot_info = probe_bot(app_id, app_secret, domain)
+            if bot_info:
+                bot_name = bot_info.get("bot_name")
+                print_success(f"  Credentials verified — bot: {bot_name or 'unnamed'}")
+            else:
+                print_warning("  Could not verify bot connection. Credentials saved anyway.")
+        except Exception as exc:
+            print_warning(f"  Credential verification skipped: {exc}")
+
+        # Same shape as the mapping read back below for the QR path.
+        credentials = {
+            "app_id": app_id,
+            "app_secret": app_secret,
+            "domain": domain,
+            "open_id": None,
+            "bot_name": bot_name,
+        }
+
+    # ── Save core credentials ──
+    app_id = credentials["app_id"]
+    app_secret = credentials["app_secret"]
+    domain = credentials.get("domain", "feishu")
+    open_id = credentials.get("open_id")
+    bot_name = credentials.get("bot_name")
+
+    save_env_value("FEISHU_APP_ID", app_id)
+    save_env_value("FEISHU_APP_SECRET", app_secret)
+    save_env_value("FEISHU_DOMAIN", domain)
+    # Bot identity is resolved at runtime via _hydrate_bot_identity().
+
+    # ── Connection mode ──
+    if used_qr:
+        # The QR flow skips the prompt and always uses WebSocket.
+        connection_mode = "websocket"
+    else:
+        print()
+        mode_choices = [
+            "WebSocket (recommended — no public URL needed)",
+            "Webhook (requires a reachable HTTP endpoint)",
+        ]
+        mode_idx = prompt_choice("  Connection mode", mode_choices, 0)
+        connection_mode = "webhook" if mode_idx == 1 else "websocket"
+        if connection_mode == "webhook":
+            print_info("  Webhook defaults: 127.0.0.1:8765/feishu/webhook")
+            print_info("  Override with FEISHU_WEBHOOK_HOST / FEISHU_WEBHOOK_PORT / FEISHU_WEBHOOK_PATH")
+            print_info("  For signature verification, set FEISHU_ENCRYPT_KEY and FEISHU_VERIFICATION_TOKEN")
+    save_env_value("FEISHU_CONNECTION_MODE", connection_mode)
+
+    # Only the QR flow creates a bot.  Manual entry reuses an existing app,
+    # and its name was already shown by the verification message above.
+    if used_qr and bot_name:
+        print()
+        print_success(f"  Bot created: {bot_name}")
+
+    # ── DM security policy ──
+    print()
+    access_choices = [
+        "Use DM pairing approval (recommended)",
+        "Allow all direct messages",
+        "Only allow listed user IDs",
+    ]
+    access_idx = prompt_choice("  How should direct messages be authorized?", access_choices, 0)
+    if access_idx == 0:
+        save_env_value("FEISHU_ALLOW_ALL_USERS", "false")
+        save_env_value("FEISHU_ALLOWED_USERS", "")
+        print_success("  DM pairing enabled.")
+        print_info("  Unknown users can request access; approve with `hermes pairing approve`.")
+    elif access_idx == 1:
+        save_env_value("FEISHU_ALLOW_ALL_USERS", "true")
+        save_env_value("FEISHU_ALLOWED_USERS", "")
+        print_warning("  Open DM access enabled for Feishu / Lark.")
+    else:
+        save_env_value("FEISHU_ALLOW_ALL_USERS", "false")
+        # Pre-fill with the open_id from the credentials, when there is one.
+        default_allow = open_id or ""
+        allowlist = prompt("  Allowed user IDs (comma-separated)", default_allow, password=False).replace(" ", "")
+        save_env_value("FEISHU_ALLOWED_USERS", allowlist)
+        print_success("  Allowlist saved.")
+
+    # ── Group policy ──
+    print()
+    group_choices = [
+        "Respond only when @mentioned in groups (recommended)",
+        "Disable group chats",
+    ]
+    group_idx = prompt_choice("  How should group chats be handled?", group_choices, 0)
+    if group_idx == 0:
+        save_env_value("FEISHU_GROUP_POLICY", "open")
+        print_info("  Group chats enabled (bot must be @mentioned).")
+    else:
+        save_env_value("FEISHU_GROUP_POLICY", "disabled")
+        print_info("  Group chats disabled.")
+
+    # ── Home channel ──
+    print()
+    home_channel = prompt("  Home chat ID (optional, for cron/notifications)", password=False)
+    if home_channel:
+        save_env_value("FEISHU_HOME_CHANNEL", home_channel)
+        print_success(f"  Home channel set to {home_channel}")
+
+    print()
+    print_success("🪽 Feishu / Lark configured!")
+    print_info(f"  App ID: {app_id}")
+    print_info(f"  Domain: {domain}")
+    if bot_name:
+        print_info(f"  Bot: {bot_name}")
+
+
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -2467,6 +2660,8 @@ def gateway_setup():
            _setup_signal()
        elif platform["key"] == "weixin":
            _setup_weixin()
+        elif platform["key"] == "feishu":
+            _setup_feishu()
        else:
            _setup_standard_platform(platform)

@@ -2606,6 +2801,15 @@ def gateway_command(args):
            print("  tmux new -s hermes 'hermes gateway run'         # persistent via tmux")
            print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # background")
            sys.exit(1)
+        elif is_container():
+            print("Service installation is not needed inside a Docker container.")
+            print("The container runtime is your service manager — use Docker restart policies instead:")
+            print()
+            print("  docker run --restart unless-stopped ...   # auto-restart on crash/reboot")
+            print("  docker restart <container>                # manual restart")
+            print()
+            print("To run the gateway: hermes gateway run")
+            sys.exit(0)
        else:
            print("Service installation not supported on this platform.")
            print("Run manually: hermes gateway run")
@@ -2624,10 +2828,17 @@ def gateway_command(args):
            systemd_uninstall(system=system)
        elif is_macos():
            launchd_uninstall()
+        elif is_container():
+            print("Service uninstall is not applicable inside a Docker container.")
+            print("To stop the gateway, stop or remove the container:")
+            print()
+            print("  docker stop <container>")
+            print("  docker rm <container>")
+            sys.exit(0)
        else:
            print("Not supported on this platform.")
            sys.exit(1)
-    
+
    elif subcmd == "start":
        system = getattr(args, 'system', False)
        if is_termux():
@@ -2648,10 +2859,19 @@ def gateway_command(args):
            print()
            print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
            sys.exit(1)
+        elif is_container():
+            print("Service start is not applicable inside a Docker container.")
+            print("The gateway runs as the container's main process.")
+            print()
+            print("  docker start <container>     # start a stopped container")
+            print("  docker restart <container>   # restart a running container")
+            print()
+            print("Or run the gateway directly: hermes gateway run")
+            sys.exit(0)
        else:
            print("Not supported on this platform.")
            sys.exit(1)
-    
+
    elif subcmd == "stop":
        stop_all = getattr(args, 'all', False)
        system = getattr(args, 'system', False)
@@ -1,16 +1,18 @@
 """``hermes logs`` — view and filter Hermes log files.

-Supports tailing, following, session filtering, level filtering, and
-relative time ranges.  All log files live under ``~/.hermes/logs/``.
+Supports tailing, following, session filtering, level filtering,
+component filtering, and relative time ranges.  All log files live
+under ``~/.hermes/logs/``.

 Usage examples::

    hermes logs                    # last 50 lines of agent.log
    hermes logs -f                 # follow agent.log in real time
    hermes logs errors             # last 50 lines of errors.log
-    hermes logs gateway -n 100     # last 100 lines of gateway.log
+    hermes logs gateway -n 100    # last 100 lines of gateway.log
    hermes logs --level WARNING    # only WARNING+ lines
    hermes logs --session abc123   # filter by session ID substring
+    hermes logs --component tools  # only tool-related lines
    hermes logs --since 1h         # lines from the last hour
    hermes logs --since 30m -f     # follow, starting 30 min ago
 """
@@ -20,7 +22,7 @@ import sys
 import time
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Sequence

 from hermes_constants import get_hermes_home, display_hermes_home

@@ -38,6 +40,15 @@ _TS_RE = re.compile(r"^(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})")
 # Level extraction — matches " INFO ", " WARNING ", " ERROR ", " DEBUG ", " CRITICAL "
 _LEVEL_RE = re.compile(r"\s(DEBUG|INFO|WARNING|ERROR|CRITICAL)\s")

+# Logger name extraction — after level and optional session tag, the next
+# non-space token before ":" is the logger name.
+# Matches: "INFO gateway.run:" or "INFO [sess_abc] tools.terminal_tool:"
+# Capture group 1 holds the logger name; the level and session tag are
+# matched but not captured.
+_LOGGER_NAME_RE = re.compile(
+    r"\s(?:DEBUG|INFO|WARNING|ERROR|CRITICAL)"  # level
+    r"(?:\s+\[.*?\])?"                           # optional session tag
+    r"\s+(\S+):"                                 # logger name
+)
+
 # Level ordering for >= filtering
 _LEVEL_ORDER = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3, "CRITICAL": 4}

@@ -79,12 +90,27 @@ def _extract_level(line: str) -> Optional[str]:
    return m.group(1) if m else None


+def _extract_logger_name(line: str) -> Optional[str]:
+    """Return the logger name found in *line*, or ``None`` if there is none."""
+    match = _LOGGER_NAME_RE.search(line)
+    if match is None:
+        return None
+    return match.group(1)
+
+
+def _line_matches_component(line: str, prefixes: Sequence[str]) -> bool:
+    """Tell whether the logger name in *line* begins with one of *prefixes*.
+
+    Lines from which no logger name can be extracted never match.
+    """
+    logger_name = _extract_logger_name(line)
+    return logger_name is not None and logger_name.startswith(tuple(prefixes))
+
+
 def _matches_filters(
    line: str,
    *,
    min_level: Optional[str] = None,
    session_filter: Optional[str] = None,
    since: Optional[datetime] = None,
+    component_prefixes: Optional[Sequence[str]] = None,
 ) -> bool:
    """Check if a log line passes all active filters."""
    if since is not None:
@@ -102,6 +128,10 @@ def _matches_filters(
        if session_filter not in line:
            return False

+    if component_prefixes is not None:
+        if not _line_matches_component(line, component_prefixes):
+            return False
+
    return True


@@ -113,6 +143,7 @@ def tail_log(
    level: Optional[str] = None,
    session: Optional[str] = None,
    since: Optional[str] = None,
+    component: Optional[str] = None,
 ) -> None:
    """Read and display log lines, optionally following in real time.

@@ -130,6 +161,8 @@ def tail_log(
        Session ID substring to filter on.
    since
        Relative time string (e.g. ``"1h"``, ``"30m"``).
+    component
+        Component name to filter by (e.g. ``"gateway"``, ``"tools"``).
    """
    filename = LOG_FILES.get(log_name)
    if filename is None:
@@ -155,13 +188,29 @@ def tail_log(
        print(f"Invalid --level: {level!r}. Use DEBUG, INFO, WARNING, ERROR, or CRITICAL.")
        sys.exit(1)

-    has_filters = min_level is not None or session is not None or since_dt is not None
+    # Resolve component to logger name prefixes
+    component_prefixes = None
+    if component:
+        from hermes_logging import COMPONENT_PREFIXES
+        component_lower = component.lower()
+        if component_lower not in COMPONENT_PREFIXES:
+            available = ", ".join(sorted(COMPONENT_PREFIXES))
+            print(f"Unknown component: {component!r}. Available: {available}")
+            sys.exit(1)
+        component_prefixes = COMPONENT_PREFIXES[component_lower]
+
+    has_filters = (
+        min_level is not None
+        or session is not None
+        or since_dt is not None
+        or component_prefixes is not None
+    )

    # Read and display the tail
    try:
        lines = _read_tail(log_path, num_lines, has_filters=has_filters,
                           min_level=min_level, session_filter=session,
-                           since=since_dt)
+                           since=since_dt, component_prefixes=component_prefixes)
    except PermissionError:
        print(f"Permission denied: {log_path}")
        sys.exit(1)
@@ -172,6 +221,8 @@ def tail_log(
        filter_parts.append(f"level>={min_level}")
    if session:
        filter_parts.append(f"session={session}")
+    if component:
+        filter_parts.append(f"component={component}")
    if since:
        filter_parts.append(f"since={since}")
    filter_desc = f" [{', '.join(filter_parts)}]" if filter_parts else ""
@@ -190,7 +241,7 @@ def tail_log(
    # Follow mode — poll for new content
    try:
        _follow_log(log_path, min_level=min_level, session_filter=session,
-                     since=since_dt)
+                     since=since_dt, component_prefixes=component_prefixes)
    except KeyboardInterrupt:
        print("\n--- stopped ---")

@@ -203,6 +254,7 @@ def _read_tail(
    min_level: Optional[str] = None,
    session_filter: Optional[str] = None,
    since: Optional[datetime] = None,
+    component_prefixes: Optional[Sequence[str]] = None,
 ) -> list:
    """Read the last *num_lines* matching lines from a log file.

@@ -215,7 +267,8 @@ def _read_tail(
        filtered = [
            l for l in raw_lines
            if _matches_filters(l, min_level=min_level,
-                                session_filter=session_filter, since=since)
+                                session_filter=session_filter, since=since,
+                                component_prefixes=component_prefixes)
        ]
        return filtered[-num_lines:]
    else:
@@ -284,6 +337,7 @@ def _follow_log(
    min_level: Optional[str] = None,
    session_filter: Optional[str] = None,
    since: Optional[datetime] = None,
+    component_prefixes: Optional[Sequence[str]] = None,
 ) -> None:
    """Poll a log file for new content and print matching lines."""
    with open(path, "r", encoding="utf-8", errors="replace") as f:
@@ -293,7 +347,8 @@ def _follow_log(
            line = f.readline()
            if line:
                if _matches_filters(line, min_level=min_level,
-                                    session_filter=session_filter, since=since):
+                                    session_filter=session_filter, since=since,
+                                    component_prefixes=component_prefixes):
                    print(line, end="")
                    sys.stdout.flush()
            else:
@@ -151,6 +151,18 @@ try:
 except Exception:
    pass  # best-effort — don't crash the CLI if logging setup fails

+# Apply IPv4 preference early, before any HTTP clients are created.
+# Only acts when the loaded config has a truthy ``network.force_ipv4``.
+try:
+    from hermes_cli.config import load_config as _load_config_early
+    from hermes_constants import apply_ipv4_preference as _apply_ipv4
+    _early_cfg = _load_config_early()
+    _net = _early_cfg.get("network", {})
+    # Guard against a non-mapping ``network`` value before reading the flag.
+    if isinstance(_net, dict) and _net.get("force_ipv4"):
+        _apply_ipv4(force=True)
+    # Drop the temporaries so they do not linger in the module namespace.
+    del _early_cfg, _net
+except Exception:
+    pass  # best-effort — don't crash if config isn't available yet
+
 import logging
 import time as _time
 from datetime import datetime
@@ -1095,6 +1107,7 @@ def select_provider_and_model(args=None):
                "base_url": base_url,
                "api_key": entry.get("api_key", ""),
                "model": entry.get("model", ""),
+                "api_mode": entry.get("api_mode", ""),
            }
        return custom_provider_map

@@ -1943,6 +1956,12 @@ def _model_flow_named_custom(config, provider_info):
    model["base_url"] = base_url
    if api_key:
        model["api_key"] = api_key
+    # Apply api_mode from custom_providers entry, or clear stale value
+    custom_api_mode = provider_info.get("api_mode", "")
+    if custom_api_mode:
+        model["api_mode"] = custom_api_mode
+    else:
+        model.pop("api_mode", None)  # let runtime auto-detect from URL
    save_config(cfg)
    deactivate_provider()

@@ -2480,8 +2499,11 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        print()
        override = ""
    if override and base_url_env:
-        save_env_value(base_url_env, override)
-        effective_base = override
+        if not override.startswith(("http://", "https://")):
+            print("  Invalid URL — must start with http:// or https://. Keeping current value.")
+        else:
+            save_env_value(base_url_env, override)
+            effective_base = override

    # Model selection — resolution order:
    #   1. models.dev registry (cached, filtered for agentic/tool-capable models)
@@ -2812,12 +2834,34 @@ def cmd_dump(args):
    run_dump(args)


+def cmd_debug(args):
+    """Entry point for ``hermes debug``; hands *args* to ``run_debug``."""
+    # Imported on demand, inside the handler rather than at module import time.
+    from hermes_cli.debug import run_debug as _run_debug
+    _run_debug(args)
+
+
 def cmd_config(args):
    """Configuration management."""
    from hermes_cli.config import config_command
    config_command(args)


+def cmd_backup(args):
+    """Entry point for ``hermes backup``.
+
+    Dispatches to ``run_quick_backup`` when ``args.quick`` is set and to
+    ``run_backup`` otherwise (including when the attribute is absent).
+    """
+    wants_quick = getattr(args, "quick", False)
+    if not wants_quick:
+        from hermes_cli.backup import run_backup
+        run_backup(args)
+        return
+    from hermes_cli.backup import run_quick_backup
+    run_quick_backup(args)
+
+
+def cmd_import(args):
+    """Entry point for ``hermes import``; hands *args* to ``run_import``."""
+    # Imported on demand, inside the handler rather than at module import time.
+    from hermes_cli.backup import run_import as _run_import
+    _run_import(args)
+
+
 def cmd_version(args):
    """Show version."""
    print(f"Hermes Agent v{__version__} ({__release_date__})")
@@ -2936,6 +2980,44 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
    return default


+def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
+    """Build the web UI frontend if npm is available.
+
+    Runs ``npm install --silent`` followed by ``npm run build`` inside
+    *web_dir*, stopping at the first step that exits non-zero.
+
+    Args:
+        web_dir: Path to the ``web/`` source directory.
+        fatal: If True, failures are reported as errors together with manual
+               build instructions (used by ``hermes dashboard``).  If False,
+               a missing npm is tolerated silently and a failing step only
+               prints a soft warning.
+
+    Returns:
+        True if the build succeeded, was skipped because *web_dir* has no
+        ``package.json``, or npm is missing in non-fatal mode.  False if npm
+        is missing in fatal mode or if either npm step fails.
+    """
+    if not (web_dir / "package.json").exists():
+        return True
+    import shutil
+    npm = shutil.which("npm")
+    if not npm:
+        if fatal:
+            print("Web UI frontend not built and npm is not available.")
+            print("Install Node.js, then run:  cd web && npm install && npm run build")
+        return not fatal
+
+    marker = "✗" if fatal else "⚠"
+    # The web UI is started with ``hermes dashboard``, so name that command.
+    suffix = "" if fatal else " (hermes dashboard will not be available)"
+    steps = (
+        ("npm install", [npm, "install", "--silent"]),
+        ("build", [npm, "run", "build"]),
+    )
+    print("→ Building web UI...")
+    for label, command in steps:
+        # Output is captured so npm's own logging does not reach the terminal.
+        result = subprocess.run(command, cwd=web_dir, capture_output=True)
+        if result.returncode != 0:
+            print(f"  {marker} Web UI {label} failed{suffix}")
+            if fatal:
+                print("  Run manually:  cd web && npm install && npm run build")
+            return False
+    print("  ✓ Web UI built")
+    return True
+
+
 def _update_via_zip(args):
    """Update Hermes Agent by downloading a ZIP archive.
    
@@ -3030,7 +3112,10 @@ def _update_via_zip(args):
                check=True,
            )
        _install_python_dependencies_with_optional_fallback(pip_cmd)
-    
+
+    # Build web UI frontend (optional — requires npm)
+    _build_web_ui(PROJECT_ROOT / "web")
+
    # Sync skills
    try:
        from tools.skills_sync import sync_skills
@@ -3777,7 +3862,10 @@ def cmd_update(args):
            if shutil.which("npm"):
                print("→ Updating Node.js dependencies...")
                subprocess.run(["npm", "install", "--silent"], cwd=PROJECT_ROOT, check=False)
-        
+
+        # Build web UI frontend (optional — requires npm)
+        _build_web_ui(PROJECT_ROOT / "web")
+
        print()
        print("✓ Code updated!")
        
@@ -3905,6 +3993,26 @@ def cmd_update(args):
        print()
        print("✓ Update complete!")
        
+        # Write exit code *before* the gateway restart attempt.
+        # When running as ``hermes update --gateway`` (spawned by the gateway's
+        # /update command), this process lives inside the gateway's systemd
+        # cgroup.  ``systemctl restart hermes-gateway`` kills everything in the
+        # cgroup (KillMode=mixed → SIGKILL to remaining processes), including
+        # us and the wrapping bash shell.  The shell never reaches its
+        # ``printf $status > .update_exit_code`` epilogue, so the exit-code
+        # marker file is never created.  The new gateway's update watcher then
+        # polls for 30 minutes and sends a spurious timeout message.
+        #
+        # Writing the marker here — after git pull + pip install succeed but
+        # before we attempt the restart — ensures the new gateway sees it
+        # regardless of how we die.
+        if gateway_mode:
+            _exit_code_path = get_hermes_home() / ".update_exit_code"
+            try:
+                _exit_code_path.write_text("0")
+            except OSError:
+                pass
+        
        # Auto-restart ALL gateways after update.
        # The code update (git pull) is shared across all profiles, so every
        # running gateway needs restarting to pick up the new code.
@@ -4039,7 +4147,7 @@ def _coalesce_session_name_args(argv: list) -> list:
        "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", "auth",
        "status", "cron", "doctor", "config", "pairing", "skills", "tools",
        "mcp", "sessions", "insights", "version", "update", "uninstall",
-        "profile",
+        "profile", "dashboard",
    }
    _SESSION_FLAGS = {"-c", "--continue", "-r", "--resume"}

@@ -4189,18 +4297,24 @@ def cmd_profile(args):
                            print(f'  Add to your shell config (~/.bashrc or ~/.zshrc):')
                            print(f'    export PATH="$HOME/.local/bin:$PATH"')

+            # Profile dir for display
+            try:
+                profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
+            except ValueError:
+                profile_dir_display = str(profile_dir)
+
            # Next steps
            print(f"\nNext steps:")
            print(f"  {name} setup              Configure API keys and model")
            print(f"  {name} chat               Start chatting")
            print(f"  {name} gateway start      Start the messaging gateway")
            if clone or clone_all:
-                try:
-                    profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
-                except ValueError:
-                    profile_dir_display = str(profile_dir)
                print(f"\n  Edit {profile_dir_display}/.env for different API keys")
                print(f"  Edit {profile_dir_display}/SOUL.md for different personality")
+            else:
+                print(f"\n  ⚠ This profile has no API keys yet. Run '{name} setup' first,")
+                print(f"    or it will inherit keys from your shell environment.")
+                print(f"  Edit {profile_dir_display}/SOUL.md to customize personality")
            print()

        except (ValueError, FileExistsError, FileNotFoundError) as e:
@@ -4311,6 +4425,27 @@ def cmd_profile(args):
            sys.exit(1)


+def cmd_dashboard(args):
+    """Launch the web dashboard server.
+
+    Exits with status 1 when ``fastapi`` / ``uvicorn`` cannot be imported or
+    when the frontend build reports failure; otherwise starts the server
+    with the host, port, and browser-opening choice taken from *args*.
+    """
+    try:
+        import fastapi  # noqa: F401
+        import uvicorn  # noqa: F401
+    except ImportError:
+        print("Web UI dependencies not installed.")
+        print("Install them with:  pip install hermes-agent[web]")
+        sys.exit(1)
+
+    frontend_ready = _build_web_ui(PROJECT_ROOT / "web", fatal=True)
+    if not frontend_ready:
+        sys.exit(1)
+
+    from hermes_cli.web_server import start_server
+    server_options = {
+        "host": args.host,
+        "port": args.port,
+        "open_browser": not args.no_open,
+    }
+    start_server(**server_options)
+
+
 def cmd_completion(args):
    """Print shell completion script."""
    from hermes_cli.profiles import generate_bash_completion, generate_zsh_completion
@@ -4338,6 +4473,7 @@ def cmd_logs(args):
        level=getattr(args, "level", None),
        session=getattr(args, "session", None),
        since=getattr(args, "since", None),
+        component=getattr(args, "component", None),
    )


@@ -4375,6 +4511,7 @@ Examples:
    hermes logs -f                Follow agent.log in real time
    hermes logs errors            View errors.log
    hermes logs --since 1h        Lines from the last hour
+    hermes debug share             Upload debug report for support
    hermes update                 Update to latest version

 For more help on a command:
@@ -4903,7 +5040,90 @@ For more help on a command:
        help="Show redacted API key prefixes (first/last 4 chars) instead of just set/not set"
    )
    dump_parser.set_defaults(func=cmd_dump)
-    
+
+    # =========================================================================
+    # debug command
+    # =========================================================================
+    debug_parser = subparsers.add_parser(
+        "debug",
+        help="Debug tools — upload logs and system info for support",
+        description="Debug utilities for Hermes Agent. Use 'hermes debug share' to "
+                    "upload a debug report (system info + recent logs) to a paste "
+                    "service and get a shareable URL.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""\
+Examples:
+    hermes debug share              Upload debug report and print URL
+    hermes debug share --lines 500  Include more log lines
+    hermes debug share --expire 30  Keep paste for 30 days
+    hermes debug share --local      Print report locally (no upload)
+""",
+    )
+    debug_sub = debug_parser.add_subparsers(dest="debug_command")
+    share_parser = debug_sub.add_parser(
+        "share",
+        help="Upload debug report to a paste service and print a shareable URL",
+    )
+    share_parser.add_argument(
+        "--lines", type=int, default=200,
+        help="Number of log lines to include per log file (default: 200)",
+    )
+    share_parser.add_argument(
+        "--expire", type=int, default=7,
+        help="Paste expiry in days (default: 7)",
+    )
+    share_parser.add_argument(
+        "--local", action="store_true",
+        help="Print the report locally instead of uploading",
+    )
+    debug_parser.set_defaults(func=cmd_debug)
+
+    # =========================================================================
+    # backup command
+    # =========================================================================
+    backup_parser = subparsers.add_parser(
+        "backup",
+        help="Back up Hermes home directory to a zip file",
+        description="Create a zip archive of your entire Hermes configuration, "
+                    "skills, sessions, and data (excludes the hermes-agent codebase). "
+                    "Use --quick for a fast snapshot of just critical state files."
+    )
+    backup_parser.add_argument(
+        "-o", "--output",
+        help="Output path for the zip file (default: ~/hermes-backup-<timestamp>.zip)"
+    )
+    backup_parser.add_argument(
+        "-q", "--quick",
+        action="store_true",
+        help="Quick snapshot: only critical state files (config, state.db, .env, auth, cron)"
+    )
+    backup_parser.add_argument(
+        "-l", "--label",
+        help="Label for the snapshot (only used with --quick)"
+    )
+    backup_parser.set_defaults(func=cmd_backup)
+
+    # =========================================================================
+    # import command
+    # =========================================================================
+    import_parser = subparsers.add_parser(
+        "import",
+        help="Restore a Hermes backup from a zip file",
+        description="Extract a previously created Hermes backup into your "
+                    "Hermes home directory, restoring configuration, skills, "
+                    "sessions, and data"
+    )
+    import_parser.add_argument(
+        "zipfile",
+        help="Path to the backup zip file"
+    )
+    import_parser.add_argument(
+        "--force", "-f",
+        action="store_true",
+        help="Overwrite existing files without confirmation"
+    )
+    import_parser.set_defaults(func=cmd_import)
+
    # =========================================================================
    # config command
    # =========================================================================
@@ -5721,6 +5941,19 @@ For more help on a command:
    )
    completion_parser.set_defaults(func=cmd_completion)

+    # =========================================================================
+    # dashboard command
+    # =========================================================================
+    dashboard_parser = subparsers.add_parser(
+        "dashboard",
+        help="Start the web UI dashboard",
+        description="Launch the Hermes Agent web dashboard for managing config, API keys, and sessions",
+    )
+    dashboard_parser.add_argument("--port", type=int, default=9119, help="Port (default 9119)")
+    dashboard_parser.add_argument("--host", default="127.0.0.1", help="Host (default 127.0.0.1)")
+    dashboard_parser.add_argument("--no-open", action="store_true", help="Don't open browser automatically")
+    dashboard_parser.set_defaults(func=cmd_dashboard)
+
    # =========================================================================
    # logs command
    # =========================================================================
@@ -5737,6 +5970,7 @@ Examples:
    hermes logs gateway -n 100     Show last 100 lines of gateway.log
    hermes logs --level WARNING    Only show WARNING and above
    hermes logs --session abc123   Filter by session ID
+    hermes logs --component tools  Only show tool-related lines
    hermes logs --since 1h         Lines from the last hour
    hermes logs --since 30m -f     Follow, starting from 30 min ago
    hermes logs list               List available log files with sizes
@@ -5766,6 +6000,10 @@ Examples:
        "--since", metavar="TIME",
        help="Show lines since TIME ago (e.g. 1h, 30m, 2d)",
    )
+    logs_parser.add_argument(
+        "--component", metavar="NAME",
+        help="Filter by component: gateway, agent, tools, cli, cron",
+    )
    logs_parser.set_defaults(func=cmd_logs)

    # =========================================================================
@@ -8,8 +8,9 @@ Different LLM providers expect model identifiers in different formats:
  hyphens: ``claude-sonnet-4-6``.
 - **Copilot** expects bare names *with* dots preserved:
  ``claude-sonnet-4.6``.
- **OpenCode Zen** follows the same dot-to-hyphen convention as
-  Anthropic: ``claude-sonnet-4-6``.
+- **OpenCode Zen** preserves dots for GPT/GLM/Gemini/Kimi/MiniMax-style
+  model IDs, but Claude still uses hyphenated native names like
+  ``claude-sonnet-4-6``.
 - **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
 - **DeepSeek** only accepts two model identifiers:
  ``deepseek-chat`` and ``deepseek-reasoner``.
@@ -67,7 +68,6 @@ _AGGREGATOR_PROVIDERS: frozenset[str] = frozenset({
 # Providers that want bare names with dots replaced by hyphens.
 _DOT_TO_HYPHEN_PROVIDERS: frozenset[str] = frozenset({
    "anthropic",
-    "opencode-zen",
 })

 # Providers that want bare names with dots preserved.
@@ -329,6 +329,9 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
        >>> normalize_model_for_provider("claude-sonnet-4.6", "opencode-zen")
        'claude-sonnet-4-6'

+        >>> normalize_model_for_provider("minimax-m2.5-free", "opencode-zen")
+        'minimax-m2.5-free'
+
        >>> normalize_model_for_provider("deepseek-v3", "deepseek")
        'deepseek-chat'

@@ -351,7 +354,16 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
    if provider in _AGGREGATOR_PROVIDERS:
        return _prepend_vendor(name)

-    # --- Anthropic / OpenCode: strip matching provider prefix, dots -> hyphens ---
+    # --- OpenCode Zen: Claude stays hyphenated; other models keep dots ---
+    if provider == "opencode-zen":
+        bare = _strip_matching_provider_prefix(name, provider)
+        if "/" in bare:
+            return bare
+        if bare.lower().startswith("claude-"):
+            return _dots_to_hyphens(bare)
+        return bare
+
+    # --- Anthropic: strip matching provider prefix, dots -> hyphens ---
    if provider in _DOT_TO_HYPHEN_PROVIDERS:
        bare = _strip_matching_provider_prefix(name, provider)
        if "/" in bare:
@@ -21,6 +21,7 @@ OpenRouter variant suffixes (``:free``, ``:extended``, ``:fast``).
 from __future__ import annotations

 import logging
+import re
 from dataclasses import dataclass
 from typing import List, NamedTuple, Optional

@@ -57,10 +58,36 @@ _HERMES_MODEL_WARNING = (
    "(Claude, GPT, Gemini, DeepSeek, etc.)."
 )

+# Match only the real Nous Research Hermes 3 / Hermes 4 chat families.
+# The previous substring check (`"hermes" in name.lower()`) produced false
+# positives on unrelated local Modelfiles like ``hermes-brain:qwen3-14b-ctx16k``
+# that merely carry "hermes" in their tag but are fully tool-capable.
+#
+# Positive examples the regex must match:
+#   NousResearch/Hermes-3-Llama-3.1-70B, hermes-4-405b, openrouter/hermes3:70b
+# Negative examples it must NOT match:
+#   hermes-brain:qwen3-14b-ctx16k, qwen3:14b, claude-opus-4-6
+_NOUS_HERMES_NON_AGENTIC_RE = re.compile(
+    r"(?:^|[/:])hermes[-_ ]?[34](?:[-_.:]|$)",
+    re.IGNORECASE,
+)
+
+
+def is_nous_hermes_non_agentic(model_name: str) -> bool:
+    """Tell whether *model_name* names a Nous Research Hermes 3/4 chat model.
+
+    A falsy name (empty string or ``None``) never matches.  Any other name
+    is searched with ``_NOUS_HERMES_NON_AGENTIC_RE``.  Code that gates the
+    non-agentic warning should call this helper instead of its own test.
+    """
+    if model_name:
+        return _NOUS_HERMES_NON_AGENTIC_RE.search(model_name) is not None
+    return False
+

 def _check_hermes_model_warning(model_name: str) -> str:
-    """Return a warning string if *model_name* looks like a Hermes LLM model."""
-    if "hermes" in model_name.lower():
+    """Return the warning for a Nous Hermes 3/4 chat model, else an empty string."""
+    if is_nous_hermes_non_agentic(model_name):
        return _HERMES_MODEL_WARNING
    return ""

@@ -839,8 +866,11 @@ def list_authenticated_providers(
                    if any(os.environ.get(ev) for ev in pcfg.api_key_env_vars):
                        has_creds = True
                        break
-        if not has_creds and overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
-            # These use auth stores, not env vars — check for auth.json entries
+        # Check auth store and credential pool for non-env-var credentials.
+        # This applies to OAuth providers AND api_key providers that also
+        # support OAuth (e.g. anthropic supports both API key and Claude Code
+        # OAuth via external credential files).
+        if not has_creds:
            try:
                from hermes_cli.auth import _load_auth_store
                store = _load_auth_store()
@@ -853,6 +883,38 @@ def list_authenticated_providers(
                    has_creds = True
            except Exception as exc:
                logger.debug("Auth store check failed for %s: %s", pid, exc)
+        # Fallback: check the credential pool with full auto-seeding.
+        # This catches credentials that exist in external stores (e.g.
+        # Codex CLI ~/.codex/auth.json) which _seed_from_singletons()
+        # imports on demand but aren't in the raw auth.json yet.
+        if not has_creds:
+            try:
+                from agent.credential_pool import load_pool
+                pool = load_pool(hermes_slug)
+                if pool.has_credentials():
+                    has_creds = True
+            except Exception as exc:
+                logger.debug("Credential pool check failed for %s: %s", hermes_slug, exc)
+        # Fallback: check external credential files directly.
+        # The credential pool gates anthropic behind
+        # is_provider_explicitly_configured() to prevent auxiliary tasks
+        # from silently consuming Claude Code tokens (PR #4210).
+        # But the /model picker is discovery-oriented — we WANT to show
+        # providers the user can switch to, even if they aren't currently
+        # configured.
+        if not has_creds and hermes_slug == "anthropic":
+            try:
+                from agent.anthropic_adapter import (
+                    read_claude_code_credentials,
+                    read_hermes_oauth_credentials,
+                )
+                hermes_creds = read_hermes_oauth_credentials()
+                cc_creds = read_claude_code_credentials()
+                if (hermes_creds and hermes_creds.get("accessToken")) or \
+                   (cc_creds and cc_creds.get("accessToken")):
+                    has_creds = True
+            except Exception as exc:
+                logger.debug("Anthropic external creds check failed: %s", exc)
        if not has_creds:
            continue

@@ -882,9 +944,16 @@ def list_authenticated_providers(
            api_url = ep_cfg.get("api", "") or ep_cfg.get("url", "") or ""
            default_model = ep_cfg.get("default_model", "")

+            # Build models list from both default_model and full models array
            models_list = []
            if default_model:
                models_list.append(default_model)
+            # Also include the full models list from config
+            cfg_models = ep_cfg.get("models", [])
+            if isinstance(cfg_models, list):
+                for m in cfg_models:
+                    if m and m not in models_list:
+                        models_list.append(m)

            # Try to probe /v1/models if URL is set (but don't block on it)
            # For now just show what we know from config
@@ -70,13 +70,13 @@ def _codex_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
+        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
        "openai/gpt-5.4-mini",
-        "xiaomi/mimo-v2-pro",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
@@ -130,6 +130,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gemma-4-26b-it",
    ],
    "zai": [
+        "glm-5.1",
        "glm-5",
        "glm-5-turbo",
        "glm-4.7",
@@ -546,6 +547,20 @@ _PROVIDER_ALIASES = {
 }


+def get_default_model_for_provider(provider: str) -> str:
+    """Look up the fallback model for *provider*, or an empty string if none.
+
+    The default is the first model listed for the provider in
+    ``_PROVIDER_MODELS``; a missing or empty list yields the empty string.
+
+    Intended for the case where a provider has been configured but no model
+    was ever chosen (e.g. ``hermes auth add openai-codex`` with no follow-up
+    ``hermes model``).
+    """
+    candidates = _PROVIDER_MODELS.get(provider) or [""]
+    return candidates[0]
+
+
 def _openrouter_model_is_free(pricing: Any) -> bool:
    """Return True when both prompt and completion pricing are zero."""
    if not isinstance(pricing, dict):
@@ -459,6 +459,16 @@ def create_profile(
                    dst.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(src, dst)

+    # Seed a default SOUL.md so the user has a file to customize immediately.
+    # Skipped when the profile already has one (from --clone / --clone-all).
+    soul_path = profile_dir / "SOUL.md"
+    if not soul_path.exists():
+        try:
+            from hermes_cli.default_soul import DEFAULT_SOUL_MD
+            soul_path.write_text(DEFAULT_SOUL_MD, encoding="utf-8")
+        except Exception:
+            pass  # best-effort — don't fail profile creation over this
+
    return profile_dir


@@ -275,6 +275,45 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            return None

    config = load_config()
+    
+    # First check providers: dict (new-style user-defined providers)
+    providers = config.get("providers")
+    if isinstance(providers, dict):
+        for ep_name, entry in providers.items():
+            if not isinstance(entry, dict):
+                continue
+            # Match exact name or normalized name
+            name_norm = _normalize_custom_provider_name(ep_name)
+            # Resolve the API key from the env var name stored in key_env
+            key_env = str(entry.get("key_env", "") or "").strip()
+            resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
+
+            if requested_norm in {ep_name, name_norm, f"custom:{name_norm}"}:
+                # Found match by provider key
+                base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
+                if base_url:
+                    return {
+                        "name": entry.get("name", ep_name),
+                        "base_url": base_url.strip(),
+                        "api_key": resolved_api_key,
+                        "model": entry.get("default_model", ""),
+                    }
+            # Also check the 'name' field if present
+            display_name = entry.get("name", "")
+            if display_name:
+                display_norm = _normalize_custom_provider_name(display_name)
+                if requested_norm in {display_name, display_norm, f"custom:{display_norm}"}:
+                    # Found match by display name
+                    base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
+                    if base_url:
+                        return {
+                            "name": display_name,
+                            "base_url": base_url.strip(),
+                            "api_key": resolved_api_key,
+                            "model": entry.get("default_model", ""),
+                        }
+
+    # Fall back to custom_providers: list (legacy format)
    custom_providers = config.get("custom_providers")
    if not isinstance(custom_providers, list):
        if isinstance(custom_providers, dict):
@@ -104,7 +104,7 @@ _DEFAULT_PROVIDER_MODELS = {
        "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
        "gemma-4-31b-it", "gemma-4-26b-it",
    ],
-    "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
+    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
@@ -2232,6 +2232,7 @@ def setup_gateway(config: dict):
        from hermes_cli.gateway import (
            _is_service_installed,
            _is_service_running,
+            supports_systemd_services,
            has_conflicting_systemd_units,
            install_linux_gateway_from_setup,
            print_systemd_scope_conflict_warning,
@@ -2244,16 +2245,18 @@ def setup_gateway(config: dict):

        service_installed = _is_service_installed()
        service_running = _is_service_running()
+        supports_systemd = supports_systemd_services()
+        supports_service_manager = supports_systemd or _is_macos

        print()
-        if _is_linux and has_conflicting_systemd_units():
+        if supports_systemd and has_conflicting_systemd_units():
            print_systemd_scope_conflict_warning()
            print()

        if service_running:
            if prompt_yes_no("  Restart the gateway to pick up changes?", True):
                try:
-                    if _is_linux:
+                    if supports_systemd:
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
@@ -2262,14 +2265,14 @@ def setup_gateway(config: dict):
        elif service_installed:
            if prompt_yes_no("  Start the gateway service?", True):
                try:
-                    if _is_linux:
+                    if supports_systemd:
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
                except Exception as e:
                    print_error(f"  Start failed: {e}")
-        elif _is_linux or _is_macos:
-            svc_name = "systemd" if _is_linux else "launchd"
+        elif supports_service_manager:
+            svc_name = "systemd" if supports_systemd else "launchd"
            if prompt_yes_no(
                f"  Install the gateway as a {svc_name} service? (runs in background, starts on boot)",
                True,
@@ -2277,7 +2280,7 @@ def setup_gateway(config: dict):
                try:
                    installed_scope = None
                    did_install = False
-                    if _is_linux:
+                    if supports_systemd:
                        installed_scope, did_install = install_linux_gateway_from_setup(force=False)
                    else:
                        launchd_install(force=False)
@@ -2285,7 +2288,7 @@ def setup_gateway(config: dict):
                    print()
                    if did_install and prompt_yes_no("  Start the service now?", True):
                        try:
-                            if _is_linux:
+                            if supports_systemd:
                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
@@ -2296,12 +2299,21 @@ def setup_gateway(config: dict):
                    print_info("  You can try manually: hermes gateway install")
            else:
                print_info("  You can install later: hermes gateway install")
-                if _is_linux:
+                if supports_systemd:
                    print_info("  Or as a boot-time service: sudo hermes gateway install --system")
                print_info("  Or run in foreground:  hermes gateway")
        else:
-            print_info("Start the gateway to bring your bots online:")
-            print_info("   hermes gateway              # Run in foreground")
+            from hermes_constants import is_container
+            if is_container():
+                print_info("Start the gateway to bring your bots online:")
+                print_info("   hermes gateway run          # Run as container main process")
+                print_info("")
+                print_info("For automatic restarts, use a Docker restart policy:")
+                print_info("   docker run --restart unless-stopped ...")
+                print_info("   docker restart <container>  # Manual restart")
+            else:
+                print_info("Start the gateway to bring your bots online:")
+                print_info("   hermes gateway              # Run in foreground")

        print_info("━" * 50)

@@ -335,7 +335,23 @@ def do_install(identifier: str, category: str = "", force: bool = False,
    meta, bundle, _matched_source = _resolve_source_meta_and_bundle(identifier, sources)

    if not bundle:
-        c.print(f"[bold red]Error:[/] Could not fetch '{identifier}' from any source.\n")
+        # Check if any source hit GitHub API rate limit
+        rate_limited = any(
+            getattr(src, "is_rate_limited", False)
+            or getattr(getattr(src, "github", None), "is_rate_limited", False)
+            for src in sources
+        )
+        c.print(f"[bold red]Error:[/] Could not fetch '{identifier}' from any source.")
+        if rate_limited:
+            c.print(
+                "[yellow]Hint:[/] GitHub API rate limit exhausted "
+                "(unauthenticated: 60 requests/hour).\n"
+                "Set [bold]GITHUB_TOKEN[/] in your .env or install the "
+                "[bold]gh[/] CLI and run [bold]gh auth login[/] "
+                "to raise the limit to 5,000/hr.\n"
+            )
+        else:
+            c.print()
        return

    # Auto-detect category for official skills (e.g. "official/autonomous-ai-agents/blackbox")
@@ -346,23 +346,35 @@ def show_status(args):
            print("  Note:         Android may stop background jobs when Termux is suspended")

    elif sys.platform.startswith('linux'):
-        try:
-            from hermes_cli.gateway import get_service_name
-            _gw_svc = get_service_name()
-        except Exception:
-            _gw_svc = "hermes-gateway"
-        try:
-            result = subprocess.run(
-                ["systemctl", "--user", "is-active", _gw_svc],
-                capture_output=True,
-                text=True,
-                timeout=5
-            )
-            is_active = result.stdout.strip() == "active"
-        except (FileNotFoundError, subprocess.TimeoutExpired):
-            is_active = False
-        print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
-        print("  Manager:      systemd (user)")
+        from hermes_constants import is_container
+        if is_container():
+            # Docker/Podman: no systemd — check for running gateway processes
+            try:
+                from hermes_cli.gateway import find_gateway_pids
+                gateway_pids = find_gateway_pids()
+                is_active = len(gateway_pids) > 0
+            except Exception:
+                is_active = False
+            print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
+            print("  Manager:      docker (foreground)")
+        else:
+            try:
+                from hermes_cli.gateway import get_service_name
+                _gw_svc = get_service_name()
+            except Exception:
+                _gw_svc = "hermes-gateway"
+            try:
+                result = subprocess.run(
+                    ["systemctl", "--user", "is-active", _gw_svc],
+                    capture_output=True,
+                    text=True,
+                    timeout=5
+                )
+                is_active = result.stdout.strip() == "active"
+            except (FileNotFoundError, subprocess.TimeoutExpired):
+                is_active = False
+            print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
+            print("  Manager:      systemd (user)")
        
    elif sys.platform == 'darwin':
        from hermes_cli.gateway import get_launchd_label
@@ -0,0 +1,351 @@
+"""Random tips shown at CLI session start to help users discover features."""
+
+import random
+from typing import Optional
+
+# ---------------------------------------------------------------------------
+# Tip corpus — one-liners covering slash commands, CLI flags, config,
+# keybindings, tools, gateway, skills, profiles, and workflow tricks.
+#
+# Each entry is a plain string. get_random_tip() draws one entry with
+# random.choice() and get_tip_count() reports len(TIPS). The
+# "# --- Section ---" comments inside the list only group entries for
+# readers; they have no runtime effect.
+#
+# NOTE(review): a few entries cover the same feature (the two
+# "hermes gateway install" tips, and the two smart-routing tips). Since
+# every entry is equally likely to be picked, overlapping topics are shown
+# more often — consider merging them.
+# ---------------------------------------------------------------------------
+
+TIPS = [
+    # --- Slash Commands ---
+    "/btw <question> asks a quick side question without tools or history — great for clarifications.",
+    "/background <prompt> runs a task in a separate session while your current one stays free.",
+    "/branch forks the current session so you can explore a different direction without losing progress.",
+    "/compress manually compresses conversation context when things get long.",
+    "/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
+    "/rollback diff 2 previews what changed since checkpoint 2 without restoring anything.",
+    "/rollback 2 src/file.py restores a single file from a specific checkpoint.",
+    "/title \"my project\" names your session — resume it later with /resume or hermes -c.",
+    "/resume picks up where you left off in a previously named session.",
+    "/queue <prompt> queues a message for the next turn without interrupting the current one.",
+    "/undo removes the last user/assistant exchange from the conversation.",
+    "/retry resends your last message — useful when the agent's response wasn't quite right.",
+    "/verbose cycles tool progress display: off → new → all → verbose.",
+    "/reasoning high increases the model's thinking depth. /reasoning show displays the reasoning.",
+    "/fast toggles priority processing for faster API responses (provider-dependent).",
+    "/yolo skips all dangerous command approval prompts for the rest of the session.",
+    "/model lets you switch models mid-session — try /model sonnet or /model gpt-5.",
+    "/model --global changes your default model permanently.",
+    "/personality pirate sets a fun personality — 14 built-in options from kawaii to shakespeare.",
+    "/skin changes the CLI theme — try ares, mono, slate, poseidon, or charizard.",
+    "/statusbar toggles a persistent bar showing model, tokens, context fill %, cost, and duration.",
+    "/tools disable browser temporarily removes browser tools for the current session.",
+    "/browser connect attaches browser tools to your running Chrome instance via CDP.",
+    "/plugins lists installed plugins and their status.",
+    "/cron manages scheduled tasks — set up recurring prompts with delivery to any platform.",
+    "/reload-mcp hot-reloads MCP server configuration without restarting.",
+    "/usage shows token usage, cost breakdown, and session duration.",
+    "/insights shows usage analytics for the last 30 days.",
+    "/paste checks your clipboard for an image and attaches it to your next message.",
+    "/profile shows which profile is active and its home directory.",
+    "/config shows your current configuration at a glance.",
+    "/stop kills all running background processes spawned by the agent.",
+
+    # --- @ Context References ---
+    "@file:path/to/file.py injects file contents directly into your message.",
+    "@file:main.py:10-50 injects only lines 10-50 of a file.",
+    "@folder:src/ injects a directory tree listing.",
+    "@diff injects your unstaged git changes into the message.",
+    "@staged injects your staged git changes (git diff --staged).",
+    "@git:5 injects the last 5 commits with full patches.",
+    "@url:https://example.com fetches and injects a web page's content.",
+    "Typing @ triggers filesystem path completion — navigate to any file interactively.",
+    "Combine multiple references: \"Review @file:main.py and @file:test.py for consistency.\"",
+
+    # --- Keybindings ---
+    "Alt+Enter (or Ctrl+J) inserts a newline for multi-line input.",
+    "Ctrl+C interrupts the agent. Double-press within 2 seconds to force exit.",
+    "Ctrl+Z suspends Hermes to the background — run fg in your shell to resume.",
+    "Tab accepts auto-suggestion ghost text or autocompletes slash commands.",
+    "Type a new message while the agent is working to interrupt and redirect it.",
+    "Alt+V pastes an image from your clipboard into the conversation.",
+    "Pasting 5+ lines auto-saves to a file and inserts a compact reference instead.",
+
+    # --- CLI Flags ---
+    "hermes -c resumes your most recent CLI session. hermes -c \"project name\" resumes by title.",
+    "hermes -w creates an isolated git worktree — perfect for parallel agent workflows.",
+    "hermes -w -q \"Fix issue #42\" combines worktree isolation with a one-shot query.",
+    "hermes chat -t web,terminal enables only specific toolsets for a focused session.",
+    "hermes chat -s github-pr-workflow preloads a skill at launch.",
+    "hermes chat -q \"query\" runs a single non-interactive query and exits.",
+    "hermes chat --max-turns 200 overrides the default 90-iteration limit per turn.",
+    "hermes chat --checkpoints enables filesystem snapshots before every destructive file change.",
+    "hermes --yolo bypasses all dangerous command approval prompts for the entire session.",
+    "hermes chat --source telegram tags the session for filtering in hermes sessions list.",
+    "hermes -p work chat runs under a specific profile without changing your default.",
+
+    # --- CLI Subcommands ---
+    "hermes doctor --fix diagnoses and auto-repairs config and dependency issues.",
+    "hermes dump outputs a compact setup summary — great for bug reports.",
+    "hermes config set KEY VALUE auto-routes secrets to .env and everything else to config.yaml.",
+    "hermes config edit opens config.yaml in your default editor.",
+    "hermes config check scans for missing or stale configuration options.",
+    "hermes sessions browse opens an interactive session picker with search.",
+    "hermes sessions stats shows session counts by platform and database size.",
+    "hermes sessions prune --older-than 30 cleans up old sessions.",
+    "hermes skills search react --source skills-sh searches the skills.sh public directory.",
+    "hermes skills check scans installed hub skills for upstream updates.",
+    "hermes skills tap add myorg/skills-repo adds a custom GitHub skill source.",
+    "hermes skills snapshot export setup.json exports your skill configuration for backup or sharing.",
+    "hermes mcp add github --command npx adds MCP servers from the command line.",
+    "hermes mcp serve runs Hermes itself as an MCP server for other agents.",
+    "hermes auth add lets you add multiple API keys for credential pool rotation.",
+    "hermes completion bash >> ~/.bashrc enables tab completion for all commands and profiles.",
+    "hermes logs -f follows agent.log in real time. --level WARNING --since 1h filters output.",
+    "hermes backup creates a zip backup of your entire Hermes home directory.",
+    "hermes profile create coder creates an isolated profile that becomes its own command.",
+    "hermes profile create work --clone copies your current config and keys to a new profile.",
+    "hermes update syncs new bundled skills to ALL profiles automatically.",
+    "hermes gateway install sets up Hermes as a system service (systemd/launchd).",
+    "hermes memory setup lets you configure an external memory provider (Honcho, Mem0, etc.).",
+    "hermes webhook subscribe creates event-driven webhook routes with HMAC validation.",
+
+    # --- Configuration ---
+    "Set display.bell_on_complete: true in config.yaml to hear a bell when long tasks finish.",
+    "Set display.streaming: true to see tokens appear in real time as the model generates.",
+    "Set display.show_reasoning: true to watch the model's chain-of-thought reasoning.",
+    "Set display.compact: true to reduce whitespace in output for denser information.",
+    "Set display.busy_input_mode: queue to queue messages instead of interrupting the agent.",
+    "Set display.resume_display: minimal to skip the full conversation recap on session resume.",
+    "Set compression.threshold: 0.50 to control when auto-compression fires (default: 50% of context).",
+    "Set agent.max_turns: 200 to let the agent take more tool-calling steps per turn.",
+    "Set file_read_max_chars: 200000 to increase the max content per read_file call.",
+    "Set approvals.mode: smart to let an LLM auto-approve safe commands and auto-deny dangerous ones.",
+    "Set fallback_model in config.yaml to automatically fail over to a backup provider.",
+    "Set privacy.redact_pii: true to hash user IDs and phone numbers before sending to the LLM.",
+    "Set browser.record_sessions: true to auto-record browser sessions as WebM videos.",
+    "Set worktree: true in config.yaml to always create a git worktree (same as hermes -w).",
+    "Set security.website_blocklist.enabled: true to block specific domains from web tools.",
+    "Set cron.wrap_response: false to deliver raw agent output without the cron header/footer.",
+    "HERMES_TIMEZONE overrides the server timezone with any IANA timezone string.",
+    "Environment variable substitution works in config.yaml: use ${VAR_NAME} syntax.",
+    "Quick commands in config.yaml run shell commands instantly with zero token usage.",
+    "Custom personalities can be defined in config.yaml under agent.personalities.",
+    "provider_routing controls OpenRouter provider sorting, whitelisting, and blacklisting.",
+
+    # --- Tools & Capabilities ---
+    "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
+    "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.",
+    "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
+    "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
+    "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
+    "patch supports V4A format for bulk multi-file edits in a single call.",
+    "read_file suggests similar filenames when a file isn't found.",
+    "read_file auto-deduplicates — re-reading an unchanged file returns a lightweight stub.",
+    "browser_vision takes a screenshot and analyzes it with AI — works for CAPTCHAs and visual content.",
+    "browser_console can evaluate JavaScript expressions in the page context.",
+    "image_generate creates images with FLUX 2 Pro and automatic 2x upscaling.",
+    "text_to_speech converts text to audio — plays as voice bubbles on Telegram.",
+    "send_message can reach any connected messaging platform from within a session.",
+    "The todo tool helps the agent track complex multi-step tasks during a session.",
+    "session_search performs full-text search across ALL past conversations.",
+    "The agent automatically saves preferences, corrections, and environment facts to memory.",
+    "mixture_of_agents routes hard problems through 4 frontier LLMs collaboratively.",
+    "Terminal commands support background mode with notify_on_complete for long-running tasks.",
+    "Terminal background processes support watch_patterns to alert on specific output lines.",
+    "The terminal tool supports 6 backends: local, Docker, SSH, Modal, Daytona, and Singularity.",
+
+    # --- Profiles ---
+    "Each profile gets its own config, API keys, memory, sessions, skills, and cron jobs.",
+    "Profile names become shell commands — 'hermes profile create coder' creates the 'coder' command.",
+    "hermes profile export coder -o backup.tar.gz creates a portable profile archive.",
+    "If two profiles accidentally share a bot token, the second gateway is blocked with a clear error.",
+
+    # --- Sessions ---
+    "Sessions auto-generate descriptive titles after the first exchange — no manual naming needed.",
+    "Session titles support lineage: \"my project\" → \"my project #2\" → \"my project #3\".",
+    "When exiting, Hermes prints a resume command with session ID and stats.",
+    "hermes sessions export backup.jsonl exports all sessions for backup or analysis.",
+    "hermes -r SESSION_ID resumes any specific past session by its ID.",
+
+    # --- Memory ---
+    "Memory is a frozen snapshot — changes appear in the system prompt only at next session start.",
+    "Memory entries are automatically scanned for prompt injection and exfiltration patterns.",
+    "The agent has two memory stores: personal notes (~2200 chars) and user profile (~1375 chars).",
+    "Corrections you give the agent (\"no, do it this way\") are often auto-saved to memory.",
+
+    # --- Skills ---
+    "Over 80 bundled skills covering github, creative, mlops, productivity, research, and more.",
+    "Every installed skill automatically becomes a slash command — type / to see them all.",
+    "hermes skills install official/security/1password installs optional skills from the repo.",
+    "Skills can restrict to specific OS platforms — some only load on macOS or Linux.",
+    "skills.external_dirs in config.yaml lets you load skills from custom directories.",
+    "The agent can create its own skills as procedural memory using skill_manage.",
+    "The plan skill saves markdown plans under .hermes/plans/ in the active workspace.",
+
+    # --- Cron & Scheduling ---
+    "Cron jobs can attach skills: hermes cron add --skill blogwatcher \"Check for new posts\".",
+    "Cron delivery targets include telegram, discord, slack, email, sms, and 12+ more platforms.",
+    "If a cron response starts with [SILENT], delivery is suppressed — useful for monitoring-only jobs.",
+    "Cron supports relative delays (30m), intervals (every 2h), cron expressions, and ISO timestamps.",
+    "Cron jobs run in completely fresh agent sessions — prompts must be self-contained.",
+
+    # --- Voice ---
+    "Voice mode works with zero API keys if faster-whisper is installed (free local speech-to-text).",
+    "Five TTS providers available: Edge TTS (free), ElevenLabs, OpenAI, NeuTTS (free local), MiniMax.",
+    "/voice on enables voice mode in the CLI. Ctrl+B toggles push-to-talk recording.",
+    "Streaming TTS plays sentences as they generate — you don't wait for the full response.",
+    "Voice messages on Telegram, Discord, WhatsApp, and Slack are auto-transcribed.",
+
+    # --- Gateway & Messaging ---
+    "Hermes runs on 18 platforms: Telegram, Discord, Slack, WhatsApp, Signal, Matrix, email, and more.",
+    "hermes gateway install sets it up as a system service that starts on boot.",
+    "DingTalk uses Stream Mode — no webhooks or public URL needed.",
+    "BlueBubbles brings iMessage to Hermes via a local macOS server.",
+    "Webhook routes support HMAC validation, rate limiting, and event filtering.",
+    "The API server exposes an OpenAI-compatible endpoint compatible with Open WebUI and LibreChat.",
+    "Discord voice channel mode: the bot joins VC, transcribes speech, and talks back.",
+    "group_sessions_per_user: true gives each person their own session in group chats.",
+    "/sethome marks a chat as the home channel for cron job deliveries.",
+    "The gateway supports inactivity-based timeouts — active agents can run indefinitely.",
+
+    # --- Security ---
+    "Dangerous command approval has 4 tiers: once, session, always (permanent allowlist), deny.",
+    "Smart approval mode uses an LLM to auto-approve safe commands and flag dangerous ones.",
+    "SSRF protection blocks private networks, loopback, link-local, and cloud metadata addresses.",
+    "Tirith pre-exec scanning detects homograph URL spoofing and pipe-to-interpreter patterns.",
+    "MCP subprocesses receive a filtered environment — only safe system vars pass through.",
+    "Context files (.hermes.md, AGENTS.md) are security-scanned for prompt injection before loading.",
+    "command_allowlist in config.yaml permanently approves specific shell command patterns.",
+
+    # --- Context & Compression ---
+    "Context auto-compresses when it reaches the threshold — memories are flushed and history summarized.",
+    "The status bar turns yellow, then orange, then red as context fills up.",
+    "SOUL.md at ~/.hermes/SOUL.md is the agent's primary identity — customize it to shape behavior.",
+    "Hermes loads project context from .hermes.md, AGENTS.md, CLAUDE.md, or .cursorrules (first match).",
+    "Subdirectory AGENTS.md files are discovered progressively as the agent navigates into folders.",
+    "Context files are capped at 20,000 characters with smart head/tail truncation.",
+
+    # --- Browser ---
+    "Five browser providers: local Chromium, Browserbase, Browser Use, Camofox, and Firecrawl.",
+    "Camofox is an anti-detection browser — Firefox fork with C++ fingerprint spoofing.",
+    "browser_navigate returns a page snapshot automatically — no need to call browser_snapshot after.",
+    "browser_vision with annotate=true overlays numbered labels on interactive elements.",
+
+    # --- MCP ---
+    "MCP servers are configured in config.yaml — both stdio and HTTP transports supported.",
+    "Per-server tool filtering: tools.include whitelists and tools.exclude blacklists specific tools.",
+    "MCP servers auto-generate toolsets at runtime — hermes tools can toggle them per platform.",
+    "MCP OAuth support: auth: oauth enables browser-based authorization with PKCE.",
+
+    # --- Checkpoints & Rollback ---
+    "Checkpoints have zero overhead when no files are modified — enabled by default.",
+    "A pre-rollback snapshot is saved automatically so you can undo the undo.",
+    "/rollback also undoes the conversation turn, so the agent doesn't remember rolled-back changes.",
+    "Checkpoints use shadow repos in ~/.hermes/checkpoints/ — your project's .git is never touched.",
+
+    # --- Batch & Data ---
+    "batch_runner.py processes hundreds of prompts in parallel for training data generation.",
+    "hermes chat -Q enables quiet mode for programmatic use — suppresses banner and spinner.",
+    "Trajectory saving (--save-trajectories) captures full tool-use traces for model training.",
+
+    # --- Plugins ---
+    "Three plugin types: general (tools/hooks), memory providers, and context engines.",
+    "hermes plugins install owner/repo installs plugins directly from GitHub.",
+    "8 external memory providers available: Honcho, OpenViking, Mem0, Hindsight, and more.",
+    "Plugin hooks include pre_tool_call, post_tool_call, pre_llm_call, and post_llm_call.",
+
+    # --- Miscellaneous ---
+    "Prompt caching (Anthropic) reduces costs by reusing cached system prompt prefixes.",
+    "The agent auto-generates session titles in a background thread — zero latency impact.",
+    "Smart model routing can auto-route simple queries to a cheaper model.",
+    "Slash commands support prefix matching: /h resolves to /help, /mod to /model.",
+    "Dragging a file path into the terminal auto-attaches images or sends as context.",
+    ".worktreeinclude in your repo root lists gitignored files to copy into worktrees.",
+    "hermes acp runs Hermes as an ACP server for VS Code, Zed, and JetBrains integration.",
+    "Custom providers: save named endpoints in config.yaml under custom_providers.",
+    "HERMES_EPHEMERAL_SYSTEM_PROMPT injects a system prompt that's never persisted to history.",
+    "credential_pool_strategies supports fill_first, round_robin, least_used, and random rotation.",
+    "hermes login supports OAuth-based auth for Nous and OpenAI Codex providers.",
+    "The API server supports both Chat Completions and Responses API with server-side state.",
+    "tool_preview_length: 0 in config shows full file paths in the spinner's activity feed.",
+    "hermes status --deep runs deeper diagnostic checks across all components.",
+
+    # --- Hidden Gems & Power-User Tricks ---
+    "BOOT.md at ~/.hermes/BOOT.md runs automatically on every gateway start — use it for startup checks.",
+    "Cron jobs can attach a Python script (--script) whose stdout is injected into the prompt as context.",
+    "Cron scripts live in ~/.hermes/scripts/ and run before the agent — perfect for data collection pipelines.",
+    "prefill_messages_file in config.yaml injects few-shot examples into every API call, never saved to history.",
+    "SOUL.md completely replaces the agent's default identity — rewrite it to make Hermes your own.",
+    "SOUL.md is auto-seeded with a default personality on first run. Edit ~/.hermes/SOUL.md to customize.",
+    "/compress <focus topic> allocates 60-70% of the summary budget to your topic and aggressively trims the rest.",
+    "On second+ compression, the compressor updates the previous summary instead of starting from scratch.",
+    "Before a gateway session reset, Hermes auto-flushes important facts to memory in the background.",
+    "network.force_ipv4: true in config.yaml fixes hangs on servers with broken IPv6 — monkey-patches socket.",
+    "The terminal tool annotates common exit codes: grep returning 1 = 'No matches found (not an error)'.",
+    "Failed foreground terminal commands auto-retry up to 3 times with exponential backoff (2s, 4s, 8s).",
+    "Bare sudo commands are auto-rewritten to pipe SUDO_PASSWORD from .env — no interactive prompt needed.",
+    "execute_code has built-in helpers: json_parse() for tolerant parsing, shell_quote(), and retry() with backoff.",
+    "execute_code's 7 sandbox tools (web_search, terminal, read/write/search/patch) use RPC — never enter context.",
+    "Reading the same file region 3+ times triggers a warning. At 4+, it's hard-blocked to prevent loops.",
+    "write_file and patch detect if a file was externally modified since the last read and warn about staleness.",
+    "V4A patch format supports Add File, Delete File, and Move File directives — not just Update.",
+    "MCP servers can request LLM completions back via sampling — the agent becomes a tool for the server.",
+    "MCP servers send notifications/tools/list_changed to trigger automatic tool re-registration without restart.",
+    "delegate_task with acp_command: 'claude' spawns Claude Code as a child agent from any platform.",
+    "Delegation has a heartbeat thread — child activity propagates to the parent, preventing gateway timeouts.",
+    "When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
+    "agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
+    "agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
+    "The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
+    "Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
+    "The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
+    "Stale git worktrees are auto-cleaned: 24-72h old with no unpushed commits get pruned on startup.",
+    "Each profile gets its own subprocess HOME at HERMES_HOME/home/ — isolated git, ssh, npm, gh configs.",
+    "HERMES_HOME_MODE env var (octal, e.g. 0701) sets custom directory permissions for web server traversal.",
+    "Container mode: place .container-mode in HERMES_HOME and the host CLI auto-execs into the container.",
+    "Ctrl+C has 5 priority tiers: cancel recording → cancel prompts → cancel picker → interrupt agent → exit.",
+    "Every interrupt during an agent run is logged to ~/.hermes/interrupt_debug.log with timestamps.",
+    "BROWSER_CDP_URL connects browser tools to any running Chrome — accepts WebSocket, HTTP, or host:port.",
+    "BROWSERBASE_ADVANCED_STEALTH=true enables advanced anti-detection with custom Chromium (Scale Plan).",
+    "The CLI auto-switches to compact mode in terminals narrower than 80 columns.",
+    "Quick commands support two types: exec (run shell command directly) and alias (redirect to another command).",
+    "Per-task delegation model: delegation.model and delegation.provider in config route subagents to cheaper models.",
+    "delegation.reasoning_effort independently controls thinking depth for subagents.",
+    "display.platforms in config.yaml allows per-platform display overrides: {telegram: {tool_progress: all}}.",
+    "human_delay.mode in config simulates human typing speed — configurable min_ms/max_ms range.",
+    "Config version migrations run automatically on load — new config keys appear without manual intervention.",
+    "GPT and Codex models get special system prompt guidance for tool discipline and mandatory tool use.",
+    "Gemini models get tailored directives for absolute paths, parallel tool calls, and non-interactive commands.",
+    "context.engine in config.yaml can be set to a plugin name for alternative context management strategies.",
+    "Browser pages over 8000 tokens are auto-summarized by the auxiliary LLM before returning to the agent.",
+    "The compressor does a cheap pre-pass: tool outputs over 200 chars are replaced with placeholders before the LLM runs.",
+    "When compression fails, further attempts are paused for 10 minutes to avoid API hammering.",
+    "Long dangerous commands (>70 chars) get a 'view' option in the approval prompt to see the full text first.",
+    "Audio level visualization shows ▁▂▃▄▅▆▇ bars during voice recording based on microphone RMS levels.",
+    "Profile names cannot collide with existing PATH binaries — 'hermes profile create ls' would be rejected.",
+    "hermes profile create backup --clone-all copies everything (config, keys, SOUL.md, memories, skills, sessions).",
+    "The voice record key is configurable via voice.record_key in config.yaml — not just Ctrl+B.",
+    ".cursorrules and .cursor/rules/*.mdc files are auto-detected and loaded as project context.",
+    "Context files support 10+ prompt injection patterns — invisible Unicode, 'ignore instructions', exfil attempts.",
+    "GPT-5 and Codex use 'developer' role instead of 'system' in the message format.",
+    "Per-task auxiliary overrides: auxiliary.vision.provider, auxiliary.compression.model, etc. in config.yaml.",
+    "The auxiliary client treats 'main' as a provider alias — resolves to your actual primary provider + model.",
+    "Smart routing can auto-route simple queries to a cheaper model — set smart_model_routing.enabled: true.",
+    "hermes claw migrate --dry-run previews OpenClaw migration without writing anything.",
+    "File paths pasted with quotes or escaped spaces are handled automatically — no manual cleanup needed.",
+    "Slash commands never trigger the large-paste collapse — /command with big arguments works correctly.",
+    "In interrupt mode, slash commands typed during agent execution bypass interrupt logic and run immediately.",
+    "HERMES_DEV=1 bypasses container mode detection for local development.",
+    "Each MCP server gets its own toolset (mcp-servername) that can be toggled independently via hermes tools.",
+    "MCP ${ENV_VAR} placeholders in config are resolved at server spawn — including vars from ~/.hermes/.env.",
+    "Skills from trusted repos (NousResearch) get a 'trusted' security level; community skills get extra scanning.",
+    "The skills quarantine at ~/.hermes/skills/.hub/quarantine/ holds skills pending security review.",
+]
+
+
+def get_random_tip(exclude_recent: int = 0) -> str:
+    """Pick one entry from ``TIPS`` at random and hand it back.
+
+    Args:
+        exclude_recent: Accepted but currently ignored; it is kept in the
+            signature as a placeholder for future deduplication across
+            sessions.
+
+    Returns:
+        A single tip string selected with ``random.choice``.
+    """
+    chosen_tip = random.choice(TIPS)
+    return chosen_tip
+
+
+def get_tip_count() -> int:
+    """Report how many entries the ``TIPS`` corpus currently holds."""
+    total = len(TIPS)
+    return total
@@ -0,0 +1,929 @@
+"""
+Hermes Agent — Web UI server.
+
+Provides a FastAPI backend serving the Vite/React frontend and REST API
+endpoints for managing configuration, environment variables, and sessions.
+
+Usage:
+    python -m hermes_cli.main web          # Start on http://127.0.0.1:9119
+    python -m hermes_cli.main web --port 8080
+"""
+
+import logging
+import os
+import secrets
+import sys
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from hermes_cli import __version__, __release_date__
+from hermes_cli.config import (
+    DEFAULT_CONFIG,
+    OPTIONAL_ENV_VARS,
+    get_config_path,
+    get_env_path,
+    get_hermes_home,
+    load_config,
+    load_env,
+    save_config,
+    save_env_value,
+    remove_env_value,
+    check_config_version,
+    redact_key,
+)
+from gateway.status import get_running_pid, read_runtime_status
+
+try:
+    from fastapi import FastAPI, HTTPException, Request
+    from fastapi.middleware.cors import CORSMiddleware
+    from fastapi.responses import FileResponse, JSONResponse
+    from fastapi.staticfiles import StaticFiles
+    from pydantic import BaseModel
+except ImportError:
+    raise SystemExit(
+        "Web UI requires fastapi and uvicorn.\n"
+        "Run 'hermes web' to auto-install, or: pip install hermes-agent[web]"
+    )
+
+WEB_DIST = Path(__file__).parent / "web_dist"
+_log = logging.getLogger(__name__)
+
+app = FastAPI(title="Hermes Agent", version=__version__)
+
+# ---------------------------------------------------------------------------
+# Session token for protecting sensitive endpoints (reveal).
+# Generated fresh on every server start — dies when the process exits.
+# Handed to the SPA via GET /api/auth/session-token; CORS keeps other origins
+# from reading it, so only the legitimate web UI can use it.
+# ---------------------------------------------------------------------------
+_SESSION_TOKEN = secrets.token_urlsafe(32)
+
+# Simple rate limiter for the reveal endpoint
+_reveal_timestamps: List[float] = []
+_REVEAL_MAX_PER_WINDOW = 5
+_REVEAL_WINDOW_SECONDS = 30
+
+# CORS: restrict to localhost origins only.  The web UI is intended to run
+# locally; binding to 0.0.0.0 with allow_origins=["*"] would let any website
+# read/modify config and secrets.
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origin_regex=r"^https?://(localhost|127\.0\.0\.1)(:\d+)?$",
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+# ---------------------------------------------------------------------------
+# Config schema — auto-generated from DEFAULT_CONFIG
+# ---------------------------------------------------------------------------
+
+# Manual overrides for fields that need select options or custom types
+_SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
+    "model": {
+        "type": "string",
+        "description": "Default model (e.g. anthropic/claude-sonnet-4.6)",
+        "category": "general",
+    },
+    "terminal.backend": {
+        "type": "select",
+        "description": "Terminal execution backend",
+        "options": ["local", "docker", "ssh", "modal", "daytona", "singularity"],
+    },
+    "terminal.modal_mode": {
+        "type": "select",
+        "description": "Modal sandbox mode",
+        "options": ["sandbox", "function"],
+    },
+    "tts.provider": {
+        "type": "select",
+        "description": "Text-to-speech provider",
+        "options": ["edge", "elevenlabs", "openai", "neutts"],
+    },
+    "stt.provider": {
+        "type": "select",
+        "description": "Speech-to-text provider",
+        "options": ["local", "openai", "mistral"],
+    },
+    "display.skin": {
+        "type": "select",
+        "description": "CLI visual theme",
+        "options": ["default", "ares", "mono", "slate"],
+    },
+    "display.resume_display": {
+        "type": "select",
+        "description": "How resumed sessions display history",
+        "options": ["minimal", "full", "off"],
+    },
+    "display.busy_input_mode": {
+        "type": "select",
+        "description": "Input behavior while agent is running",
+        "options": ["queue", "interrupt", "block"],
+    },
+    "memory.provider": {
+        "type": "select",
+        "description": "Memory provider plugin",
+        "options": ["builtin", "honcho"],
+    },
+    "approvals.mode": {
+        "type": "select",
+        "description": "Dangerous command approval mode",
+        "options": ["ask", "yolo", "deny"],
+    },
+    "context.engine": {
+        "type": "select",
+        "description": "Context management engine",
+        "options": ["default", "custom"],
+    },
+    "human_delay.mode": {
+        "type": "select",
+        "description": "Simulated typing delay mode",
+        "options": ["off", "typing", "fixed"],
+    },
+    "logging.level": {
+        "type": "select",
+        "description": "Log level for agent.log",
+        "options": ["DEBUG", "INFO", "WARNING", "ERROR"],
+    },
+    "agent.service_tier": {
+        "type": "select",
+        "description": "API service tier (OpenAI/Anthropic)",
+        "options": ["", "auto", "default", "flex"],
+    },
+    "delegation.reasoning_effort": {
+        "type": "select",
+        "description": "Reasoning effort for delegated subagents",
+        "options": ["", "low", "medium", "high"],
+    },
+}
+
+# Categories with few fields are folded into a larger, related category
+# (source category -> target category) to avoid tab sprawl.
+_CATEGORY_MERGE: Dict[str, str] = {
+    "privacy": "security",
+    "context": "agent",
+    "skills": "agent",
+    "cron": "agent",
+    "network": "agent",
+    "checkpoints": "agent",
+    "approvals": "security",
+    "human_delay": "display",
+    "smart_model_routing": "agent",
+}
+
+# Display order for tabs — unlisted categories sort alphabetically after these.
+_CATEGORY_ORDER = [
+    "general", "agent", "terminal", "display", "delegation",
+    "memory", "compression", "security", "browser", "voice",
+    "tts", "stt", "logging", "discord", "auxiliary",
+]
+
+
+def _infer_type(value: Any) -> str:
+    """Map a Python default value onto the UI field type used by the schema.
+
+    Returns one of ``"boolean"``, ``"number"``, ``"list"``, ``"object"`` or
+    ``"string"`` (the fallback for everything else, including ``None``).
+    """
+    # bool must be tested before the numeric types: bool is a subclass of int.
+    if isinstance(value, bool):
+        return "boolean"
+    for py_types, ui_type in (
+        ((int, float), "number"),
+        (list, "list"),
+        (dict, "object"),
+    ):
+        if isinstance(value, py_types):
+            return ui_type
+    return "string"
+
+
+def _build_schema_from_config(
+    config: Dict[str, Any],
+    prefix: str = "",
+) -> Dict[str, Dict[str, Any]]:
+    """Flatten a nested config mapping into ``{dot.path: field_schema}``.
+
+    Nested dicts are walked recursively and contribute only their leaves.
+    Each leaf gets a ``type`` (from ``_infer_type``), a title-cased
+    ``description`` derived from its dot path, and a ``category``: the first
+    path component for nested keys, ``"general"`` for top-level scalars.
+    ``_SCHEMA_OVERRIDES`` is applied on top, then ``_CATEGORY_MERGE`` remaps
+    the final category.
+
+    Args:
+        config: The (sub-)mapping to walk.
+        prefix: Dot path of ``config`` within the root mapping; empty at
+            the root.
+
+    Returns:
+        Flat dict keyed by dot path.
+    """
+    fields: Dict[str, Dict[str, Any]] = {}
+    for name, default in config.items():
+        dotted = name if not prefix else f"{prefix}.{name}"
+
+        # Internal bookkeeping key: never exposed as an editable field.
+        if dotted == "_config_version":
+            continue
+
+        if isinstance(default, dict):
+            # Sections contribute their leaves, not an entry of their own.
+            fields.update(_build_schema_from_config(default, dotted))
+            continue
+
+        section = prefix.split(".")[0] if prefix else "general"
+        field: Dict[str, Any] = {
+            "type": _infer_type(default),
+            "description": dotted.replace(".", " → ").replace("_", " ").title(),
+            "category": section,
+        }
+        # Manual overrides win over the inferred values.
+        field.update(_SCHEMA_OVERRIDES.get(dotted, {}))
+        # Fold small categories into their merge target (applied last, so it
+        # also covers a category set by an override).
+        field["category"] = _CATEGORY_MERGE.get(field["category"], field["category"])
+        fields[dotted] = field
+    return fields
+
+
+CONFIG_SCHEMA = _build_schema_from_config(DEFAULT_CONFIG)
+
+
+class ConfigUpdate(BaseModel):
+    # Request body for PUT /api/config: the config mapping to persist.
+    config: dict
+
+
+class EnvVarUpdate(BaseModel):
+    # Request body for PUT /api/env: variable name and the value to store.
+    key: str
+    value: str
+
+
+class EnvVarDelete(BaseModel):
+    # Request body for DELETE /api/env: name of the variable to remove.
+    key: str
+
+
+class EnvVarReveal(BaseModel):
+    # Request body for POST /api/env/reveal: name of the variable to unmask.
+    key: str
+
+
+@app.get("/api/status")
+async def get_status():
+    """Report version info, config paths, gateway state and active sessions.
+
+    Gateway details come from the runtime status file.  When the gateway
+    config can be loaded, the platform map is filtered down to the platforms
+    it reports as connected.  If no gateway process is running, the state is
+    forced to ``"stopped"`` (unless it is already ``"startup_failed"``) and
+    the platform map is emptied.
+
+    ``active_sessions`` counts sessions (among the 50 returned by
+    ``list_sessions_rich``) that have not ended and were active within the
+    last 300 seconds.  Counting is best effort: on any error it stays 0.
+    """
+    current_ver, latest_ver = check_config_version()
+
+    gateway_pid = get_running_pid()
+    gateway_running = gateway_pid is not None
+
+    gateway_state = None
+    gateway_platforms: dict = {}
+    gateway_exit_reason = None
+    gateway_updated_at = None
+    configured_gateway_platforms: set[str] | None = None
+    try:
+        from gateway.config import load_gateway_config
+
+        gateway_config = load_gateway_config()
+        configured_gateway_platforms = {
+            platform.value for platform in gateway_config.get_connected_platforms()
+        }
+    except Exception:
+        # Best effort: without a readable gateway config, skip the filtering.
+        configured_gateway_platforms = None
+
+    runtime = read_runtime_status()
+    if runtime:
+        gateway_state = runtime.get("gateway_state")
+        gateway_platforms = runtime.get("platforms") or {}
+        if configured_gateway_platforms is not None:
+            gateway_platforms = {
+                key: value
+                for key, value in gateway_platforms.items()
+                if key in configured_gateway_platforms
+            }
+        gateway_exit_reason = runtime.get("exit_reason")
+        gateway_updated_at = runtime.get("updated_at")
+        if not gateway_running:
+            # A stale status file must not claim a live gateway.
+            gateway_state = gateway_state if gateway_state in ("stopped", "startup_failed") else "stopped"
+            gateway_platforms = {}
+
+    active_sessions = 0
+    try:
+        from hermes_state import SessionDB
+        db = SessionDB()
+        try:
+            sessions = db.list_sessions_rich(limit=50)
+            now = time.time()
+            for s in sessions:
+                if s.get("ended_at") is not None:
+                    continue
+                # Use ``or`` rather than a .get() default: a key that is
+                # present but None would otherwise reach the subtraction and
+                # raise, zeroing the whole count via the except below.
+                last_seen = s.get("last_active") or s.get("started_at") or 0
+                if (now - last_seen) < 300:
+                    active_sessions += 1
+        finally:
+            db.close()
+    except Exception:
+        # Still best effort, but leave a trace instead of failing silently.
+        _log.debug("Could not count active sessions", exc_info=True)
+
+    return {
+        "version": __version__,
+        "release_date": __release_date__,
+        "hermes_home": str(get_hermes_home()),
+        "config_path": str(get_config_path()),
+        "env_path": str(get_env_path()),
+        "config_version": current_ver,
+        "latest_config_version": latest_ver,
+        "gateway_running": gateway_running,
+        "gateway_pid": gateway_pid,
+        "gateway_state": gateway_state,
+        "gateway_platforms": gateway_platforms,
+        "gateway_exit_reason": gateway_exit_reason,
+        "gateway_updated_at": gateway_updated_at,
+        "active_sessions": active_sessions,
+    }
+
+
+@app.get("/api/sessions")
+async def get_sessions():
+    """List the 20 sessions returned by ``list_sessions_rich``.
+
+    Each session dict gains an ``is_active`` flag: true when the session has
+    no ``ended_at`` and its last activity (falling back to ``started_at``)
+    was less than 300 seconds ago.
+
+    Raises:
+        HTTPException: 500 on any failure (details are logged, not returned).
+    """
+    try:
+        from hermes_state import SessionDB
+        db = SessionDB()
+        try:
+            sessions = db.list_sessions_rich(limit=20)
+            now = time.time()
+            for s in sessions:
+                # Use ``or`` rather than a .get() default so a key that is
+                # present but None cannot break the subtraction and turn the
+                # whole listing into a 500.
+                last_seen = s.get("last_active") or s.get("started_at") or 0
+                s["is_active"] = (
+                    s.get("ended_at") is None
+                    and (now - last_seen) < 300
+                )
+            return sessions
+        finally:
+            db.close()
+    except Exception:
+        _log.exception("GET /api/sessions failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+
+@app.get("/api/sessions/search")
+async def search_sessions(q: str = "", limit: int = 20):
+    """Full-text search across session message content using FTS5.
+
+    Args:
+        q: Raw search text.  Bare words get a trailing ``*`` so partial
+            words match; quoted phrases and tokens already ending in ``*``
+            are passed through untouched.
+        limit: Maximum number of message matches to request.  Clamped to
+            1..500 so out-of-range values are never forwarded to the
+            database.
+
+    Returns:
+        ``{"results": [...]}`` with one entry per session (the first match
+        returned for that session), or an empty list for a blank query.
+
+    Raises:
+        HTTPException: 500 if the search fails.
+    """
+    if not q or not q.strip():
+        return {"results": []}
+    # Same upper bound the log endpoint applies to its ``lines`` parameter.
+    limit = max(1, min(limit, 500))
+    try:
+        from hermes_state import SessionDB
+        db = SessionDB()
+        try:
+            # Auto-add prefix wildcards so partial words match
+            # e.g. "nimb" → "nimb*" matches "nimby"
+            # Preserve quoted phrases and existing wildcards as-is
+            import re
+            terms = []
+            for token in re.findall(r'"[^"]*"|\S+', q.strip()):
+                if token.startswith('"') or token.endswith("*"):
+                    terms.append(token)
+                else:
+                    terms.append(token + "*")
+            prefix_query = " ".join(terms)
+            matches = db.search_messages(query=prefix_query, limit=limit)
+            # Group by session_id — keep only the first match per session
+            seen: dict = {}
+            for m in matches:
+                sid = m["session_id"]
+                if sid not in seen:
+                    seen[sid] = {
+                        "session_id": sid,
+                        "snippet": m.get("snippet", ""),
+                        "role": m.get("role"),
+                        "source": m.get("source"),
+                        "model": m.get("model"),
+                        "session_started": m.get("session_started"),
+                    }
+            return {"results": list(seen.values())}
+        finally:
+            db.close()
+    except Exception:
+        _log.exception("GET /api/sessions/search failed")
+        raise HTTPException(status_code=500, detail="Search failed")
+
+
+def _normalize_config_for_web(config: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a shallow copy of *config* with ``model`` flattened to a string.
+
+    ``model`` may be stored either as a bare string or as a dict of subkeys
+    (``default``, ``provider``, ``base_url``, ...).  The web schema is built
+    from DEFAULT_CONFIG, where it is a string, so the dict form is collapsed
+    to its ``default`` value, else its ``name`` value, else ``""``.  Any
+    other ``model`` value is left untouched.
+    """
+    normalized = dict(config)  # shallow copy; the caller's dict is not modified
+    model_entry = normalized.get("model")
+    if not isinstance(model_entry, dict):
+        return normalized
+    fallback = model_entry.get("name", "")
+    normalized["model"] = model_entry.get("default", fallback)
+    return normalized
+
+
+@app.get("/api/config")
+async def get_config():
+    """Return the loaded config, normalized for the web UI.
+
+    Top-level keys starting with an underscore are internal and are left
+    out, so the frontend neither sees them nor sends them back.
+    """
+    visible = {}
+    for key, value in _normalize_config_for_web(load_config()).items():
+        if key.startswith("_"):
+            continue
+        visible[key] = value
+    return visible
+
+
+@app.get("/api/config/defaults")
+async def get_defaults():
+    """Return ``DEFAULT_CONFIG`` unmodified."""
+    defaults = DEFAULT_CONFIG
+    return defaults
+
+
+@app.get("/api/config/schema")
+async def get_schema():
+    """Return the generated field schema plus the preferred tab order."""
+    payload = {
+        "fields": CONFIG_SCHEMA,
+        "category_order": _CATEGORY_ORDER,
+    }
+    return payload
+
+
+def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
+    """Undo ``_normalize_config_for_web`` on a config sent back by the UI.
+
+    The frontend only ever sees ``model`` as a flat string.  If the config
+    currently on disk stores ``model`` as a dict, that dict is reused with
+    its ``default`` replaced by the submitted string, so subkeys such as
+    provider, base_url or api_mode survive the round trip.  If the on-disk
+    value is not a dict, or the disk config cannot be read, the submitted
+    string is kept as-is.
+
+    Returns:
+        A shallow copy of *config*, without ``_model_meta``.
+    """
+    restored = dict(config)
+    # Defensive: drop _model_meta should it ever leak into a request.
+    restored.pop("_model_meta", None)
+
+    new_model = restored.get("model")
+    if not (isinstance(new_model, str) and new_model):
+        return restored
+
+    try:
+        on_disk_model = load_config().get("model")
+        if isinstance(on_disk_model, dict):
+            # Keep every subkey; only the default model name changes.
+            on_disk_model["default"] = new_model
+            restored["model"] = on_disk_model
+    except Exception:
+        pass  # disk config unreadable: fall back to the plain string form
+    return restored
+
+
+@app.put("/api/config")
+async def update_config(body: ConfigUpdate):
+    """Persist the config submitted by the web UI.
+
+    Raises:
+        HTTPException: 500 on any failure (details are logged, not returned).
+    """
+    try:
+        restored = _denormalize_config_from_web(body.config)
+        save_config(restored)
+    except Exception:
+        _log.exception("PUT /api/config failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+    return {"ok": True}
+
+
+@app.get("/api/auth/session-token")
+async def get_session_token():
+    """Hand the per-process session token to the SPA.
+
+    The token guards the sensitive reveal endpoint.  The SPA is expected to
+    keep it in memory only; it is never persisted server-side and stops
+    being valid when this server process exits.  The CORS policy limits
+    cross-origin access to localhost origins.
+    """
+    payload = {"token": _SESSION_TOKEN}
+    return payload
+
+
+@app.get("/api/env")
+async def get_env_vars():
+    """Describe every variable in ``OPTIONAL_ENV_VARS``.
+
+    Values are read from the .env file and only ever returned redacted;
+    ``is_set`` is true when a non-empty value is stored.
+    """
+    stored = load_env()
+
+    def _describe(var_name, info):
+        current = stored.get(var_name)
+        return {
+            "is_set": bool(current),
+            "redacted_value": redact_key(current) if current else None,
+            "description": info.get("description", ""),
+            "url": info.get("url"),
+            "category": info.get("category", ""),
+            "is_password": info.get("password", False),
+            "tools": info.get("tools", []),
+            "advanced": info.get("advanced", False),
+        }
+
+    return {
+        var_name: _describe(var_name, info)
+        for var_name, info in OPTIONAL_ENV_VARS.items()
+    }
+
+
+@app.put("/api/env")
+async def set_env_var(body: EnvVarUpdate):
+    """Store ``body.value`` under ``body.key`` via ``save_env_value``.
+
+    Raises:
+        HTTPException: 500 on any failure (details are logged, not returned).
+    """
+    try:
+        save_env_value(body.key, body.value)
+    except Exception:
+        _log.exception("PUT /api/env failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+    return {"ok": True, "key": body.key}
+
+
+@app.delete("/api/env")
+async def remove_env_var(body: EnvVarDelete):
+    """Remove ``body.key`` via ``remove_env_value``.
+
+    Raises:
+        HTTPException: 404 when nothing was removed; 500 on any other
+            failure (details are logged, not returned).
+    """
+    try:
+        if remove_env_value(body.key):
+            return {"ok": True, "key": body.key}
+        raise HTTPException(status_code=404, detail=f"{body.key} not found in .env")
+    except HTTPException:
+        # Let the 404 through instead of converting it to a 500 below.
+        raise
+    except Exception:
+        _log.exception("DELETE /api/env failed")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+
+@app.post("/api/env/reveal")
+async def reveal_env_var(body: EnvVarReveal, request: Request):
+    """Return the real (unredacted) value of a single env var.
+
+    Protected by:
+    - Ephemeral session token (generated per server start, handed to the SPA
+      by GET /api/auth/session-token), sent as ``Authorization: Bearer ...``
+    - Rate limiting (max 5 reveals per 30s window, shared by all clients)
+    - Audit logging of the revealed key name (never the value)
+
+    Raises:
+        HTTPException: 401 on a missing/wrong token, 429 when the rate limit
+            is exceeded, 404 when the key is not in the .env file.
+    """
+    # --- Token check ---
+    supplied = request.headers.get("authorization", "")
+    expected = f"Bearer {_SESSION_TOKEN}"
+    # Constant-time comparison so response timing does not leak how much of
+    # the token matched.  Compare bytes: compare_digest rejects non-ASCII
+    # str, and the header value is attacker-controlled.
+    if not secrets.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8")):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+    # --- Rate limit ---
+    now = time.time()
+    cutoff = now - _REVEAL_WINDOW_SECONDS
+    # Drop timestamps that fell out of the window (in place: shared list).
+    _reveal_timestamps[:] = [t for t in _reveal_timestamps if t > cutoff]
+    if len(_reveal_timestamps) >= _REVEAL_MAX_PER_WINDOW:
+        raise HTTPException(status_code=429, detail="Too many reveal requests. Try again shortly.")
+    _reveal_timestamps.append(now)
+
+    # --- Reveal ---
+    env_on_disk = load_env()
+    value = env_on_disk.get(body.key)
+    if value is None:
+        raise HTTPException(status_code=404, detail=f"{body.key} not found in .env")
+
+    _log.info("env/reveal: %s", body.key)
+    return {"key": body.key, "value": value}
+
+
+# ---------------------------------------------------------------------------
+# Session detail endpoints
+# ---------------------------------------------------------------------------
+
+
+@app.get("/api/sessions/{session_id}")
+async def get_session_detail(session_id: str):
+    """Return one session record.
+
+    ``session_id`` is first passed through ``resolve_session_id``.
+
+    Raises:
+        HTTPException: 404 when the id does not resolve or no session is
+            found for it.
+    """
+    from hermes_state import SessionDB
+    db = SessionDB()
+    try:
+        resolved = db.resolve_session_id(session_id)
+        if resolved:
+            record = db.get_session(resolved)
+            if record:
+                return record
+        raise HTTPException(status_code=404, detail="Session not found")
+    finally:
+        db.close()
+
+
+@app.get("/api/sessions/{session_id}/messages")
+async def get_session_messages(session_id: str):
+    """Return the messages of one session together with its resolved id.
+
+    Raises:
+        HTTPException: 404 when ``session_id`` does not resolve.
+    """
+    from hermes_state import SessionDB
+    db = SessionDB()
+    try:
+        resolved = db.resolve_session_id(session_id)
+        if resolved:
+            return {"session_id": resolved, "messages": db.get_messages(resolved)}
+        raise HTTPException(status_code=404, detail="Session not found")
+    finally:
+        db.close()
+
+
+@app.delete("/api/sessions/{session_id}")
+async def delete_session_endpoint(session_id: str):
+    """Delete one session.
+
+    The id is handed to ``delete_session`` as given (it is not passed
+    through ``resolve_session_id`` first).
+
+    Raises:
+        HTTPException: 404 when ``delete_session`` reports nothing deleted.
+    """
+    from hermes_state import SessionDB
+    db = SessionDB()
+    try:
+        deleted = db.delete_session(session_id)
+        if deleted:
+            return {"ok": True}
+        raise HTTPException(status_code=404, detail="Session not found")
+    finally:
+        db.close()
+
+
+# ---------------------------------------------------------------------------
+# Log viewer endpoint
+# ---------------------------------------------------------------------------
+
+
+@app.get("/api/logs")
+async def get_logs(
+    file: str = "agent",
+    lines: int = 100,
+    level: Optional[str] = None,
+    component: Optional[str] = None,
+):
+    """Return the tail of one of the known log files.
+
+    Args:
+        file: Key into ``LOG_FILES`` selecting which log to read.
+        lines: Number of lines requested; at most 500 are asked for.
+        level: Optional minimum level, forwarded to ``_read_tail``.
+        component: Optional component name, translated to logger-name
+            prefixes via ``COMPONENT_PREFIXES`` when that table is
+            importable.
+
+    Returns:
+        ``{"file": file, "lines": [...]}``; the list is empty when the log
+        file does not exist yet.
+
+    Raises:
+        HTTPException: 400 for an unknown ``file`` key.
+    """
+    from hermes_cli.logs import _read_tail, LOG_FILES
+
+    log_name = LOG_FILES.get(file)
+    if not log_name:
+        raise HTTPException(status_code=400, detail=f"Unknown log file: {file}")
+    log_path = get_hermes_home() / "logs" / log_name
+    if not log_path.exists():
+        return {"file": file, "lines": []}
+
+    try:
+        from hermes_logging import COMPONENT_PREFIXES as prefix_table
+    except ImportError:
+        # No prefix table available: a component filter matches no prefixes.
+        prefix_table = {}
+
+    wanted_prefixes = prefix_table.get(component, ()) if component else ()
+    tail = _read_tail(
+        log_path,
+        min(lines, 500),
+        has_filters=bool(level or component),
+        min_level=level,
+        component_prefixes=wanted_prefixes,
+    )
+    return {"file": file, "lines": tail}
+
+
+# ---------------------------------------------------------------------------
+# Cron job management endpoints
+# ---------------------------------------------------------------------------
+
+
+class CronJobCreate(BaseModel):
+    # Request body for POST /api/cron/jobs; every field is forwarded as the
+    # same-named keyword argument to cron.jobs.create_job.
+    prompt: str
+    schedule: str
+    name: str = ""
+    deliver: str = "local"
+
+
+class CronJobUpdate(BaseModel):
+    # Request body for PUT /api/cron/jobs/{job_id}; ``updates`` is passed
+    # unchanged to cron.jobs.update_job.
+    updates: dict
+
+
+@app.get("/api/cron/jobs")
+async def list_cron_jobs():
+    """Return every cron job, disabled ones included."""
+    from cron.jobs import list_jobs
+    jobs = list_jobs(include_disabled=True)
+    return jobs
+
+
+@app.get("/api/cron/jobs/{job_id}")
+async def get_cron_job(job_id: str):
+    """Return a single cron job.
+
+    Raises:
+        HTTPException: 404 when ``get_job`` finds nothing for ``job_id``.
+    """
+    from cron.jobs import get_job
+    found = get_job(job_id)
+    if found:
+        return found
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs")
+async def create_cron_job(body: CronJobCreate):
+    """Create a cron job from the request body and return it.
+
+    Raises:
+        HTTPException: 400 carrying the error text when ``create_job``
+            raises.
+    """
+    from cron.jobs import create_job
+    job_fields = {
+        "prompt": body.prompt,
+        "schedule": body.schedule,
+        "name": body.name,
+        "deliver": body.deliver,
+    }
+    try:
+        return create_job(**job_fields)
+    except Exception as exc:
+        _log.exception("POST /api/cron/jobs failed")
+        raise HTTPException(status_code=400, detail=str(exc))
+
+
+@app.put("/api/cron/jobs/{job_id}")
+async def update_cron_job(job_id: str, body: CronJobUpdate):
+    """Apply ``body.updates`` to a cron job and return the result.
+
+    Raises:
+        HTTPException: 404 when ``update_job`` returns nothing.
+    """
+    from cron.jobs import update_job
+    updated = update_job(job_id, body.updates)
+    if updated:
+        return updated
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs/{job_id}/pause")
+async def pause_cron_job(job_id: str):
+    """Pause a cron job and return the result of ``pause_job``.
+
+    Raises:
+        HTTPException: 404 when ``pause_job`` returns nothing.
+    """
+    from cron.jobs import pause_job
+    paused = pause_job(job_id)
+    if paused:
+        return paused
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs/{job_id}/resume")
+async def resume_cron_job(job_id: str):
+    """Resume a cron job and return the result of ``resume_job``.
+
+    Raises:
+        HTTPException: 404 when ``resume_job`` returns nothing.
+    """
+    from cron.jobs import resume_job
+    resumed = resume_job(job_id)
+    if resumed:
+        return resumed
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.post("/api/cron/jobs/{job_id}/trigger")
+async def trigger_cron_job(job_id: str):
+    """Trigger a cron job and return the result of ``trigger_job``.
+
+    Raises:
+        HTTPException: 404 when ``trigger_job`` returns nothing.
+    """
+    from cron.jobs import trigger_job
+    triggered = trigger_job(job_id)
+    if triggered:
+        return triggered
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+@app.delete("/api/cron/jobs/{job_id}")
+async def delete_cron_job(job_id: str):
+    """Remove a cron job.
+
+    Raises:
+        HTTPException: 404 when ``remove_job`` reports nothing removed.
+    """
+    from cron.jobs import remove_job
+    removed = remove_job(job_id)
+    if removed:
+        return {"ok": True}
+    raise HTTPException(status_code=404, detail="Job not found")
+
+
+# ---------------------------------------------------------------------------
+# Skills & Tools endpoints
+# ---------------------------------------------------------------------------
+
+
+class SkillToggle(BaseModel):
+    # Request body for PUT /api/skills/toggle: skill name and desired state.
+    name: str
+    enabled: bool
+
+
+@app.get("/api/skills")
+async def get_skills():
+    """List skills, each annotated with an ``enabled`` flag.
+
+    A skill is reported as enabled when its name is not in the configured
+    disabled set.
+    """
+    from tools.skills_tool import _find_all_skills
+    from hermes_cli.skills_config import get_disabled_skills
+    disabled_names = get_disabled_skills(load_config())
+    # NOTE(review): presumably skip_disabled=True makes the finder skip its
+    # own disabled-skill filtering so disabled skills are listed too —
+    # confirm against tools.skills_tool.
+    found = _find_all_skills(skip_disabled=True)
+    for skill in found:
+        skill["enabled"] = skill["name"] not in disabled_names
+    return found
+
+
+@app.put("/api/skills/toggle")
+async def toggle_skill(body: SkillToggle):
+    """Enable or disable a skill by editing the configured disabled set.
+
+    Enabling removes the name from the set (a no-op if absent); disabling
+    adds it.  The updated set is saved back through
+    ``save_disabled_skills``.
+    """
+    from hermes_cli.skills_config import get_disabled_skills, save_disabled_skills
+    config = load_config()
+    disabled_names = get_disabled_skills(config)
+    apply_change = disabled_names.discard if body.enabled else disabled_names.add
+    apply_change(body.name)
+    save_disabled_skills(config, disabled_names)
+    return {"ok": True, "name": body.name, "enabled": body.enabled}
+
+
+@app.get("/api/tools/toolsets")
+async def get_toolsets():
+    """Describe every configurable toolset for the ``cli`` platform.
+
+    Each entry carries the toolset's name, label and description, whether it
+    is enabled (``available`` mirrors ``enabled``), whether
+    ``_toolset_has_keys`` reports it as configured, and the sorted,
+    de-duplicated tool names it resolves to (empty if resolution fails).
+    """
+    from hermes_cli.tools_config import (
+        _get_effective_configurable_toolsets,
+        _get_platform_tools,
+        _toolset_has_keys,
+    )
+    from toolsets import resolve_toolset
+
+    def _tool_names(toolset_name):
+        # A toolset that fails to resolve is still listed, just without tools.
+        try:
+            return sorted(set(resolve_toolset(toolset_name)))
+        except Exception:
+            return []
+
+    config = load_config()
+    active = _get_platform_tools(
+        config,
+        "cli",
+        include_default_mcp_servers=False,
+    )
+    entries = []
+    for name, label, desc in _get_effective_configurable_toolsets():
+        tool_names = _tool_names(name)
+        enabled = name in active
+        entry = {
+            "name": name,
+            "label": label,
+            "description": desc,
+            "enabled": enabled,
+            "available": enabled,
+            "configured": _toolset_has_keys(name, config),
+            "tools": tool_names,
+        }
+        entries.append(entry)
+    return entries
+
+
+# ---------------------------------------------------------------------------
+# Raw YAML config endpoint
+# ---------------------------------------------------------------------------
+
+
+class RawConfigUpdate(BaseModel):
+    # Request body for PUT /api/config/raw: the full config as YAML text.
+    yaml_text: str
+
+
+@app.get("/api/config/raw")
+async def get_config_raw():
+    """Return the config file's text verbatim, or ``""`` if it is missing."""
+    config_file = get_config_path()
+    text = config_file.read_text(encoding="utf-8") if config_file.exists() else ""
+    return {"yaml": text}
+
+
+@app.put("/api/config/raw")
+async def update_config_raw(body: RawConfigUpdate):
+    """Parse the submitted YAML with the safe loader and save it as config.
+
+    Raises:
+        HTTPException: 400 when the text is not valid YAML or its top level
+            is not a mapping.
+    """
+    try:
+        document = yaml.safe_load(body.yaml_text)
+        if isinstance(document, dict):
+            save_config(document)
+            return {"ok": True}
+        raise HTTPException(status_code=400, detail="YAML must be a mapping")
+    except yaml.YAMLError as exc:
+        raise HTTPException(status_code=400, detail=f"Invalid YAML: {exc}")
+
+
+# ---------------------------------------------------------------------------
+# Token / cost analytics endpoint
+# ---------------------------------------------------------------------------
+
+
+@app.get("/api/analytics/usage")
+async def get_usage_analytics(days: int = 30):
+    """Aggregate token and cost figures over sessions started recently.
+
+    Args:
+        days: Size of the look-back window; only sessions whose
+            ``started_at`` is later than ``now - days * 86400`` are counted.
+
+    Returns:
+        A dict with ``daily`` (one row per calendar day), ``by_model`` (one
+        row per non-null model, ordered by input + output tokens,
+        descending), ``totals`` (a single aggregate row) and
+        ``period_days``.
+    """
+    from hermes_state import SessionDB
+    db = SessionDB()
+    try:
+        since = time.time() - (days * 86400)
+
+        def _run(sql):
+            # Every query below takes the window start as its only parameter.
+            return db._conn.execute(sql, (since,))
+
+        daily = [dict(row) for row in _run("""
+            SELECT date(started_at, 'unixepoch') as day,
+                   SUM(input_tokens) as input_tokens,
+                   SUM(output_tokens) as output_tokens,
+                   SUM(cache_read_tokens) as cache_read_tokens,
+                   SUM(reasoning_tokens) as reasoning_tokens,
+                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
+                   COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
+                   COUNT(*) as sessions
+            FROM sessions WHERE started_at > ?
+            GROUP BY day ORDER BY day
+        """).fetchall()]
+
+        by_model = [dict(row) for row in _run("""
+            SELECT model,
+                   SUM(input_tokens) as input_tokens,
+                   SUM(output_tokens) as output_tokens,
+                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
+                   COUNT(*) as sessions
+            FROM sessions WHERE started_at > ? AND model IS NOT NULL
+            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
+        """).fetchall()]
+
+        totals = dict(_run("""
+            SELECT SUM(input_tokens) as total_input,
+                   SUM(output_tokens) as total_output,
+                   SUM(cache_read_tokens) as total_cache_read,
+                   SUM(reasoning_tokens) as total_reasoning,
+                   COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
+                   COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
+                   COUNT(*) as total_sessions
+            FROM sessions WHERE started_at > ?
+        """).fetchone())
+
+        return {"daily": daily, "by_model": by_model, "totals": totals, "period_days": days}
+    finally:
+        db.close()
+
+
+def mount_spa(application: FastAPI):
+    """Register the routes that serve the built frontend.
+
+    With a build present in ``WEB_DIST``: ``/assets`` is mounted as static
+    files and a catch-all route serves any existing file inside the build
+    directory, falling back to ``index.html`` (sent with no-cache headers)
+    so client-side routing works.  Without a build, a catch-all route
+    answers 404 with a hint on how to build the frontend.
+    """
+    if WEB_DIST.exists():
+        application.mount("/assets", StaticFiles(directory=WEB_DIST / "assets"), name="assets")
+
+        @application.get("/{full_path:path}")
+        async def serve_spa(full_path: str):
+            if full_path:
+                candidate = WEB_DIST / full_path
+                # Prevent path traversal via url-encoded sequences (%2e%2e/):
+                # the resolved target must stay inside the build directory.
+                inside_dist = candidate.resolve().is_relative_to(WEB_DIST.resolve())
+                if inside_dist and candidate.exists() and candidate.is_file():
+                    return FileResponse(candidate)
+            return FileResponse(
+                WEB_DIST / "index.html",
+                headers={"Cache-Control": "no-store, no-cache, must-revalidate"},
+            )
+
+        return
+
+    @application.get("/{full_path:path}")
+    async def no_frontend(full_path: str):
+        return JSONResponse(
+            {"error": "Frontend not built. Run: cd web && npm run build"},
+            status_code=404,
+        )
+
+
+mount_spa(app)
+
+
+def _browser_url(host: str, port: int) -> str:
+    """Build a URL a browser can open for the given bind address."""
+    if host in ("", "0.0.0.0"):
+        # IPv4 wildcard bind is not a connectable address: use loopback.
+        shown = "127.0.0.1"
+    elif host == "::":
+        # IPv6 wildcard bind: use the IPv6 loopback, bracketed.
+        shown = "[::1]"
+    elif ":" in host and not host.startswith("["):
+        # Bare IPv6 literal: must be bracketed before ":port" in a URL.
+        shown = f"[{host}]"
+    else:
+        shown = host
+    return f"http://{shown}:{port}"
+
+
+def start_server(host: str = "127.0.0.1", port: int = 9119, open_browser: bool = True):
+    """Start the web UI server (blocks until uvicorn exits).
+
+    Args:
+        host: Address to bind.  Anything other than a loopback address
+            triggers a warning, because the UI exposes config and API keys.
+        port: TCP port to bind.
+        open_browser: When true, open the UI in the default browser about
+            one second after startup (from a daemon thread).
+    """
+    import uvicorn
+
+    if host not in ("127.0.0.1", "localhost", "::1"):
+        logging.warning(
+            "Binding to %s — the web UI exposes config and API keys. "
+            "Only bind to non-localhost if you trust all users on the network.", host,
+        )
+
+    # The bind address is not always a valid URL host (IPv6 literals need
+    # brackets, wildcard binds are not connectable), so derive the URL once
+    # and use it for both the browser and the console hint.
+    url = _browser_url(host, port)
+
+    if open_browser:
+        import threading
+        import webbrowser
+
+        def _open():
+            import time as _t
+            # Give uvicorn a moment to start listening first.
+            _t.sleep(1.0)
+            webbrowser.open(url)
+
+        threading.Thread(target=_open, daemon=True).start()
+
+    print(f"  Hermes Web UI → {url}")
+    uvicorn.run(app, host=host, port=port, log_level="warning")
@@ -189,6 +189,37 @@ def is_wsl() -> bool:
    return _wsl_detected


+_container_detected: bool | None = None
+
+
+def is_container() -> bool:
+    """Report whether the process appears to run inside a container.
+
+    Looks for the marker files ``/.dockerenv`` and ``/run/.containerenv``
+    first, then scans ``/proc/1/cgroup`` for the substrings ``docker``,
+    ``podman`` or ``/lxc/``.  An unreadable cgroup file counts as "no
+    marker".  The answer is computed once and stored in the module-level
+    ``_container_detected`` cache.
+    """
+    global _container_detected
+    if _container_detected is None:
+        marker_files = ("/.dockerenv", "/run/.containerenv")
+        detected = any(os.path.exists(marker) for marker in marker_files)
+        if not detected:
+            try:
+                with open("/proc/1/cgroup", "r") as handle:
+                    cgroup_text = handle.read()
+            except OSError:
+                # No /proc or no permission: treat as no cgroup markers.
+                cgroup_text = ""
+            detected = any(tag in cgroup_text for tag in ("docker", "podman", "/lxc/"))
+        _container_detected = detected
+    return _container_detected
+
+
 # ─── Well-Known Paths ─────────────────────────────────────────────────────────


@@ -216,6 +247,51 @@ def get_env_path() -> Path:
    return get_hermes_home() / ".env"


+# ─── Network Preferences ─────────────────────────────────────────────────────
+
+
+def apply_ipv4_preference(force: bool = False) -> None:
+    """Optionally replace ``socket.getaddrinfo`` with an IPv4-first wrapper.
+
+    Does nothing unless *force* is true.  When enabled, lookups that do not
+    name an address family (``family == 0``, i.e. ``AF_UNSPEC``) are first
+    attempted as ``AF_INET``; if that raises ``socket.gaierror`` the lookup
+    is repeated with the caller's original arguments so hosts without an
+    A record still resolve.  Lookups that request a specific family are
+    passed through untouched.
+
+    The wrapper carries a ``_hermes_ipv4_patched`` marker, so calling this
+    function again after patching is a no-op.
+    Set ``network.force_ipv4: true`` in ``config.yaml`` to enable.
+    """
+    if not force:
+        return
+
+    import socket
+
+    stock_resolver = socket.getaddrinfo
+    # The marker means a previous call already installed the wrapper.
+    if getattr(stock_resolver, "_hermes_ipv4_patched", False):
+        return
+
+    def _ipv4_getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
+        if family != 0:
+            # Caller asked for a specific family — honour it as-is.
+            return stock_resolver(host, port, family, type, proto, flags)
+        try:
+            return stock_resolver(host, port, socket.AF_INET, type, proto, flags)
+        except socket.gaierror:
+            # IPv4 lookup failed — retry with the unrestricted family.
+            return stock_resolver(host, port, family, type, proto, flags)
+
+    setattr(_ipv4_getaddrinfo, "_hermes_ipv4_patched", True)
+    socket.getaddrinfo = _ipv4_getaddrinfo  # type: ignore[assignment]
+
+
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"

@@ -7,16 +7,28 @@ gateway call early in their startup path.  All log files live under
 Log files produced:
    agent.log   — INFO+, all agent/tool/session activity (the main log)
    errors.log  — WARNING+, errors and warnings only (quick triage)
+    gateway.log — INFO+, gateway-only events (created when mode="gateway")

-Both files use ``RotatingFileHandler`` with ``RedactingFormatter`` so
+All files use ``RotatingFileHandler`` with ``RedactingFormatter`` so
 secrets are never written to disk.
+
+Component separation:
+    gateway.log only receives records from ``gateway.*`` loggers —
+    platform adapters, session management, slash commands, delivery.
+    agent.log remains the catch-all (everything goes there).
+
+Session context:
+    Call ``set_session_context(session_id)`` at the start of a conversation
+    and ``clear_session_context()`` when done.  All log lines emitted on
+    that thread will include ``[session_id]`` for filtering/correlation.
 """

 import logging
 import os
+import threading
 from logging.handlers import RotatingFileHandler
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Sequence

 from hermes_constants import get_config_path, get_hermes_home

@@ -25,9 +37,14 @@ from hermes_constants import get_config_path, get_hermes_home
 # unless ``force=True``.
 _logging_initialized = False

-# Default log format — includes timestamp, level, logger name, and message.
-_LOG_FORMAT = "%(asctime)s %(levelname)s %(name)s: %(message)s"
-_LOG_FORMAT_VERBOSE = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+# Thread-local storage for per-conversation session context.
+_session_context = threading.local()
+
+# Default log format — includes timestamp, level, optional session tag,
+# logger name, and message.  The ``%(session_tag)s`` field is guaranteed to
+# exist on every LogRecord via _install_session_record_factory() below.
+_LOG_FORMAT = "%(asctime)s %(levelname)s%(session_tag)s %(name)s: %(message)s"
+_LOG_FORMAT_VERBOSE = "%(asctime)s - %(name)s - %(levelname)s%(session_tag)s - %(message)s"

 # Third-party loggers that are noisy at DEBUG/INFO level.
 _NOISY_LOGGERS = (
@@ -48,6 +65,99 @@ _NOISY_LOGGERS = (
 )


+# ---------------------------------------------------------------------------
+# Public session context API
+# ---------------------------------------------------------------------------
+
+def set_session_context(session_id: str) -> None:
+    """Record *session_id* as the active session for the calling thread.
+
+    The value is stored on the thread-local ``_session_context``; the
+    record factory installed by this module reads it to add a
+    ``[session_id]`` tag to log records created on the same thread.
+    Intended to be called at the start of ``run_conversation()``.
+    """
+    setattr(_session_context, "session_id", session_id)
+
+
+def clear_session_context() -> None:
+    """Reset the calling thread's session ID to ``None``.
+
+    Not strictly required between conversations, because
+    ``set_session_context()`` overwrites any previous value.  Call it when
+    the thread goes on to do non-conversation work after
+    ``run_conversation()`` returns.
+    """
+    setattr(_session_context, "session_id", None)
+
+
+# ---------------------------------------------------------------------------
+# Record factory — injects session_tag into every LogRecord at creation
+# ---------------------------------------------------------------------------
+
+def _install_session_record_factory() -> None:
+    """Wrap the global LogRecord factory so every record has ``session_tag``.
+
+    The wrapper delegates to whichever factory was active at install time,
+    then sets ``record.session_tag`` to `` [<session_id>]`` when the calling
+    thread has a session ID in ``_session_context`` and to the empty string
+    otherwise.  Because this happens at record creation rather than in a
+    handler or logger filter, format strings that use ``%(session_tag)s``
+    work for every handler in the process.
+
+    The wrapper is tagged with ``_hermes_session_injector``; if the active
+    factory already carries that tag, nothing is installed, so repeated
+    calls (e.g. on module reload) do not stack wrappers.
+    """
+    wrapped_factory = logging.getLogRecordFactory()
+    already_installed = getattr(wrapped_factory, "_hermes_session_injector", False)
+    if not already_installed:
+        def _session_record_factory(*args, **kwargs):
+            rec = wrapped_factory(*args, **kwargs)
+            active_session = getattr(_session_context, "session_id", None)
+            if active_session:
+                rec.session_tag = f" [{active_session}]"  # type: ignore[attr-defined]
+            else:
+                rec.session_tag = ""  # type: ignore[attr-defined]
+            return rec
+
+        setattr(_session_record_factory, "_hermes_session_injector", True)
+        logging.setLogRecordFactory(_session_record_factory)
+
+
+# Install immediately on import — session_tag is available on all records
+# from this point forward, even before setup_logging() is called.
+_install_session_record_factory()
+
+
+# ---------------------------------------------------------------------------
+# Filters
+# ---------------------------------------------------------------------------
+
+class _ComponentFilter(logging.Filter):
+    """Only pass records that belong to one of the *prefixes* logger trees.
+
+    A record matches prefix ``p`` when its logger name is exactly ``p`` or
+    a dotted descendant such as ``p.child`` — the same hierarchy rule the
+    stdlib ``logging.Filter`` applies to its ``name``.  A raw
+    ``str.startswith`` test would also admit unrelated loggers that merely
+    share leading characters (``gateway_sdk`` for ``gateway``).  An empty
+    prefix matches every record.
+
+    Used to route gateway-specific records to ``gateway.log`` while
+    keeping ``agent.log`` as the catch-all.
+
+    Parameters
+    ----------
+    prefixes
+        Logger names whose hierarchies should pass.  A single string is
+        accepted and treated as one prefix.
+    """
+
+    def __init__(self, prefixes: Sequence[str]) -> None:
+        super().__init__()
+        # A bare string is itself a Sequence[str]; tuple("gateway") would
+        # split it into single characters, so wrap it first.
+        if isinstance(prefixes, str):
+            prefixes = (prefixes,)
+        self._prefixes = tuple(prefixes)
+        # Precomputed "prefix." forms for the descendant check.
+        self._dotted_prefixes = tuple(f"{p}." for p in self._prefixes if p)
+        self._match_all = "" in self._prefixes
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        if self._match_all:
+            return True
+        name = record.name
+        return name in self._prefixes or name.startswith(self._dotted_prefixes)
+
+
+# Logger name prefixes that belong to each component.
+# Used by _ComponentFilter and exposed for ``hermes logs --component``.
+COMPONENT_PREFIXES = {
+    "gateway": ("gateway",),
+    "agent": ("agent", "run_agent", "model_tools", "batch_runner"),
+    "tools": ("tools",),
+    "cli": ("hermes_cli", "cli"),
+    "cron": ("cron",),
+}
+
+
+# ---------------------------------------------------------------------------
+# Main setup
+# ---------------------------------------------------------------------------
+
 def setup_logging(
    *,
    hermes_home: Optional[Path] = None,
@@ -78,8 +188,9 @@ def setup_logging(
        Number of rotated backup files to keep.
        Defaults to 3 or the value from config.yaml ``logging.backup_count``.
    mode
-        Hint for the caller context: ``"cli"``, ``"gateway"``, ``"cron"``.
-        Currently used only for log format tuning (gateway includes PID).
+        Caller context: ``"cli"``, ``"gateway"``, ``"cron"``.
+        When ``"gateway"``, an additional ``gateway.log`` file is created
+        that receives only gateway-component records.
    force
        Re-run setup even if it has already been called.

@@ -130,6 +241,18 @@ def setup_logging(
        formatter=RedactingFormatter(_LOG_FORMAT),
    )

+    # --- gateway.log (INFO+, gateway component only) ------------------------
+    if mode == "gateway":
+        _add_rotating_handler(
+            root,
+            log_dir / "gateway.log",
+            level=logging.INFO,
+            max_bytes=5 * 1024 * 1024,
+            backup_count=3,
+            formatter=RedactingFormatter(_LOG_FORMAT),
+            log_filter=_ComponentFilter(COMPONENT_PREFIXES["gateway"]),
+        )
+
    # Ensure root logger level is low enough for the handlers to fire.
    if root.level == logging.NOTSET or root.level > level:
        root.setLevel(level)
@@ -218,9 +341,16 @@ def _add_rotating_handler(
    max_bytes: int,
    backup_count: int,
    formatter: logging.Formatter,
+    log_filter: Optional[logging.Filter] = None,
 ) -> None:
    """Add a ``RotatingFileHandler`` to *logger*, skipping if one already
    exists for the same resolved file path (idempotent).
+
+    Parameters
+    ----------
+    log_filter
+        Optional filter to attach to the handler (e.g. ``_ComponentFilter``
+        for gateway.log).
    """
    resolved = path.resolve()
    for existing in logger.handlers:
@@ -236,6 +366,8 @@ def _add_rotating_handler(
    )
    handler.setLevel(level)
    handler.setFormatter(formatter)
+    if log_filter is not None:
+        handler.addFilter(log_filter)
    logger.addHandler(handler)


@@ -376,6 +376,24 @@ def backup_existing(path: Path, backup_root: Path) -> Optional[Path]:
    return dest


+# ── Brand rewriting ─────────────────────────────────────────
+# Replace OpenClaw brand names with Hermes in migrated text so that
+# memory entries, user profiles, SOUL.md, and workspace instructions
+# read as self-referential to the new agent identity.
+_REBRAND_PATTERNS: List[Tuple[re.Pattern, str]] = [
+    (re.compile(expr, re.IGNORECASE), 'Hermes')
+    for expr in (r'\bOpen[\s-]?Claw\b', r'\bClawdBot\b', r'\bMoltBot\b')
+]
+
+
+def rebrand_text(text: str) -> str:
+    """Return *text* with OpenClaw / ClawdBot / MoltBot renamed to Hermes.
+
+    Each entry of ``_REBRAND_PATTERNS`` is applied in order; matching is
+    case-insensitive and anchored on word boundaries.
+    """
+    rewritten = text
+    for regex, brand in _REBRAND_PATTERNS:
+        rewritten = regex.sub(brand, rewritten)
+    return rewritten
+
+
 def parse_existing_memory_entries(path: Path) -> List[str]:
    if not path.exists():
        return []
@@ -782,12 +800,13 @@ class Migrator:
        path.write_text("\n".join(entries) + "\n", encoding="utf-8")
        return path

-    def copy_file(self, source: Path, destination: Path, kind: str) -> None:
+    def copy_file(self, source: Path, destination: Path, kind: str,
+                  transform: Optional[Any] = None) -> None:
        if not source or not source.exists():
            return

        if destination.exists():
-            if sha256_file(source) == sha256_file(destination):
+            if not transform and sha256_file(source) == sha256_file(destination):
                self.record(kind, source, destination, "skipped", "Target already matches source")
                return
            if not self.overwrite:
@@ -797,7 +816,13 @@ class Migrator:
        if self.execute:
            backup_path = self.maybe_backup(destination)
            ensure_parent(destination)
-            shutil.copy2(source, destination)
+            if transform:
+                content = read_text(source)
+                content = transform(content)
+                destination.write_text(content, encoding="utf-8")
+                shutil.copystat(source, destination)
+            else:
+                shutil.copy2(source, destination)
            self.record(kind, source, destination, "migrated", backup=str(backup_path) if backup_path else None)
        else:
            self.record(kind, source, destination, "migrated", "Would copy")
@@ -807,7 +832,7 @@ class Migrator:
        if not source:
            self.record("soul", None, self.target_root / "SOUL.md", "skipped", "No OpenClaw SOUL.md found")
            return
-        self.copy_file(source, self.target_root / "SOUL.md", kind="soul")
+        self.copy_file(source, self.target_root / "SOUL.md", kind="soul", transform=rebrand_text)

    def migrate_workspace_agents(self) -> None:
        source = self.source_candidate(
@@ -821,7 +846,7 @@ class Migrator:
            self.record("workspace-agents", source, None, "skipped", "No workspace target was provided")
            return
        destination = self.workspace_target / WORKSPACE_INSTRUCTIONS_FILENAME
-        self.copy_file(source, destination, kind="workspace-agents")
+        self.copy_file(source, destination, kind="workspace-agents", transform=rebrand_text)

    def migrate_memory(self, source: Optional[Path], destination: Path, limit: int, kind: str) -> None:
        if not source or not source.exists():
@@ -832,6 +857,7 @@ class Migrator:
        if not incoming:
            self.record(kind, source, destination, "skipped", "No importable entries found")
            return
+        incoming = [rebrand_text(entry) for entry in incoming]

        existing = parse_existing_memory_entries(destination)
        merged, stats, overflowed = merge_entries(existing, incoming, limit)
@@ -927,7 +953,7 @@ class Migrator:

    def load_openclaw_config(self) -> Dict[str, Any]:
        # Check current name and legacy config filenames
-        for name in ("openclaw.json", "clawdbot.json", "moldbot.json"):
+        for name in ("openclaw.json", "clawdbot.json", "moltbot.json"):
            config_path = self.source_root / name
            if config_path.exists():
                try:
@@ -997,7 +1023,17 @@ class Migrator:
            .get("workspace")
        )
        if isinstance(workspace, str) and workspace.strip():
-            additions["MESSAGING_CWD"] = workspace.strip()
+            ws_path = workspace.strip()
+            # Skip if the workspace points inside the OpenClaw source directory —
+            # that path will be stale after migration and would cause the Hermes
+            # gateway to use the old OpenClaw workspace as its cwd, picking up
+            # OpenClaw's AGENTS.md, MEMORY.md, etc.
+            try:
+                inside_source = Path(ws_path).resolve().is_relative_to(self.source_root.resolve())
+            except (ValueError, OSError):
+                inside_source = False
+            if not inside_source:
+                additions["MESSAGING_CWD"] = ws_path

        allowlist_path = self.source_root / "credentials" / "telegram-default-allowFrom.json"
        if allowlist_path.exists():
@@ -1543,6 +1579,7 @@ class Migrator:
        if not all_incoming:
            self.record("daily-memory", source_dir, destination, "skipped", "No importable entries found in daily memory files")
            return
+        all_incoming = [rebrand_text(entry) for entry in all_incoming]

        existing = parse_existing_memory_entries(destination)
        merged, stats, overflowed = merge_entries(existing, all_incoming, self.memory_limit)
@@ -1958,7 +1995,9 @@ class Migrator:
            if compaction.get("timeout"):
                pass  # No direct mapping
            if compaction.get("model"):
-                compression["summary_model"] = compaction["model"]
+                aux = hermes_cfg.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                aux_comp["model"] = compaction["model"]
            hermes_cfg["compression"] = compression
            changes = True

@@ -19,6 +19,9 @@
    "agent-browser": "^0.13.0",
    "@askjo/camoufox-browser": "^1.0.0"
  },
+  "overrides": {
+    "lodash": "4.18.1"
+  },
  "engines": {
    "node": ">=18.0.0"
  }
@@ -43,7 +43,7 @@ dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "py
 messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
-matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4"]
+matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29"]
 cli = ["simple-term-menu>=1.0,<2"]
 tts-premium = ["elevenlabs>=1.0,<2"]
 voice = [
@@ -76,6 +76,7 @@ termux = [
 ]
 dingtalk = ["dingtalk-stream>=0.1.0,<1"]
 feishu = ["lark-oapi>=1.5.3,<2"]
+web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git",
@@ -107,6 +108,7 @@ all = [
  "hermes-agent[dingtalk]",
  "hermes-agent[feishu]",
  "hermes-agent[mistral]",
+  "hermes-agent[web]",
 ]

 [project.scripts]
@@ -117,6 +119,9 @@ hermes-acp = "acp_adapter.entry:main"
 [tool.setuptools]
 py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]

+[tool.setuptools.package-data]
+hermes_cli = ["web_dist/**/*"]
+
 [tool.setuptools.packages.find]
 include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

@@ -1,36 +0,0 @@
-# NOTE: This file is maintained for convenience only.
-# The canonical dependency list is in pyproject.toml.
-# Preferred install: pip install -e ".[all]"
-
-# Core dependencies
-openai
-python-dotenv
-fire
-httpx
-rich
-tenacity
-prompt_toolkit
-pyyaml
-requests
-jinja2
-pydantic>=2.0
-PyJWT[crypto]
-debugpy
-
-# Web tools
-firecrawl-py
-parallel-web>=0.4.2
-
-# Image generation
-fal-client
-
-# Text-to-speech (Edge TTS is free, no API key needed)
-edge-tts
-
-# Optional: For cron expression parsing (cronjob scheduling)
-croniter
-
-# Optional: For messaging platform integrations (gateway)
-python-telegram-bot[webhooks]>=22.6
-discord.py>=2.0
-aiohttp>=3.9.0
@@ -94,7 +94,7 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@@ -460,6 +460,40 @@ def _sanitize_messages_non_ascii(messages: list) -> bool:
    return found


+def _sanitize_tools_non_ascii(tools: list) -> bool:
+    """Sanitize tool schemas in place; return True if any string changed.
+
+    Thin wrapper that delegates to ``_sanitize_structure_non_ascii``.
+    """
+    changed = _sanitize_structure_non_ascii(tools)
+    return changed
+
+
+def _sanitize_structure_non_ascii(payload: Any) -> bool:
+    """Recursively sanitize string values inside nested dicts/lists in place.
+
+    Every ``str`` stored as a dict value or list element is passed through
+    ``_strip_non_ascii`` and replaced when the result differs.  Dict keys,
+    and values of any other type, are left untouched; a *payload* that is
+    neither a dict nor a list is ignored.
+
+    Returns True if at least one string was replaced.
+    """
+    changed = False
+
+    def _walk(container):
+        nonlocal changed
+        # Dicts and lists share one loop: iterate (slot, item) pairs where
+        # slot is the key or index used to write the cleaned value back.
+        if isinstance(container, dict):
+            entries = container.items()
+        elif isinstance(container, list):
+            entries = enumerate(container)
+        else:
+            return
+        for slot, item in entries:
+            if isinstance(item, (dict, list)):
+                _walk(item)
+            elif isinstance(item, str):
+                cleaned = _strip_non_ascii(item)
+                if cleaned != item:
+                    container[slot] = cleaned
+                    changed = True
+
+    _walk(payload)
+    return changed
+
+



@@ -737,6 +771,7 @@ class AIAgent:
        self.service_tier = service_tier
        self.request_overrides = dict(request_overrides or {})
        self.prefill_messages = prefill_messages or []  # Prefilled conversation turns
+        self._force_ascii_payload = False
        
        # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
        # Reduces input costs by ~75% on multi-turn conversations by caching the
@@ -1212,7 +1247,6 @@ class AIAgent:
            _compression_cfg = {}
        compression_threshold = float(_compression_cfg.get("threshold", 0.50))
        compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
-        compression_summary_model = _compression_cfg.get("summary_model") or None
        compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
        compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))

@@ -1301,12 +1335,13 @@ class AIAgent:
                protect_first_n=3,
                protect_last_n=compression_protect_last,
                summary_target_ratio=compression_target_ratio,
-                summary_model_override=compression_summary_model,
+                summary_model_override=None,
                quiet_mode=self.quiet_mode,
                base_url=self.base_url,
                api_key=getattr(self, "api_key", ""),
                config_context_length=_config_context_length,
                provider=self.provider,
+                api_mode=self.api_mode,
            )
        self.compression_enabled = compression_enabled

@@ -1563,6 +1598,7 @@ class AIAgent:
                base_url=self.base_url,
                api_key=getattr(self, "api_key", ""),
                provider=self.provider,
+                api_mode=self.api_mode,
            )

        # ── Invalidate cached system prompt so it rebuilds next turn ──
@@ -1696,6 +1732,16 @@ class AIAgent:
            except Exception:
                logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _current_main_runtime(self) -> Dict[str, str]:
+        """Snapshot the agent's current runtime settings as a plain dict.
+
+        Reads ``model``, ``provider``, ``base_url``, ``api_key`` and
+        ``api_mode`` from the instance; a missing or falsy attribute is
+        reported as an empty string.  Callers pass the result as
+        ``main_runtime`` for session-scoped auxiliary routing.
+        """
+        runtime_fields = ("model", "provider", "base_url", "api_key", "api_mode")
+        return {field: getattr(self, field, "") or "" for field in runtime_fields}
+
    def _check_compression_model_feasibility(self) -> None:
        """Warn at session start if the auxiliary compression model's context
        window is smaller than the main model's compression threshold.
@@ -1716,7 +1762,10 @@ class AIAgent:
            from agent.auxiliary_client import get_text_auxiliary_client
            from agent.model_metadata import get_model_context_length

-            client, aux_model = get_text_auxiliary_client("compression")
+            client, aux_model = get_text_auxiliary_client(
+                "compression",
+                main_runtime=self._current_main_runtime(),
+            )
            if client is None or not aux_model:
                msg = (
                    "⚠ No auxiliary LLM provider configured — context "
@@ -1733,10 +1782,25 @@ class AIAgent:

            aux_base_url = str(getattr(client, "base_url", ""))
            aux_api_key = str(getattr(client, "api_key", ""))
+
+            # Read user-configured context_length for the compression model.
+            # Custom endpoints often don't support /models API queries so
+            # get_model_context_length() falls through to the 128K default,
+            # ignoring the explicit config value.  Pass it as the highest-
+            # priority hint so the configured value is always respected.
+            _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {})
+            _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None
+            if _aux_context_config is not None:
+                try:
+                    _aux_context_config = int(_aux_context_config)
+                except (TypeError, ValueError):
+                    _aux_context_config = None
+
            aux_context = get_model_context_length(
                aux_model,
                base_url=aux_base_url,
                api_key=aux_api_key,
+                config_context_length=_aux_context_config,
            )

            threshold = self.context_compressor.threshold_tokens
@@ -1857,12 +1921,13 @@ class AIAgent:
        if not content:
            return ""
        # Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
-        # <reasoning>, <REASONING_SCRATCHPAD>
+        # <reasoning>, <REASONING_SCRATCHPAD>, <thought> (Gemma 4)
        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
-        content = re.sub(r'</?(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
+        content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
+        content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
        return content

    def _looks_like_codex_intermediate_ack(
@@ -1987,6 +2052,7 @@ class AIAgent:
            inline_patterns = (
                r"<think>(.*?)</think>",
                r"<thinking>(.*?)</thinking>",
+                r"<thought>(.*?)</thought>",
                r"<reasoning>(.*?)</reasoning>",
                r"<REASONING_SCRATCHPAD>(.*?)</REASONING_SCRATCHPAD>",
            )
@@ -3178,6 +3244,12 @@ class AIAgent:
                f"not on any model name returned by the API."
            )

+        # Environment hints (WSL, Termux, etc.) — tell the agent about the
+        # execution environment so it can translate paths and adapt behavior.
+        _env_hints = build_environment_hints()
+        if _env_hints:
+            prompt_parts.append(_env_hints)
+
        platform_key = (self.platform or "").lower().strip()
        if platform_key in PLATFORM_HINTS:
            prompt_parts.append(PLATFORM_HINTS[platform_key])
@@ -4667,6 +4739,11 @@ class AIAgent:
        Each worker thread gets its own OpenAI client instance. Interrupts only
        close that worker-local client, so retries and other requests never
        inherit a closed transport.
+
+        Includes a stale-call detector: if no response arrives within the
+        configured timeout, the connection is killed and an error raised so
+        the main retry loop can try again with backoff / credential rotation /
+        provider fallback.
        """
        result = {"response": None, "error": None}
        request_client_holder = {"client": None}
@@ -4692,10 +4769,86 @@ class AIAgent:
                if request_client is not None:
                    self._close_request_openai_client(request_client, reason="request_complete")

+        # ── Stale-call timeout (mirrors streaming stale detector) ────────
+        # Non-streaming calls return nothing until the full response is
+        # ready.  Without this, a hung provider can block for the full
+        # httpx timeout (default 1800s) with zero feedback.  The stale
+        # detector kills the connection early so the main retry loop can
+        # apply richer recovery (credential rotation, provider fallback).
+        _stale_base = float(os.getenv("HERMES_API_CALL_STALE_TIMEOUT", 300.0))
+        _base_url = getattr(self, "_base_url", None) or ""
+        if _stale_base == 300.0 and _base_url and is_local_endpoint(_base_url):
+            _stale_timeout = float("inf")
+        else:
+            _est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
+            if _est_tokens > 100_000:
+                _stale_timeout = max(_stale_base, 600.0)
+            elif _est_tokens > 50_000:
+                _stale_timeout = max(_stale_base, 450.0)
+            else:
+                _stale_timeout = _stale_base
+
+        _call_start = time.time()
+        self._touch_activity("waiting for non-streaming API response")
+
        t = threading.Thread(target=_call, daemon=True)
        t.start()
+        _poll_count = 0
        while t.is_alive():
            t.join(timeout=0.3)
+            _poll_count += 1
+
+            # Touch activity every ~30s so the gateway's inactivity
+            # monitor knows we're alive while waiting for the response.
+            if _poll_count % 100 == 0:  # 100 × 0.3s = 30s
+                _elapsed = time.time() - _call_start
+                self._touch_activity(
+                    f"waiting for non-streaming response ({int(_elapsed)}s elapsed)"
+                )
+
+            # Stale-call detector: kill the connection if no response
+            # arrives within the configured timeout.
+            _elapsed = time.time() - _call_start
+            if _elapsed > _stale_timeout:
+                _est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
+                logger.warning(
+                    "Non-streaming API call stale for %.0fs (threshold %.0fs). "
+                    "model=%s context=~%s tokens. Killing connection.",
+                    _elapsed, _stale_timeout,
+                    api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
+                )
+                self._emit_status(
+                    f"⚠️ No response from provider for {int(_elapsed)}s "
+                    f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
+                    f"Aborting call."
+                )
+                try:
+                    if self.api_mode == "anthropic_messages":
+                        from agent.anthropic_adapter import build_anthropic_client
+
+                        self._anthropic_client.close()
+                        self._anthropic_client = build_anthropic_client(
+                            self._anthropic_api_key,
+                            getattr(self, "_anthropic_base_url", None),
+                        )
+                    else:
+                        rc = request_client_holder.get("client")
+                        if rc is not None:
+                            self._close_request_openai_client(rc, reason="stale_call_kill")
+                except Exception:
+                    pass
+                self._touch_activity(
+                    f"stale non-streaming call killed after {int(_elapsed)}s"
+                )
+                # Wait briefly for the thread to notice the closed connection.
+                t.join(timeout=2.0)
+                if result["error"] is None and result["response"] is None:
+                    result["error"] = TimeoutError(
+                        f"Non-streaming API call timed out after {int(_elapsed)}s "
+                        f"with no response (threshold: {int(_stale_timeout)}s)"
+                    )
+                break
+
            if self._interrupt_requested:
                # Force-close the in-flight worker-local HTTP connection to stop
                # token generation without poisoning the shared client used to
@@ -5210,6 +5363,10 @@ class AIAgent:
                                    f"({type(e).__name__}). Reconnecting… "
                                    f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
                                )
+                                self._touch_activity(
+                                    f"stream retry {_stream_attempt + 2}/{_max_stream_retries + 1} "
+                                    f"after {type(e).__name__}"
+                                )
                                # Close the stale request client before retry
                                stale = request_client_holder.get("client")
                                if stale is not None:
@@ -5233,8 +5390,7 @@ class AIAgent:
                                "try again in a moment."
                            )
                            logger.warning(
-                                "Streaming exhausted %s retries on transient error, "
-                                "falling back to non-streaming: %s",
+                                "Streaming exhausted %s retries on transient error: %s",
                                _max_stream_retries + 1,
                                e,
                            )
@@ -5245,25 +5401,24 @@ class AIAgent:
                                and "not supported" in _err_lower
                            )
                            if _is_stream_unsupported:
+                                self._disable_streaming = True
                                self._safe_print(
                                    "\n⚠  Streaming is not supported for this "
-                                    "model/provider. Falling back to non-streaming.\n"
+                                    "model/provider. Switching to non-streaming.\n"
                                    "   To avoid this delay, set display.streaming: false "
                                    "in config.yaml\n"
                                )
                            logger.info(
-                                "Streaming failed before delivery, falling back to non-streaming: %s",
+                                "Streaming failed before delivery: %s",
                                e,
                            )

-                        try:
-                            # Reset stale timer — the non-streaming fallback
-                            # uses its own client; prevent the stale detector
-                            # from firing on stale timestamps from failed streams.
-                            last_chunk_time["t"] = time.time()
-                            result["response"] = self._interruptible_api_call(api_kwargs)
-                        except Exception as fallback_err:
-                            result["error"] = fallback_err
+                        # Propagate the error to the main retry loop instead of
+                        # falling back to non-streaming inline.  The main loop has
+                        # richer recovery: credential rotation, provider fallback,
+                        # backoff, and — for "stream not supported" — will switch
+                        # to non-streaming on the next attempt via _disable_streaming.
+                        result["error"] = e
                        return
            finally:
                request_client = request_client_holder.get("client")
@@ -5329,6 +5484,9 @@ class AIAgent:
                # Reset the timer so we don't kill repeatedly while
                # the inner thread processes the closure.
                last_chunk_time["t"] = time.time()
+                self._touch_activity(
+                    f"stale stream detected after {int(_stale_elapsed)}s, reconnecting"
+                )

            if self._interrupt_requested:
                try:
@@ -5354,13 +5512,22 @@ class AIAgent:
                # a new API call, creating a duplicate message.  Return a
                # partial "stop" response instead so the outer loop treats this
                # turn as complete (no retry, no fallback).
+                # Recover whatever content was already streamed to the user.
+                # _current_streamed_assistant_text accumulates text fired
+                # through _fire_stream_delta, so it has exactly what the
+                # user saw before the connection died.
+                _partial_text = (
+                    getattr(self, "_current_streamed_assistant_text", "") or ""
+                ).strip() or None
                logger.warning(
                    "Partial stream delivered before error; returning stub "
-                    "response to prevent duplicate messages: %s",
+                    "response with %s chars of recovered content to prevent "
+                    "duplicate messages: %s",
+                    len(_partial_text or ""),
                    result["error"],
                )
                _stub_msg = SimpleNamespace(
-                    role="assistant", content=None, tool_calls=None,
+                    role="assistant", content=_partial_text, tool_calls=None,
                    reasoning_content=None,
                )
                return SimpleNamespace(
@@ -5819,11 +5986,12 @@ class AIAgent:
        """True when using an anthropic-compatible endpoint that preserves dots in model names.
        Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
        MiniMax keeps dots (e.g. MiniMax-M2.7).
-        OpenCode Go keeps dots (e.g. minimax-m2.7)."""
-        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go"}:
+        OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free).
+        ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1)."""
+        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}:
            return True
        base = (getattr(self, "base_url", "") or "").lower()
-        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/go" in base
+        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base

    def _is_qwen_portal(self) -> bool:
        """Return True when the base URL targets Qwen Portal."""
@@ -6548,17 +6716,23 @@ class AIAgent:
            if messages and messages[-1].get("_flush_sentinel") == _sentinel:
                messages.pop()

-    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
+    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
        """Compress conversation context and split the session in SQLite.

+        Args:
+            focus_topic: Optional focus string for guided compression — the
+                summariser will prioritise preserving information related to
+                this topic.  Inspired by Claude Code's ``/compact <focus>``.
+
        Returns:
            (compressed_messages, new_system_prompt) tuple
        """
        _pre_msg_count = len(messages)
        logger.info(
-            "context compression started: session=%s messages=%d tokens=~%s model=%s",
+            "context compression started: session=%s messages=%d tokens=~%s model=%s focus=%r",
            self.session_id or "none", _pre_msg_count,
            f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
+            focus_topic,
        )
        # Pre-compression memory flush: let the model save memories before they're lost
        self.flush_memories(messages, min_turns=0)
@@ -6570,7 +6744,7 @@ class AIAgent:
            except Exception:
                pass

-        compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
+        compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic)

        todo_snapshot = self._todo_store.format_for_injection()
        if todo_snapshot:
@@ -7529,6 +7703,11 @@ class AIAgent:
        # Installed once, transparent when streams are healthy, prevents crash on write.
        _install_safe_stdio()

+        # Tag all log records on this thread with the session ID so
+        # ``hermes logs --session <id>`` can filter a single conversation.
+        from hermes_logging import set_session_context
+        set_session_context(self.session_id)
+
        # If the previous turn activated fallback, restore the primary
        # runtime so this turn gets a fresh attempt with the preferred model.
        # No-op when _fallback_activated is False (gateway, first turn, etc.).
@@ -8045,6 +8224,8 @@ class AIAgent:
                try:
                    self._reset_stream_delivery_tracking()
                    api_kwargs = self._build_api_kwargs(api_messages)
+                    if self._force_ascii_payload:
+                        _sanitize_structure_non_ascii(api_kwargs)
                    if self.api_mode == "codex_responses":
                        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)

@@ -8092,7 +8273,12 @@ class AIAgent:
                            self.thinking_callback("")

                    _use_streaming = True
-                    if not self._has_stream_consumers():
+                    # Provider signaled "stream not supported" on a previous
+                    # attempt — switch to non-streaming for the rest of this
+                    # session instead of re-failing every retry.
+                    if getattr(self, "_disable_streaming", False):
+                        _use_streaming = False
+                    elif not self._has_stream_consumers():
                        # No display/TTS consumer. Still prefer streaming for
                        # health checking, but skip for Mock clients in tests
                        # (mocks return SimpleNamespace, not stream iterators).
@@ -8192,7 +8378,8 @@ class AIAgent:
                        if self.thinking_callback:
                            self.thinking_callback("")
                        
-                        # This is often rate limiting or provider returning malformed response
+                        # Invalid response — could be rate limiting, provider timeout,
+                        # upstream server error, or malformed response.
                        retry_count += 1
                        
                        # Eager fallback: empty/malformed responses are a common
@@ -8228,11 +8415,44 @@ class AIAgent:
                            if self.verbose_logging:
                                logging.debug(f"Response attributes for invalid response: {resp_attrs}")
                        
+                        # Extract error code from response for contextual diagnostics
+                        _resp_error_code = None
+                        if response and hasattr(response, 'error') and response.error:
+                            _code_raw = getattr(response.error, 'code', None)
+                            if _code_raw is None and isinstance(response.error, dict):
+                                _code_raw = response.error.get('code')
+                            if _code_raw is not None:
+                                try:
+                                    _resp_error_code = int(_code_raw)
+                                except (TypeError, ValueError):
+                                    pass
+
+                        # Build a human-readable failure hint from the error code
+                        # and response time, instead of always assuming rate limiting.
+                        if _resp_error_code == 524:
+                            _failure_hint = f"upstream provider timed out (Cloudflare 524, {api_duration:.0f}s)"
+                        elif _resp_error_code == 504:
+                            _failure_hint = f"upstream gateway timeout (504, {api_duration:.0f}s)"
+                        elif _resp_error_code == 429:
+                            _failure_hint = f"rate limited by upstream provider (429)"
+                        elif _resp_error_code in (500, 502):
+                            _failure_hint = f"upstream server error ({_resp_error_code}, {api_duration:.0f}s)"
+                        elif _resp_error_code in (503, 529):
+                            _failure_hint = f"upstream provider overloaded ({_resp_error_code})"
+                        elif _resp_error_code is not None:
+                            _failure_hint = f"upstream error (code {_resp_error_code}, {api_duration:.0f}s)"
+                        elif api_duration < 10:
+                            _failure_hint = f"fast response ({api_duration:.1f}s) — likely rate limited"
+                        elif api_duration > 60:
+                            _failure_hint = f"slow response ({api_duration:.0f}s) — likely upstream timeout"
+                        else:
+                            _failure_hint = f"response time {api_duration:.1f}s"
+
                        self._vprint(f"{self.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True)
                        self._vprint(f"{self.log_prefix}   🏢 Provider: {provider_name}", force=True)
                        cleaned_provider_error = self._clean_error_message(error_msg)
                        self._vprint(f"{self.log_prefix}   📝 Provider message: {cleaned_provider_error}", force=True)
-                        self._vprint(f"{self.log_prefix}   ⏱️  Response time: {api_duration:.2f}s (fast response often indicates rate limiting)", force=True)
+                        self._vprint(f"{self.log_prefix}   ⏱️  {_failure_hint}", force=True)
                        
                        if retry_count >= max_retries:
                            # Try fallback before giving up
@@ -8249,31 +8469,39 @@ class AIAgent:
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
-                                "error": "Invalid API response shape. Likely rate limited or malformed provider response.",
+                                "error": f"Invalid API response after {max_retries} retries: {_failure_hint}",
                                "failed": True  # Mark as failure for filtering
                            }
                        
-                        # Longer backoff for rate limiting (likely cause of None choices)
-                        # Jittered exponential: 5s base, 120s cap + random jitter
+                        # Backoff before retry — jittered exponential: 5s base, 120s cap
                        wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
-                        self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...", force=True)
+                        self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
                        logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
                        
                        # Sleep in small increments to stay responsive to interrupts
                        sleep_end = time.time() + wait_time
+                        _backoff_touch_counter = 0
                        while time.time() < sleep_end:
                            if self._interrupt_requested:
                                self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
                                self._persist_session(messages, conversation_history)
                                self.clear_interrupt()
                                return {
-                                    "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).",
+                                    "final_response": f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries}).",
                                    "messages": messages,
                                    "api_calls": api_call_count,
                                    "completed": False,
                                    "interrupted": True,
                                }
                            time.sleep(0.2)
+                            # Touch activity every ~30s so the gateway's inactivity
+                            # monitor knows we're alive during backoff waits.
+                            _backoff_touch_counter += 1
+                            if _backoff_touch_counter % 150 == 0:  # 150 × 0.2s = 30s
+                                self._touch_activity(
+                                    f"retry backoff ({retry_count}/{max_retries}), "
+                                    f"{int(sleep_end - time.time())}s remaining"
+                                )
                        continue  # Retry the API call

                    # Check finish_reason before proceeding
@@ -8628,18 +8856,56 @@ class AIAgent:
                            )
                            continue
                        if _is_ascii_codec:
+                            self._force_ascii_payload = True
                            # ASCII codec: the system encoding can't handle
                            # non-ASCII characters at all. Sanitize all
-                            # non-ASCII content from messages and retry.
-                            if _sanitize_messages_non_ascii(messages):
+                            # non-ASCII content from messages/tool schemas and retry.
+                            _messages_sanitized = _sanitize_messages_non_ascii(messages)
+                            _prefill_sanitized = False
+                            if isinstance(getattr(self, "prefill_messages", None), list):
+                                _prefill_sanitized = _sanitize_messages_non_ascii(self.prefill_messages)
+
+                            _tools_sanitized = False
+                            if isinstance(getattr(self, "tools", None), list):
+                                _tools_sanitized = _sanitize_tools_non_ascii(self.tools)
+
+                            _system_sanitized = False
+                            if isinstance(active_system_prompt, str):
+                                _sanitized_system = _strip_non_ascii(active_system_prompt)
+                                if _sanitized_system != active_system_prompt:
+                                    active_system_prompt = _sanitized_system
+                                    self._cached_system_prompt = _sanitized_system
+                                    _system_sanitized = True
+                            if isinstance(getattr(self, "ephemeral_system_prompt", None), str):
+                                _sanitized_ephemeral = _strip_non_ascii(self.ephemeral_system_prompt)
+                                if _sanitized_ephemeral != self.ephemeral_system_prompt:
+                                    self.ephemeral_system_prompt = _sanitized_ephemeral
+                                    _system_sanitized = True
+
+                            _headers_sanitized = False
+                            _default_headers = (
+                                self._client_kwargs.get("default_headers")
+                                if isinstance(getattr(self, "_client_kwargs", None), dict)
+                                else None
+                            )
+                            if isinstance(_default_headers, dict):
+                                _headers_sanitized = _sanitize_structure_non_ascii(_default_headers)
+
+                            if (
+                                _messages_sanitized
+                                or _prefill_sanitized
+                                or _tools_sanitized
+                                or _system_sanitized
+                                or _headers_sanitized
+                            ):
                                self._unicode_sanitization_passes += 1
                                self._vprint(
-                                    f"{self.log_prefix}⚠️  System encoding is ASCII — stripped non-ASCII characters from messages. Retrying...",
+                                    f"{self.log_prefix}⚠️  System encoding is ASCII — stripped non-ASCII characters from request payload. Retrying...",
                                    force=True,
                                )
                                continue
-                        # Nothing to sanitize in messages — might be in system
-                        # prompt or prefill. Fall through to normal error path.
+                        # Nothing to sanitize in any payload component.
+                        # Fall through to normal error path.

                    status_code = getattr(api_error, "status_code", None)
                    error_context = self._extract_api_error_context(api_error)
@@ -8746,6 +9012,9 @@ class AIAgent:

                    retry_count += 1
                    elapsed_time = time.time() - api_start_time
+                    self._touch_activity(
+                        f"API error recovery (attempt {retry_count}/{max_retries})"
+                    )
                    
                    error_type = type(api_error).__name__
                    error_msg = str(api_error).lower()
@@ -9272,6 +9541,7 @@ class AIAgent:
                    # Sleep in small increments so we can respond to interrupts quickly
                    # instead of blocking the entire wait_time in one sleep() call
                    sleep_end = time.time() + wait_time
+                    _backoff_touch_counter = 0
                    while time.time() < sleep_end:
                        if self._interrupt_requested:
                            self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
@@ -9285,6 +9555,14 @@ class AIAgent:
                                "interrupted": True,
                            }
                        time.sleep(0.2)  # Check interrupt every 200ms
+                        # Touch activity every ~30s so the gateway's inactivity
+                        # monitor knows we're alive during backoff waits.
+                        _backoff_touch_counter += 1
+                        if _backoff_touch_counter % 150 == 0:  # 150 × 0.2s = 30s
+                            self._touch_activity(
+                                f"error retry backoff ({retry_count}/{max_retries}), "
+                                f"{int(sleep_end - time.time())}s remaining"
+                            )
            
            # If the API call was interrupted, skip response processing
            if interrupted:
@@ -9670,12 +9948,25 @@ class AIAgent:
                    
                    # Pop thinking-only prefill message(s) before appending
                    # (tool-call path — same rationale as the final-response path).
+                    _had_prefill = False
                    while (
                        messages
                        and isinstance(messages[-1], dict)
                        and messages[-1].get("_thinking_prefill")
                    ):
                        messages.pop()
+                        _had_prefill = True
+
+                    # Reset prefill counter when tool calls follow a prefill
+                    # recovery.  Without this, the counter accumulates across
+                    # the whole conversation — a model that intermittently
+                    # empties (empty → prefill → tools → empty → prefill →
+                    # tools) burns both prefill attempts and the third empty
+                    # gets zero recovery.  Resetting here treats each tool-
+                    # call success as a fresh start.
+                    if _had_prefill:
+                        self._thinking_prefill_retries = 0
+                        self._empty_content_retries = 0

                    messages.append(assistant_msg)
                    self._emit_interim_assistant_message(assistant_msg)
@@ -9794,6 +10085,30 @@ class AIAgent:
                    
                    # Check if response only has think block with no actual content after it
                    if not self._has_content_after_think_block(final_response):
+                        # ── Partial stream recovery ─────────────────────
+                        # If content was already streamed to the user before
+                        # the connection died, use it as the final response
+                        # instead of falling through to prior-turn fallback
+                        # or wasting API calls on retries.
+                        _partial_streamed = (
+                            getattr(self, "_current_streamed_assistant_text", "") or ""
+                        )
+                        if self._has_content_after_think_block(_partial_streamed):
+                            _turn_exit_reason = "partial_stream_recovery"
+                            _recovered = self._strip_think_blocks(_partial_streamed).strip()
+                            logger.info(
+                                "Partial stream content delivered (%d chars) "
+                                "— using as final response",
+                                len(_recovered),
+                            )
+                            self._emit_status(
+                                "↻ Stream interrupted — using delivered content "
+                                "as final response"
+                            )
+                            final_response = _recovered
+                            self._response_was_previewed = True
+                            break
+
                        # If the previous turn already delivered real content alongside
                        # tool calls (e.g. "You're welcome!" + memory save), the model
                        # has nothing more to say. Use the earlier content immediately
@@ -9851,16 +10166,23 @@ class AIAgent:
                            self._save_session_log(messages)
                            continue

-                        # ── Empty response retry (no reasoning) ──────
-                        # Model returned nothing — no content, no
-                        # structured reasoning, no tool calls.  Common
-                        # with open models (transient provider issues,
-                        # rate limits, sampling flukes).  Retry up to 3
-                        # times before attempting fallback.  Skip when
-                        # content has inline <think> tags (model chose
-                        # to reason, just no visible text).
-                        _truly_empty = not final_response.strip()
-                        if _truly_empty and not _has_structured and self._empty_content_retries < 3:
+                        # ── Empty response retry ──────────────────────
+                        # Model returned nothing usable.  Retry up to 3
+                        # times before attempting fallback.  This covers
+                        # both truly empty responses (no content, no
+                        # reasoning) AND reasoning-only responses after
+                        # prefill exhaustion — models like mimo-v2-pro
+                        # always populate reasoning fields via OpenRouter,
+                        # so the old `not _has_structured` guard blocked
+                        # retries for every reasoning model after prefill.
+                        _truly_empty = not self._strip_think_blocks(
+                            final_response
+                        ).strip()
+                        _prefill_exhausted = (
+                            _has_structured
+                            and self._thinking_prefill_retries >= 2
+                        )
+                        if _truly_empty and (not _has_structured or _prefill_exhausted) and self._empty_content_retries < 3:
                            self._empty_content_retries += 1
                            logger.warning(
                                "Empty response (no content or reasoning) — "
@@ -10054,17 +10376,11 @@ class AIAgent:
        if final_response is None and (
            api_call_count >= self.max_iterations
            or self.iteration_budget.remaining <= 0
-        ) and not self._budget_exhausted_injected:
-            # Budget exhausted but we haven't tried asking the model to
-            # summarise yet.  Inject a user message and give it one grace
-            # API call to produce a text response.
-            self._budget_exhausted_injected = True
-            self._budget_grace_call = True
-            _grace_msg = (
-                "Your tool budget ran out. Please give me the information "
-                "or actions you've completed so far."
-            )
-            messages.append({"role": "user", "content": _grace_msg})
+        ):
+            # Budget exhausted — ask the model for a summary via one extra
+            # API call with tools stripped.  _handle_max_iterations injects a
+            # user message and makes a single toolless request.
+            _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
            self._emit_status(
                f"⚠️ Iteration budget exhausted ({api_call_count}/{self.max_iterations}) "
                "— asking model to summarise"
@@ -10074,14 +10390,6 @@ class AIAgent:
                    f"\n⚠️  Iteration budget exhausted ({api_call_count}/{self.max_iterations}) "
                    "— requesting summary..."
                )
-
-        if final_response is None and (
-            api_call_count >= self.max_iterations
-            or self.iteration_budget.remaining <= 0
-        ) and not self._budget_grace_call:
-            _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
-            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
-                print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
            final_response = self._handle_max_iterations(messages, api_call_count)
        
        # Determine if conversation completed successfully
@@ -0,0 +1,325 @@
+#!/usr/bin/env python3
+"""Build the Hermes Skills Index — a centralized JSON catalog of all skills.
+
+This script crawls every skill source (skills.sh, GitHub taps, official,
+clawhub, lobehub, claude-marketplace) and writes a JSON index with resolved
+GitHub paths. The index is served as a static file on the docs site so that
+`hermes skills search/install` can use it without hitting the GitHub API.
+
+Usage:
+    # Local (uses gh CLI or GITHUB_TOKEN for auth)
+    python scripts/build_skills_index.py
+
+    # CI (set GITHUB_TOKEN as secret)
+    GITHUB_TOKEN=ghp_... python scripts/build_skills_index.py
+
+Output: website/static/api/skills-index.json
+"""
+
+import json
+import os
+import sys
+import time
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timezone
+
+# Allow importing from repo root
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, REPO_ROOT)
+
+# Ensure HERMES_HOME is set (needed by tools/skills_hub.py imports)
+os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
+
+from tools.skills_hub import (
+    GitHubAuth,
+    GitHubSource,
+    SkillsShSource,
+    OptionalSkillSource,
+    WellKnownSkillSource,
+    ClawHubSource,
+    ClaudeMarketplaceSource,
+    LobeHubSource,
+    SkillMeta,
+)
+import httpx
+
+OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
+INDEX_VERSION = 1
+
+
+def _meta_to_dict(meta: SkillMeta) -> dict:
+    """Convert a SkillMeta to a serializable dict."""
+    return {
+        "name": meta.name,
+        "description": meta.description,
+        "source": meta.source,
+        "identifier": meta.identifier,
+        "trust_level": meta.trust_level,
+        "repo": meta.repo or "",
+        "path": meta.path or "",
+        "tags": meta.tags or [],
+        "extra": meta.extra or {},
+    }
+
+
+def crawl_source(source, source_name: str, limit: int) -> list:
+    """Crawl a single source and return skill dicts."""
+    print(f"  Crawling {source_name}...", flush=True)
+    start = time.time()
+    try:
+        results = source.search("", limit=limit)
+    except Exception as e:
+        print(f"  Error crawling {source_name}: {e}", file=sys.stderr)
+        return []
+    skills = [_meta_to_dict(m) for m in results]
+    elapsed = time.time() - start
+    print(f"  {source_name}: {len(skills)} skills ({elapsed:.1f}s)", flush=True)
+    return skills
+
+
+def crawl_skills_sh(source: SkillsShSource) -> list:
+    """Crawl skills.sh using popular queries for broad coverage."""
+    print("  Crawling skills.sh (popular queries)...", flush=True)
+    start = time.time()
+
+    queries = [
+        "",  # featured
+        "react", "python", "web", "api", "database", "docker",
+        "testing", "scraping", "design", "typescript", "git",
+        "aws", "security", "data", "ml", "ai", "devops",
+        "frontend", "backend", "mobile", "cli", "documentation",
+        "kubernetes", "terraform", "rust", "go", "java",
+    ]
+
+    all_skills: dict[str, dict] = {}
+    for query in queries:
+        try:
+            results = source.search(query, limit=50)
+            for meta in results:
+                entry = _meta_to_dict(meta)
+                if entry["identifier"] not in all_skills:
+                    all_skills[entry["identifier"]] = entry
+        except Exception as e:
+            print(f"    Warning: skills.sh search '{query}' failed: {e}",
+                  file=sys.stderr)
+
+    elapsed = time.time() - start
+    print(f"  skills.sh: {len(all_skills)} unique skills ({elapsed:.1f}s)",
+          flush=True)
+    return list(all_skills.values())
+
+
+def _fetch_repo_tree(repo: str, auth: GitHubAuth) -> list:
+    """Fetch the recursive tree for a repo. Returns list of tree entries."""
+    headers = auth.get_headers()
+    try:
+        resp = httpx.get(
+            f"https://api.github.com/repos/{repo}",
+            headers=headers, timeout=15, follow_redirects=True,
+        )
+        if resp.status_code != 200:
+            return []
+        branch = resp.json().get("default_branch", "main")
+
+        resp = httpx.get(
+            f"https://api.github.com/repos/{repo}/git/trees/{branch}",
+            params={"recursive": "1"},
+            headers=headers, timeout=30, follow_redirects=True,
+        )
+        if resp.status_code != 200:
+            return []
+        data = resp.json()
+        if data.get("truncated"):
+            return []
+        return data.get("tree", [])
+    except Exception:
+        return []
+
+
+def batch_resolve_paths(skills: list, auth: GitHubAuth) -> list:
+    """Resolve GitHub paths for skills.sh entries using batch tree lookups.
+
+    Instead of resolving each skill individually (N×M API calls), we:
+    1. Group skills by repo
+    2. Fetch one tree per repo (2 API calls per repo)
+    3. Find all SKILL.md files in the tree
+    4. Match skills to their resolved paths
+
+    Args:
+        skills: Skill dicts from every source; only those whose ``source`` is
+            "skills.sh" or "skills-sh" are examined.
+        auth: Passed through to ``_fetch_repo_tree``.
+
+    Returns:
+        The same ``skills`` list. Matched entries are mutated in place and
+        gain a ``resolved_github_id`` key; unmatched entries are left as-is.
+    """
+    # Filter to skills.sh entries that need resolution
+    skills_sh = [s for s in skills if s["source"] in ("skills.sh", "skills-sh")]
+    if not skills_sh:
+        return skills
+
+    print(f"  Resolving paths for {len(skills_sh)} skills.sh entries...",
+          flush=True)
+    start = time.time()
+
+    # Group by repo. Entries with an empty/missing repo are not grouped and
+    # therefore never resolved, but they still count in the printed total.
+    by_repo: dict[str, list] = defaultdict(list)
+    for s in skills_sh:
+        repo = s.get("repo", "")
+        if repo:
+            by_repo[repo].append(s)
+
+    print(f"    {len(by_repo)} unique repos to scan", flush=True)
+
+    resolved_count = 0
+
+    # Worker run once per repo; the pool below runs up to 6 concurrently.
+    def _resolve_repo(repo: str, entries: list):
+        """Match ``entries`` against SKILL.md files in ``repo``'s tree.
+
+        Returns the number of entries that received a ``resolved_github_id``
+        (0 when the tree could not be fetched or is empty).
+        """
+        tree = _fetch_repo_tree(repo, auth)
+        if not tree:
+            return 0
+
+        # Find all SKILL.md paths in this repo
+        skill_paths = {}  # lowercased skill_dir_name -> "<repo>/<skill_dir>"
+        for item in tree:
+            if item.get("type") != "blob":
+                continue
+            path = item.get("path", "")
+            if path.endswith("/SKILL.md"):
+                skill_dir = path[: -len("/SKILL.md")]
+                dir_name = skill_dir.split("/")[-1]
+                # Keyed by the last directory component only, so two skill
+                # directories with the same name overwrite each other.
+                skill_paths[dir_name.lower()] = f"{repo}/{skill_dir}"
+
+                # Also check SKILL.md frontmatter name if we can match by path
+                # For now, just index by directory name
+            elif path == "SKILL.md":
+                # Root-level SKILL.md
+                skill_paths["_root_"] = f"{repo}"
+
+        count = 0
+        for entry in entries:
+            # Try to match the skill's name/path to a tree entry
+            skill_name = entry.get("name", "").lower()
+            skill_path = entry.get("path", "").lower()
+            identifier = entry.get("identifier", "")
+
+            # Extract the skill token from the identifier
+            # e.g. "skills-sh/d4vinci/scrapling/scrapling-official" -> "scrapling-official"
+            # The token is empty when nothing but a single segment remains
+            # after the source prefix is removed.
+            parts = identifier.replace("skills-sh/", "").replace("skills.sh/", "")
+            skill_token = parts.split("/")[-1].lower() if "/" in parts else ""
+
+            # Try exact matches in order of likelihood
+            for candidate in [skill_token, skill_name, skill_path]:
+                if not candidate:
+                    continue
+                matched = skill_paths.get(candidate)
+                if matched:
+                    entry["resolved_github_id"] = matched
+                    count += 1
+                    break
+            else:
+                # for/else: reached only when no exact candidate matched.
+                # Try fuzzy: hyphen-insensitive equality, or either name
+                # containing the other. The first tree entry that passes
+                # (in dict iteration order) wins.
+                for tree_name, tree_path in skill_paths.items():
+                    if (skill_token and (
+                        tree_name.replace("-", "") == skill_token.replace("-", "")
+                        or skill_token in tree_name
+                        or tree_name in skill_token
+                    )):
+                        entry["resolved_github_id"] = tree_path
+                        count += 1
+                        break
+
+        return count
+
+    with ThreadPoolExecutor(max_workers=6) as pool:
+        # Map each future back to its repo so failures can be attributed.
+        futures = {
+            pool.submit(_resolve_repo, repo, entries): repo
+            for repo, entries in by_repo.items()
+        }
+        for future in as_completed(futures):
+            try:
+                resolved_count += future.result()
+            except Exception as e:
+                # A failing repo is reported and skipped; others continue.
+                repo = futures[future]
+                print(f"    Warning: {repo}: {e}", file=sys.stderr)
+
+    elapsed = time.time() - start
+    print(f"  Resolved {resolved_count}/{len(skills_sh)} paths ({elapsed:.1f}s)",
+          flush=True)
+    return skills
+
+
+def main():
+    print("Building Hermes Skills Index...", flush=True)
+    overall_start = time.time()
+
+    auth = GitHubAuth()
+    print(f"GitHub auth: {auth.auth_method()}")
+    if auth.auth_method() == "anonymous":
+        print("WARNING: No GitHub authentication — rate limit is 60/hr. "
+              "Set GITHUB_TOKEN for better results.", file=sys.stderr)
+
+    skills_sh_source = SkillsShSource(auth=auth)
+    sources = {
+        "official": OptionalSkillSource(),
+        "well-known": WellKnownSkillSource(),
+        "github": GitHubSource(auth=auth),
+        "clawhub": ClawHubSource(),
+        "claude-marketplace": ClaudeMarketplaceSource(auth=auth),
+        "lobehub": LobeHubSource(),
+    }
+
+    all_skills: list[dict] = []
+
+    # Crawl skills.sh
+    all_skills.extend(crawl_skills_sh(skills_sh_source))
+
+    # Crawl other sources in parallel
+    with ThreadPoolExecutor(max_workers=4) as pool:
+        futures = {}
+        for name, source in sources.items():
+            futures[pool.submit(crawl_source, source, name, 500)] = name
+        for future in as_completed(futures):
+            try:
+                all_skills.extend(future.result())
+            except Exception as e:
+                print(f"  Error: {e}", file=sys.stderr)
+
+    # Batch resolve GitHub paths for skills.sh entries
+    all_skills = batch_resolve_paths(all_skills, auth)
+
+    # Deduplicate by identifier
+    seen: dict[str, dict] = {}
+    for skill in all_skills:
+        key = skill["identifier"]
+        if key not in seen:
+            seen[key] = skill
+    deduped = list(seen.values())
+
+    # Sort
+    source_order = {"official": 0, "skills-sh": 1, "skills.sh": 1,
+                    "github": 2, "well-known": 3, "clawhub": 4,
+                    "claude-marketplace": 5, "lobehub": 6}
+    deduped.sort(key=lambda s: (source_order.get(s["source"], 99), s["name"]))
+
+    # Build index
+    index = {
+        "version": INDEX_VERSION,
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "skill_count": len(deduped),
+        "skills": deduped,
+    }
+
+    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
+    with open(OUTPUT_PATH, "w") as f:
+        json.dump(index, f, separators=(",", ":"), ensure_ascii=False)
+
+    elapsed = time.time() - overall_start
+    file_size = os.path.getsize(OUTPUT_PATH)
+    print(f"\nDone! {len(deduped)} skills indexed in {elapsed:.0f}s")
+    print(f"Output: {OUTPUT_PATH} ({file_size / 1024:.0f} KB)")
+
+    from collections import Counter
+    by_source = Counter(s["source"] for s in deduped)
+    for src, count in sorted(by_source.items(), key=lambda x: -x[1]):
+        resolved = sum(1 for s in deduped
+                       if s["source"] == src and s.get("resolved_github_id"))
+        extra = f" ({resolved} resolved)" if resolved else ""
+        print(f"  {src}: {count}{extra}")
+
+
+if __name__ == "__main__":
+    main()
@@ -15,9 +15,9 @@
      }
    },
    "node_modules/@borewit/text-codec": {
-      "version": "0.2.1",
-      "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.1.tgz",
-      "integrity": "sha512-k7vvKPbf7J2fZ5klGRD9AeKfUvojuZIQ3BT5u7Jfv+puwXkUBUT5PVyMDfJZpy30CBDXGMgw7fguK/lpOMBvgw==",
+      "version": "0.2.2",
+      "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.2.tgz",
+      "integrity": "sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==",
      "license": "MIT",
      "funding": {
        "type": "github",
@@ -1088,9 +1088,9 @@
      }
    },
    "node_modules/file-type": {
-      "version": "21.3.0",
-      "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.0.tgz",
-      "integrity": "sha512-8kPJMIGz1Yt/aPEwOsrR97ZyZaD1Iqm8PClb1nYFclUCkBi0Ma5IsYNQzvSFS9ib51lWyIw5mIT9rWzI/xjpzA==",
+      "version": "21.3.4",
+      "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz",
+      "integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==",
      "license": "MIT",
      "dependencies": {
        "@tokenizer/inflate": "^0.4.1",
@@ -1456,9 +1456,9 @@
      "license": "MIT"
    },
    "node_modules/music-metadata": {
-      "version": "11.12.1",
-      "resolved": "https://registry.npmjs.org/music-metadata/-/music-metadata-11.12.1.tgz",
-      "integrity": "sha512-j++ltLxHDb5VCXET9FzQ8bnueiLHwQKgCO7vcbkRH/3F7fRjPkv6qncGEJ47yFhmemcYtgvsOAlcQ1dRBTkDjg==",
+      "version": "11.12.3",
+      "resolved": "https://registry.npmjs.org/music-metadata/-/music-metadata-11.12.3.tgz",
+      "integrity": "sha512-n6hSTZkuD59qWgHh6IP5dtDlDZQXoxk/bcA85Jywg8Z1iFrlNgl2+GTFgjZyn52W5UgQpV42V4XqrQZZAMbZTQ==",
      "funding": [
        {
          "type": "github",
@@ -1471,11 +1471,11 @@
      ],
      "license": "MIT",
      "dependencies": {
-        "@borewit/text-codec": "^0.2.1",
+        "@borewit/text-codec": "^0.2.2",
        "@tokenizer/token": "^0.3.0",
        "content-type": "^1.0.5",
        "debug": "^4.4.3",
-        "file-type": "^21.3.0",
+        "file-type": "^21.3.1",
        "media-typer": "^1.1.0",
        "strtok3": "^10.3.4",
        "token-types": "^6.1.2",
@@ -1589,9 +1589,9 @@
      }
    },
    "node_modules/path-to-regexp": {
-      "version": "0.1.12",
-      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
-      "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==",
+      "version": "0.1.13",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.13.tgz",
+      "integrity": "sha512-A/AGNMFN3c8bOlvV9RreMdrv7jsmF9XIfDeCd87+I8RNg6s78BhJxMu69NEMHBSJFxKidViTEdruRwEk/WIKqA==",
      "license": "MIT"
    },
    "node_modules/pino": {
@@ -2002,9 +2002,9 @@
      }
    },
    "node_modules/strtok3": {
-      "version": "10.3.4",
-      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.4.tgz",
-      "integrity": "sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==",
+      "version": "10.3.5",
+      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.5.tgz",
+      "integrity": "sha512-ki4hZQfh5rX0QDLLkOCj+h+CVNkqmp/CMf8v8kZpkNVK6jGQooMytqzLZYUVYIZcFZ6yDB70EfD8POcFXiF5oA==",
      "license": "MIT",
      "dependencies": {
        "@tokenizer/token": "^0.3.0"
@@ -19,7 +19,7 @@ What makes Hermes different:

 - **Self-improving through skills** — Hermes learns from experience by saving reusable procedures as skills. When it solves a complex problem, discovers a workflow, or gets corrected, it can persist that knowledge as a skill document that loads into future sessions. Skills accumulate over time, making the agent better at your specific tasks and environment.
 - **Persistent memory across sessions** — remembers who you are, your preferences, environment details, and lessons learned. Pluggable memory backends (built-in, Honcho, Mem0, and more) let you choose how memory works.
- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 8+ other platforms with full tool access, not just chat.
+- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 10+ other platforms with full tool access, not just chat.
 - **Provider-agnostic** — swap models and providers mid-workflow without changing anything else. Credential pools rotate across multiple API keys automatically.
 - **Profiles** — run multiple independent Hermes instances with isolated configs, sessions, skills, and memory.
 - **Extensible** — plugins, MCP servers, custom tools, webhook triggers, cron scheduling, and the full Python ecosystem.
@@ -148,7 +148,7 @@ hermes gateway status       Check status
 hermes gateway setup        Configure platforms
 ```

-Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, API Server, Webhooks, Open WebUI.
+Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, BlueBubbles (iMessage), Weixin (WeChat), API Server, Webhooks. Open WebUI connects via the API Server adapter.

 Platform docs: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/

@@ -215,7 +215,7 @@ hermes insights [--days N]  Usage analytics
 hermes update               Update to latest version
 hermes pairing list/approve/revoke  DM authorization
 hermes plugins list/install/remove  Plugin management
-hermes honcho setup/status  Honcho memory integration
+hermes honcho setup/status  Honcho memory integration (requires honcho plugin)
 hermes memory setup/status/off  Memory provider config
 hermes completion bash|zsh  Shell completions
 hermes acp                  ACP server (IDE integration)
@@ -269,6 +269,28 @@ Type these during an interactive chat session.
 /plugins             List plugins (CLI)
 ```

+### Gateway
+```
+/approve             Approve a pending command (gateway)
+/deny                Deny a pending command (gateway)
+/restart             Restart gateway (gateway)
+/sethome             Set current chat as home channel (gateway)
+/update              Update Hermes to latest (gateway)
+/platforms (/gateway) Show platform connection status (gateway)
+```
+
+### Utility
+```
+/branch (/fork)      Branch the current session
+/btw                 Ephemeral side question (doesn't interrupt main task)
+/fast                Toggle priority/fast processing
+/browser             Open CDP browser connection
+/history             Show conversation history (CLI)
+/save                Save conversation to file (CLI)
+/paste               Attach clipboard image (CLI)
+/image               Attach local image file (CLI)
+```
+
 ### Info
 ```
 /help                Show commands
@@ -311,11 +333,11 @@ Edit with `hermes config edit` or `hermes config set section.key value`.
 | `terminal` | `backend` (local/docker/ssh/modal), `cwd`, `timeout` (180) |
 | `compression` | `enabled`, `threshold` (0.50), `target_ratio` (0.20) |
 | `display` | `skin`, `tool_progress`, `show_reasoning`, `show_cost` |
-| `stt` | `enabled`, `provider` (local/groq/openai) |
-| `tts` | `provider` (edge/elevenlabs/openai/kokoro/fish) |
+| `stt` | `enabled`, `provider` (local/groq/openai/mistral) |
+| `tts` | `provider` (edge/elevenlabs/openai/minimax/mistral/neutts) |
 | `memory` | `memory_enabled`, `user_profile_enabled`, `provider` |
 | `security` | `tirith_enabled`, `website_blocklist` |
-| `delegation` | `model`, `provider`, `max_iterations` (50) |
+| `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` |
 | `smart_model_routing` | `enabled`, `cheap_model` |
 | `checkpoints` | `enabled`, `max_snapshots` (50) |

@@ -323,7 +345,7 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con

 ### Providers

-18 providers supported. Set via `hermes model` or `hermes setup`.
+20+ providers supported. Set via `hermes model` or `hermes setup`.

 | Provider | Auth | Key env var |
 |----------|------|-------------|
@@ -332,16 +354,23 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con
 | Nous Portal | OAuth | `hermes login --provider nous` |
 | OpenAI Codex | OAuth | `hermes login --provider openai-codex` |
 | GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` |
+| Google Gemini | API key | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
 | DeepSeek | API key | `DEEPSEEK_API_KEY` |
+| xAI / Grok | API key | `XAI_API_KEY` |
 | Hugging Face | Token | `HF_TOKEN` |
 | Z.AI / GLM | API key | `GLM_API_KEY` |
 | MiniMax | API key | `MINIMAX_API_KEY` |
+| MiniMax CN | API key | `MINIMAX_CN_API_KEY` |
 | Kimi / Moonshot | API key | `KIMI_API_KEY` |
 | Alibaba / DashScope | API key | `DASHSCOPE_API_KEY` |
+| Xiaomi MiMo | API key | `XIAOMI_API_KEY` |
 | Kilo Code | API key | `KILOCODE_API_KEY` |
+| AI Gateway (Vercel) | API key | `AI_GATEWAY_API_KEY` |
+| OpenCode Zen | API key | `OPENCODE_ZEN_API_KEY` |
+| OpenCode Go | API key | `OPENCODE_GO_API_KEY` |
+| Qwen OAuth | OAuth | `hermes login --provider qwen-oauth` |
 | Custom endpoint | Config | `model.base_url` + `model.api_key` in config.yaml |
-
-Plus: AI Gateway, OpenCode Zen, OpenCode Go, MiniMax CN, GitHub Copilot ACP.
+| GitHub Copilot ACP | External | `COPILOT_CLI_PATH` or Copilot CLI |

 Full provider docs: https://hermes-agent.nousresearch.com/docs/integrations/providers

@@ -365,6 +394,10 @@ Enable/disable via `hermes tools` (interactive) or `hermes tools enable/disable
 | `delegation` | Subagent task delegation |
 | `cronjob` | Scheduled task management |
 | `clarify` | Ask user clarifying questions |
+| `messaging` | Cross-platform message sending |
+| `search` | Web search only (subset of `web`) |
+| `todo` | In-session task planning and tracking |
+| `rl` | Reinforcement learning tools (off by default) |
 | `moa` | Mixture of Agents (off by default) |
 | `homeassistant` | Smart home control (off by default) |

@@ -382,12 +415,13 @@ Provider priority (auto-detected):
 1. **Local faster-whisper** — free, no API key: `pip install faster-whisper`
 2. **Groq Whisper** — free tier: set `GROQ_API_KEY`
 3. **OpenAI Whisper** — paid: set `VOICE_TOOLS_OPENAI_KEY`
+4. **Mistral Voxtral** — set `MISTRAL_API_KEY`

 Config:
 ```yaml
 stt:
  enabled: true
-  provider: local        # local, groq, openai
+  provider: local        # local, groq, openai, mistral
  local:
    model: base          # tiny, base, small, medium, large-v3
 ```
@@ -399,8 +433,9 @@ stt:
 | Edge TTS | None | Yes (default) |
 | ElevenLabs | `ELEVENLABS_API_KEY` | Free tier |
 | OpenAI | `VOICE_TOOLS_OPENAI_KEY` | Paid |
-| Kokoro (local) | None | Free |
-| Fish Audio | `FISH_AUDIO_API_KEY` | Free tier |
+| MiniMax | `MINIMAX_API_KEY` | Paid |
+| Mistral (Voxtral) | `MISTRAL_API_KEY` | Paid |
+| NeuTTS (local) | None (`pip install neutts[all]` + `espeak-ng`) | Free |

 Voice commands: `/voice on` (voice-to-voice), `/voice tts` (always voice), `/voice off`.

@@ -492,7 +527,7 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 ### Voice not working
 1. Check `stt.enabled: true` in config.yaml
 2. Verify provider: `pip install faster-whisper` or set API key
-3. Restart gateway: `/restart`
+3. In gateway: `/restart`. In CLI: exit and relaunch.

 ### Tool not available
 1. `hermes tools` — check if toolset is enabled for your platform
@@ -503,10 +538,11 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 1. `hermes doctor` — check config and dependencies
 2. `hermes login` — re-authenticate OAuth providers
 3. Check `.env` has the right API key
+4. **Copilot 403**: `gh auth login` tokens do NOT work for Copilot API. You must use the Copilot-specific OAuth device code flow via `hermes model` → GitHub Copilot.

 ### Changes not taking effect
 - **Tools/skills:** `/reset` starts a new session with updated toolset
- **Config changes:** `/restart` reloads gateway config
+- **Config changes:** In gateway: `/restart`. In CLI: exit and relaunch.
 - **Code changes:** Restart the CLI or gateway process

 ### Skills not showing
@@ -520,6 +556,23 @@ Check logs first:
 grep -i "failed to send\|error" ~/.hermes/logs/gateway.log | tail -20
 ```

+Common gateway problems:
+- **Gateway dies on SSH logout**: Enable linger: `sudo loginctl enable-linger $USER`
+- **Gateway dies on WSL2 close**: WSL2 requires `systemd=true` under the `[boot]` section of `/etc/wsl.conf` for systemd services to work. Without it, the gateway falls back to `nohup` (dies when the session closes).
+- **Gateway crash loop**: Reset the failed state: `systemctl --user reset-failed hermes-gateway`
+
+### Platform-specific issues
+- **Discord bot silent**: Must enable **Message Content Intent** in Bot → Privileged Gateway Intents.
+- **Slack bot only works in DMs**: Must subscribe to `message.channels` event. Without it, the bot ignores public channels.
+- **Windows HTTP 400 "No models provided"**: Config file encoding issue (BOM). Ensure `config.yaml` is saved as UTF-8 without BOM.
+
+### Auxiliary models not working
+If `auxiliary` tasks (vision, compression, session_search) fail silently, the `auto` provider can't find a backend. Either set `OPENROUTER_API_KEY` or `GOOGLE_API_KEY`, or explicitly configure each auxiliary task's provider:
+```bash
+hermes config set auxiliary.vision.provider <your_provider>
+hermes config set auxiliary.vision.model <model_name>
+```
+
 ---

 ## Where to Find Things
@@ -557,7 +610,7 @@ hermes-agent/
 ├── toolsets.py           # Toolset definitions
 ├── cli.py                # Interactive CLI (HermesCLI)
 ├── hermes_state.py       # SQLite session store
-├── agent/                # Prompt builder, compression, display, adapters
+├── agent/                # Prompt builder, context compression, memory, model routing, credential pooling, skill dispatch
 ├── hermes_cli/           # CLI subcommands, config, setup, commands
 │   ├── commands.py       # Slash command registry (CommandDef)
 │   ├── config.py         # DEFAULT_CONFIG, env var definitions
@@ -626,7 +679,6 @@ run_conversation():
 ### Testing

 ```bash
-source venv/bin/activate  # or .venv/bin/activate
 python -m pytest tests/ -o 'addopts=' -q   # Full suite
 python -m pytest tests/tools/ -q            # Specific area
 ```
@@ -0,0 +1,258 @@
+---
+name: deep-research
+description: Iterative deep research loop — discover, plan, execute, review, repeat until convergence, then write up. Produces plan.json, research_notes.md, and final_report.md.
+version: 2.0.0
+tags: [research, arxiv, literature-review, technical-analysis, survey, iterative]
+related_skills: [arxiv, deep-research-training-data]
+---
+
+# Deep Research
+
+## When to Use
+
+User asks for a deep dive, literature review, landscape mapping, or systematic comparison on a technical topic.
+
+## Setup
+
+Create working directory and initial files:
+
+```
+~/research/<topic-slug>/
+  plan.json
+  research_notes.md
+  final_report.md        # created in Phase 5
+```
+
+Initialize plan.json:
+```json
+{"topic": "", "revision": 0, "questions": []}
+```
+
+Initialize research_notes.md:
+```markdown
+# Research Notes: <TOPIC>
+<!-- Append-only. Never delete earlier findings. -->
+```
+
+---
+
+## Phase 1: Discovery
+
+You are mapping a landscape. Do NOT go deep. Breadth only.
+
+1. Craft 5 search queries appropriate to the topic. Each query should cover one of these intents:
+   - **Overview** — surveys, introductions, "what is this field"
+   - **History** — origins, foundational work, key figures
+   - **Current** — latest developments, best current thinking, recent results
+   - **Contention** — debates, criticisms, limitations, open questions
+   - **Practice** — real-world usage, implementations, tools, how-to guides
+
+   Tailor the queries to the domain. A technical ML topic will have arxiv papers and benchmarks. A humanities or business topic will have books, essays, and practitioner blogs. Use your judgment.
+
+2. Run the 5 searches in parallel using `delegate_task` with 3 subagents (split the queries across them). Each subagent returns: a list of items, each with `{title, url, 1-line summary}`. No full extracts yet.
+
+3. Collect all results. Deduplicate by URL. You now have a landscape list.
+
+4. From the landscape list, identify:
+   - 3-5 major themes or camps
+   - Key terminology
+   - Rough timeline (when did this start, what are the eras)
+   - Who the major authors/groups are
+
+5. Hold this in context. Do NOT write it anywhere yet — it feeds Phase 2.
+
+---
+
+## Phase 2: Planning
+
+Turn the landscape into a hierarchical question tree.
+
+1. Write 5-8 top-level questions. Cover ALL of these angles:
+   - Problem definition: What problem does this solve? Why does it matter?
+   - Taxonomy: What are the major approaches? How do they differ?
+   - SOTA: What are the best current results? On what benchmarks?
+   - Mechanisms: How do the key methods actually work? (formulations, algorithms)
+   - Tradeoffs: What are the practical pros/cons of each approach?
+   - Open problems: What's unsolved? Where is the field heading?
+   - Practice: What should a practitioner actually use today?
+
+2. Under each top-level question, add 2-4 sub-questions where the topic has known depth. Use the landscape from Phase 1 to inform these — if you saw 3 competing approaches, create sub-questions for each.
+
+3. Assign hierarchical IDs: "1", "1.1", "1.2", "2", "2.1", etc.
+
+4. Set all statuses to "pending".
+
+5. Write plan.json:
+```json
+{
+  "topic": "<topic>",
+  "revision": 0,
+  "questions": [
+    {
+      "id": "1",
+      "question": "...",
+      "status": "pending",
+      "children": [
+        {"id": "1.1", "question": "...", "status": "pending", "children": []}
+      ]
+    }
+  ]
+}
+```
+
+6. **Show the plan to the user. Wait for approval before proceeding.**
+
+---
+
+## Phase 3: Execution
+
+Answer each pending question with evidence. Work depth-first through the tree. Prefer the built-in web search and read/write file tools over writing new .py scripts.
+
+For each pending question:
+
+1. **Search**: Run 1-3 web searches. Craft queries from the question text — be specific. Include year constraints if looking for recent work.
+
+2. **Extract**: Pick the 2-3 most relevant URLs from search results. Run `web_extract` on them. For arxiv papers, use the PDF URL: `https://arxiv.org/pdf/XXXX.XXXXX`
+
+3. **Verify**: Cross-check key claims across sources. Note when sources conflict.
+
+4. **Record**: Append to research_notes.md in this exact format:
+
+```markdown
+## [<ID>] <Question text>
+
+**Sources:**
+- [<Title>](<URL>) — <1-line summary of what this source contributes>
+- [<Title>](<URL>) — <1-line summary>
+
+**Findings:**
+- <Key fact 1> (source: <short ref>)
+- <Key fact 2> (source: <short ref>)
+- <Contradiction>: <Source A> says X, but <Source B> says Y
+
+**Follow-up questions:**
+- <New question discovered during research, or "None">
+```
+
+**Parallelism**: Group 2-3 top-level question groups and research them simultaneously using `delegate_task`. Each subagent gets a top-level question AND all its children — related sub-questions are best researched together since the sources overlap. Pass each subagent the full question list and the exact output format above. The subagent searches, extracts, and returns formatted findings for all assigned questions. You then append all results to research_notes.md yourself.
+
+**Subagent prompt template**: "Research these questions about [TOPIC] and return findings in the exact format below. TOP-LEVEL: [ID] Question. SUB-QUESTIONS: [ID] Question, [ID] Question... For EACH question: 1. Search: run 1-2 targeted web searches. 2. Extract: web_extract on 2-3 most relevant URLs. 3. Return in this format: [paste the format above]"
+
+**Appending notes**: Use `execute_code` with `hermes_tools.patch` or `hermes_tools.write_file` to append subagent results to research_notes.md. Do NOT use `read_file` then manual editing — the line-number format causes issues with JSON/markdown manipulation.
+
+**Pace**: Do one batch of 2-3 top-level groups, then proceed to Phase 4 Review. Do NOT execute all questions before reviewing.
+
+---
+
+## Phase 4: Review
+
+Update the plan based on what you learned.
+
+1. Read plan.json and the latest entries in research_notes.md.
+
+2. For each question you just answered:
+   - Set status to "done"
+
+3. Check Follow-up questions from the notes. For each:
+   - If it's substantial and not already covered: add it as a child question with status "pending"
+   - If it's minor or already covered: skip it
+
+4. Check remaining pending questions:
+   - If a pending question is now answered by findings from another question: mark "done"
+   - If a pending question turned out to be irrelevant: mark "dropped"
+
+5. Increment revision number.
+
+6. Write updated plan.json. Use `execute_code` for all plan.json manipulation — JSON parsing, status updates, counting, and convergence checks in one script. Do NOT read plan.json with `read_file` and try to parse it (line-number format breaks JSON parsing).
+
+### Convergence Check
+
+Count: new questions added this revision, and remaining pending questions.
+
+**Continue** → go back to Phase 3 if you are below revision 6 and either of these holds:
+- There are pending questions remaining
+- More than 1 new question was added this revision
+
+**Stop** → proceed to Phase 5 if either of these holds:
+- All questions are done or dropped and only 0-1 new questions were added this revision (the plan has stabilized)
+- You've hit revision 6 (hard cap — wrap up with what you have)
+
+---
+
+## Phase 5: Write-Up
+
+Convert raw notes into a structured report.
+
+1. Read all of research_notes.md.
+
+2. Organize findings into a logical narrative. The section order should follow the topic's natural structure, NOT the question numbering.
+
+3. Write final_report.md. Adapt these section templates to fit the topic — rename, merge, split, or reorder as the material demands. The structure should serve the narrative, not the other way around. These are starting points, not a rigid template:
+
+```markdown
+# <Topic>: Deep Research Report
+
+## Executive Summary
+3-5 sentences. What is this field, what's the current state, what should the reader know.
+
+## Background & Motivation
+Why this problem matters. Historical context. Key definitions.
+
+## Taxonomy of Approaches
+Major categories of methods. Use a comparison table if there are 3+ approaches:
+| Approach | Key Idea | Strengths | Weaknesses | Representative Work |
+
+## State of the Art
+Best current results. Benchmarks. Key papers with dates and venues.
+
+## How It Works
+Technical details of the 2-3 most important methods. Formulations, algorithms, architectures.
+
+## Open Problems & Future Directions
+What's unsolved. Active debates. Emerging trends.
+
+## Practical Recommendations
+Decision framework: "If you need X, use Y because Z."
+Which codebases to start from. Compute requirements.
+
+## References
+All URLs from research_notes.md, deduplicated, organized by topic.
+```
+
+4. Present final_report.md to the user.
+
+---
+
+## Synthetic Data Generation (for GRPO training)
+
+This skill's output maps directly to training data for deep research models (e.g., DeepResearch Bench format):
+
+- **Input**: the prompt (research query)
+- **Output**: final_report.md (the article with citations)
+- **Rubric**: one LLM call per prompt generates task-specific scoring criteria across 4 dimensions (comprehensiveness, insight, instruction-following, readability) with weighted sub-criteria. Sample 5x and average weights for stability.
+- **Reward signal**: LLM-as-judge scores the report against the rubric. Citation accuracy is checked programmatically (scrape URL, ask "does this page support this claim?").
+- **Format**: `{"id": "...", "prompt": "...", "article": "..."}`
+
+Reference benchmark: https://github.com/Ayanami0730/deep_research_bench/ (100 PhD-level tasks, RACE + FACT scoring, Gemini as judge).
+
+## Tips
+
+- Conference papers (NeurIPS, ICML, ICLR) > workshop > preprints. Always note venue + year.
+- Check for withdrawn/retracted papers before citing.
+- If user gives a time/depth constraint, reduce sub-questions per top-level question. Never skip phases.
+- research_notes.md is append-only. Never delete earlier findings.
+- The most valuable output is the taxonomy + decision framework, not a list of papers.
+- Typical convergence: 2-3 iterations (we saw 2 iterations on a non-alcoholic cocktails topic with 25 questions).
+- Phase 3 parallelism works well with delegate_task batches of 3 — each subagent handles a top-level question + its children.
+- For research_notes.md appending via execute_code: use write_file with string concatenation, not patch (the file gets large fast and patch gets slow).
+- When generating research prompts at scale, match real-world distribution: ~16% short open-ended (<15 words), ~35% medium, ~49% detailed. Include non-STEM topics (entertainment, food, business, sports). Don't make every prompt a multi-sentence PhD question.
+
+## Pitfalls (from trial runs)
+
+- **The `read_file` tool returns line-numbered content** (`  1|text`). Never pass its output for plan.json to `json.loads` — parsing will fail. Always read the file inside `execute_code` via `hermes_tools.read_file`, which returns clean content, or use `terminal("cat file")`.
+- **Subagents need the exact output format in their prompt.** If you just say "research this topic," they'll return unstructured prose. Paste the markdown template into the subagent goal.
+- **Bundle parent + children for subagents.** Don't send individual sub-questions as separate subagent tasks — a subagent researching "production methods" will naturally find answers to "vacuum distillation" and "fermentation" children in the same sources.
+- **Most follow-up questions are minor.** During review, be aggressive about skipping follow-ups that are tangential or that would be answered by existing pending questions. Only add follow-ups that represent genuine gaps.
+- **Convergence happens fast.** In testing, 2 iterations (2 batches of 2-3 top-level groups) covered 25 questions. Don't over-plan for many iterations — the hard cap of 6 is rarely needed.
+- **The write-up is the most token-intensive phase.** Read all of research_notes.md before writing. For large note files (400+ lines), this may require reading in chunks or using `execute_code` to extract just the findings bullets.
@@ -820,6 +820,24 @@ Every successful ML paper centers on what Neel Nanda calls "the narrative": a sh

 **If you cannot state your contribution in one sentence, you don't yet have a paper.**

+### The Sources Behind This Guidance
+
+This skill synthesizes writing philosophy from researchers who have published extensively at top venues. The writing philosophy layer was originally compiled by [Orchestra Research](https://github.com/orchestra-research) as the `ml-paper-writing` skill.
+
+| Source | Key Contribution | Link |
+|--------|-----------------|------|
+| **Neel Nanda** (Google DeepMind) | The Narrative Principle, What/Why/So What framework | [Highly Opinionated Advice on How to Write ML Papers](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) |
+| **Sebastian Farquhar** (DeepMind) | 5-sentence abstract formula | [How to Write ML Papers](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) |
+| **Gopen & Swan** | 7 principles of reader expectations | [Science of Scientific Writing](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) |
+| **Zachary Lipton** | Word choice, eliminating hedging | [Heuristics for Scientific Writing](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) |
+| **Jacob Steinhardt** (UC Berkeley) | Precision, consistent terminology | [Writing Tips](https://bounded-regret.ghost.io/) |
+| **Ethan Perez** (Anthropic) | Micro-level clarity tips | [Easy Paper Writing Tips](https://ethanperez.net/easy-paper-writing-tips/) |
+| **Andrej Karpathy** | Single contribution focus | Various lectures |
+
+**For deeper dives into any of these, see:**
+- [references/writing-guide.md](references/writing-guide.md) — Full explanations with examples
+- [references/sources.md](references/sources.md) — Complete bibliography
+
 ### Time Allocation

 Spend approximately **equal time** on each of:
@@ -4,6 +4,12 @@ This document lists all authoritative sources used to build this skill, organize

 ---

+## Origin & Attribution
+
+The writing philosophy, citation verification workflow, and conference reference materials in this skill were originally compiled by **[Orchestra Research](https://github.com/orchestra-research)** as the `ml-paper-writing` skill (January 2026), drawing on Neel Nanda's blog post and other researcher guides listed below. The skill was integrated into hermes-agent by teknium (January 2026), then expanded into the current `research-paper-writing` pipeline by SHL0MS (April 2026, PR #4654), which added experiment design, execution monitoring, iterative refinement, and submission phases while preserving the original writing philosophy and reference files.
+
+---
+
 ## Writing Philosophy & Guides

 ### Primary Sources (Must-Read)
@@ -17,7 +17,6 @@ from agent.auxiliary_client import (
    call_llm,
    async_call_llm,
    _read_codex_access_token,
-    _get_auxiliary_provider,
    _get_provider_chain,
    _is_payment_error,
    _try_payment_fallback,
@@ -32,12 +31,6 @@ def _clean_env(monkeypatch):
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
-        # Per-task provider/model/direct-endpoint overrides
-        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
-        "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
-        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
-        "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
-        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)

@@ -568,29 +561,6 @@ class TestGetTextAuxiliaryClient:
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

-    def test_task_direct_endpoint_override(self, monkeypatch):
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client("web_extract")
-        assert model == "task-model"
-        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
-        assert mock_openai.call_args.kwargs["api_key"] == "task-key"
-
-    def test_task_direct_endpoint_without_openai_key_uses_placeholder(self, monkeypatch):
-        """Local endpoints without an API key should use 'no-key-required' placeholder."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client("web_extract")
-        assert client is not None
-        assert model == "task-model"
-        assert mock_openai.call_args.kwargs["api_key"] == "no-key-required"
-        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
-
    def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
        config = {
            "model": {
@@ -879,73 +849,9 @@ class TestAuxiliaryPoolAwareness:



-class TestGetAuxiliaryProvider:
-    """Tests for _get_auxiliary_provider env var resolution."""
-
-    def test_no_task_returns_auto(self):
-        assert _get_auxiliary_provider() == "auto"
-        assert _get_auxiliary_provider("") == "auto"
-
-    def test_auxiliary_prefix_takes_priority(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "openrouter")
-        assert _get_auxiliary_provider("vision") == "openrouter"
-
-    def test_context_prefix_fallback(self, monkeypatch):
-        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
-        assert _get_auxiliary_provider("compression") == "nous"
-
-    def test_auxiliary_prefix_over_context_prefix(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_COMPRESSION_PROVIDER", "openrouter")
-        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
-        assert _get_auxiliary_provider("compression") == "openrouter"
-
-    def test_auto_value_treated_as_auto(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "auto")
-        assert _get_auxiliary_provider("vision") == "auto"
-
-    def test_whitespace_stripped(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "  openrouter  ")
-        assert _get_auxiliary_provider("vision") == "openrouter"
-
-    def test_case_insensitive(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "OpenRouter")
-        assert _get_auxiliary_provider("vision") == "openrouter"
-
-    def test_main_provider(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "main")
-        assert _get_auxiliary_provider("web_extract") == "main"
-
-
 class TestTaskSpecificOverrides:
    """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""

-    def test_text_with_vision_provider_override(self, monkeypatch):
-        """AUXILIARY_VISION_PROVIDER should not affect text tasks."""
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "nous")
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        with patch("agent.auxiliary_client.OpenAI"):
-            client, model = get_text_auxiliary_client()  # no task → auto
-        assert model == "google/gemini-3-flash-preview"  # OpenRouter, not Nous
-
-    def test_compression_task_reads_context_prefix(self, monkeypatch):
-        """Compression task should check CONTEXT_COMPRESSION_PROVIDER env var."""
-        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")  # would win in auto
-        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
-             patch("agent.auxiliary_client.OpenAI"):
-            mock_nous.return_value = {"access_token": "***"}
-            client, model = get_text_auxiliary_client("compression")
-        # Config-first: model comes from config.yaml summary_model default,
-        # but provider is forced to Nous via env var
-        assert client is not None
-
-    def test_web_extract_task_override(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "openrouter")
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        with patch("agent.auxiliary_client.OpenAI"):
-            client, model = get_text_auxiliary_client("web_extract")
-        assert model == "google/gemini-3-flash-preview"
-
    def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
@@ -971,24 +877,71 @@ class TestTaskSpecificOverrides:
            client, model = get_text_auxiliary_client("compression")
        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter

-    def test_compression_summary_base_url_from_config(self, monkeypatch, tmp_path):
-        """compression.summary_base_url should produce a custom-endpoint client."""
+    def test_resolve_auto_prefers_live_main_runtime_over_persisted_config(self, monkeypatch, tmp_path):
+        """Session-only live model switches should override persisted config for auto routing."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "config.yaml").write_text(
-            """compression:
-  summary_provider: custom
-  summary_model: glm-4.7
-  summary_base_url: https://api.z.ai/api/coding/paas/v4
+            """model:
+  default: glm-5.1
+  provider: opencode-go
 """
        )
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-        # Custom endpoints need an API key to build the client
-        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client("compression")
-        assert model == "glm-4.7"
-        assert mock_openai.call_args.kwargs["base_url"] == "https://api.z.ai/api/coding/paas/v4"
+
+        calls = []
+
+        def _fake_resolve(provider, model=None, *args, **kwargs):
+            calls.append((provider, model, kwargs))
+            return MagicMock(), model or "resolved-model"
+
+        with patch("agent.auxiliary_client.resolve_provider_client", side_effect=_fake_resolve):
+            client, model = _resolve_auto(
+                main_runtime={
+                    "provider": "openai-codex",
+                    "model": "gpt-5.4",
+                    "api_mode": "codex_responses",
+                }
+            )
+
+        assert client is not None
+        assert model == "gpt-5.4"
+        assert calls[0][0] == "openai-codex"
+        assert calls[0][1] == "gpt-5.4"
+        assert calls[0][2]["api_mode"] == "codex_responses"
+
+    def test_explicit_compression_pin_still_wins_over_live_main_runtime(self, monkeypatch, tmp_path):
+        """A compression pin under ``auxiliary:`` in config.yaml outranks the live session runtime."""
+        live_runtime = {"provider": "openai-codex", "model": "gpt-5.4"}
+        pinned_model = "google/gemini-3-flash-preview"
+
+        home_dir = tmp_path / "hermes"
+        home_dir.mkdir(parents=True, exist_ok=True)
+        config_path = home_dir / "config.yaml"
+        config_path.write_text(
+            """auxiliary:
+  compression:
+    provider: openrouter
+    model: google/gemini-3-flash-preview
+model:
+  default: glm-5.1
+  provider: opencode-go
+"""
+        )
+        monkeypatch.setenv("HERMES_HOME", str(home_dir))
+
+        resolver_patch = patch(
+            "agent.auxiliary_client.resolve_provider_client",
+            return_value=(MagicMock(), pinned_model),
+        )
+        with resolver_patch as mock_resolve:
+            # Hand over a copy so the final comparison is made against an untouched dict.
+            client, model = get_text_auxiliary_client(
+                "compression",
+                main_runtime=dict(live_runtime),
+            )
+
+        recorded_call = mock_resolve.call_args
+        assert client is not None
+        assert model == pinned_model
+        # The pinned provider is what gets resolved; the live runtime is only forwarded.
+        assert recorded_call.args[0] == "openrouter"
+        assert recorded_call.kwargs["main_runtime"] == live_runtime


 class TestAuxiliaryMaxTokensParam:
@@ -1560,3 +1513,74 @@ class TestStaleBaseUrlWarning:

        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Warning should not fire a second time"
+
+
+# ---------------------------------------------------------------------------
+# Anthropic-compatible image block conversion
+# ---------------------------------------------------------------------------
+
+class TestAnthropicCompatImageConversion:
+    """Covers Anthropic-compat endpoint detection and OpenAI-to-Anthropic image block rewriting."""
+
+    def test_known_providers_detected(self):
+        """Both MiniMax provider ids are recognised even with an empty base URL."""
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        for provider_id in ("minimax", "minimax-cn"):
+            assert _is_anthropic_compat_endpoint(provider_id, "")
+
+    def test_openrouter_not_detected(self):
+        """OpenRouter and native Anthropic are not flagged as compat endpoints."""
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        for provider_id in ("openrouter", "anthropic"):
+            assert not _is_anthropic_compat_endpoint(provider_id, "")
+
+    def test_url_based_detection(self):
+        """For a custom provider the base URL decides the outcome."""
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        anthropic_style_urls = (
+            "https://api.minimax.io/anthropic",
+            "https://example.com/anthropic/v1",
+        )
+        for base_url in anthropic_style_urls:
+            assert _is_anthropic_compat_endpoint("custom", base_url)
+        assert not _is_anthropic_compat_endpoint("custom", "https://api.openai.com/v1")
+
+    def test_base64_image_converted(self):
+        """A data: URL becomes a base64 image source with the media type extracted."""
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        text_part = {"type": "text", "text": "describe"}
+        image_part = {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR="}}
+        converted = _convert_openai_images_to_anthropic(
+            [{"role": "user", "content": [text_part, image_part]}]
+        )
+        block = converted[0]["content"][1]
+        assert block["type"] == "image"
+        assert block["source"]["type"] == "base64"
+        assert block["source"]["media_type"] == "image/png"
+        assert block["source"]["data"] == "iVBOR="
+
+    def test_url_image_converted(self):
+        """A plain https URL becomes a url-type image source."""
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        image_part = {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}}
+        converted = _convert_openai_images_to_anthropic(
+            [{"role": "user", "content": [image_part]}]
+        )
+        block = converted[0]["content"][0]
+        assert block["type"] == "image"
+        assert block["source"]["type"] == "url"
+        assert block["source"]["url"] == "https://example.com/img.jpg"
+
+    def test_text_only_messages_unchanged(self):
+        """String-content messages are passed through as the very same object."""
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        plain_message = {"role": "user", "content": "Hello"}
+        messages = [plain_message]
+        converted = _convert_openai_images_to_anthropic(messages)
+        assert converted[0] is messages[0]  # same object, not copied
+
+    def test_jpeg_media_type_parsed(self):
+        """The media type is read from the data: URL rather than assumed to be PNG."""
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        image_part = {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/="}}
+        converted = _convert_openai_images_to_anthropic(
+            [{"role": "user", "content": [image_part]}]
+        )
+        source = converted[0]["content"][0]["source"]
+        assert source["media_type"] == "image/jpeg"
@@ -273,18 +273,6 @@ class TestDefaultConfigShape:
        assert web["provider"] == "auto"
        assert web["model"] == ""

-    def test_compression_provider_default(self):
-        from hermes_cli.config import DEFAULT_CONFIG
-        compression = DEFAULT_CONFIG["compression"]
-        assert "summary_provider" in compression
-        assert compression["summary_provider"] == "auto"
-
-    def test_compression_base_url_default(self):
-        from hermes_cli.config import DEFAULT_CONFIG
-        compression = DEFAULT_CONFIG["compression"]
-        assert "summary_base_url" in compression
-        assert compression["summary_base_url"] is None
-

 # ── CLI defaults parity ─────────────────────────────────────────────────────

@@ -12,17 +12,6 @@ def _isolate(tmp_path, monkeypatch):
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-    for env_var in (
-        "AUXILIARY_VISION_PROVIDER",
-        "AUXILIARY_VISION_MODEL",
-        "AUXILIARY_VISION_BASE_URL",
-        "AUXILIARY_VISION_API_KEY",
-        "CONTEXT_VISION_PROVIDER",
-        "CONTEXT_VISION_MODEL",
-        "CONTEXT_VISION_BASE_URL",
-        "CONTEXT_VISION_API_KEY",
-    ):
-        monkeypatch.delenv(env_var, raising=False)
    # Write a minimal config so load_config doesn't fail
    (hermes_home / "config.yaml").write_text("model:\n  default: test-model\n")

@@ -69,6 +58,10 @@ class TestNormalizeVisionProvider:
        assert _normalize_vision_provider("beans") == "beans"
        assert _normalize_vision_provider("deepseek") == "deepseek"

+    def test_custom_colon_named_provider_preserved(self):
+        """``custom:<name>`` normalizes to the bare ``<name>``: the prefix is dropped, the name is kept."""
+        from agent.auxiliary_client import _normalize_vision_provider
+        normalized = _normalize_vision_provider("custom:beans")
+        assert normalized == "beans"
+
    def test_codex_alias_still_works(self):
        from agent.auxiliary_client import _normalize_vision_provider
        assert _normalize_vision_provider("codex") == "openai-codex"
@@ -240,3 +233,22 @@ class TestResolveVisionProviderClientModelNormalization:
        assert provider == "zai"
        assert client is not None
        assert model == "glm-5.1"
+
+
+class TestVisionPathApiMode:
+    """The vision path must hand the configured api_mode on to _get_cached_client."""
+
+    def test_explicit_provider_passes_api_mode(self, tmp_path):
+        """auxiliary.vision.api_mode from config reaches the cached-client lookup as a keyword."""
+        from agent.auxiliary_client import resolve_vision_provider_client
+
+        vision_config = {
+            "model": {"default": "test-model"},
+            "auxiliary": {"vision": {"api_mode": "chat_completions"}},
+        }
+        _write_config(tmp_path, vision_config)
+
+        cached_client_patch = patch(
+            "agent.auxiliary_client._get_cached_client",
+            return_value=(MagicMock(), "test-model"),
+        )
+        with cached_client_patch as mock_gcc:
+            # Unpacking also checks that a 3-tuple comes back.
+            _provider, _client, _model = resolve_vision_provider_client(provider="deepseek")
+
+        mock_gcc.assert_called_once()
+        forwarded_kwargs = mock_gcc.call_args[1]
+        assert forwarded_kwargs.get("api_mode") == "chat_completions"
@@ -0,0 +1,139 @@
+"""Tests for focus_topic flowing through the compressor.
+
+Verifies that _generate_summary and compress accept and use the focus_topic
+parameter correctly.  Inspired by Claude Code's /compact <focus>.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from agent.context_compressor import ContextCompressor
+
+
+def _make_compressor():
+    """Build a ContextCompressor without running __init__, then set the attributes these tests assign."""
+    stub = ContextCompressor.__new__(ContextCompressor)
+    preset_attributes = {
+        "protect_first_n": 2,
+        "protect_last_n": 5,
+        "tail_token_budget": 20000,
+        "context_length": 200000,
+        "threshold_percent": 0.80,
+        "threshold_tokens": 160000,
+        "max_summary_tokens": 10000,
+        "quiet_mode": True,
+        "compression_count": 0,
+        "last_prompt_tokens": 0,
+        "_previous_summary": None,
+        "_summary_failure_cooldown_until": 0.0,
+        "summary_model": None,
+    }
+    for attr_name, attr_value in preset_attributes.items():
+        setattr(stub, attr_name, attr_value)
+    return stub
+
+
+def test_focus_topic_injected_into_summary_prompt():
+    """A supplied focus_topic must appear as focus guidance in the prompt sent to the LLM."""
+    compressor = _make_compressor()
+    seen = {}
+
+    def fake_call_llm(**kwargs):
+        # Record the outgoing messages, then return a canned summary response.
+        seen["messages"] = kwargs["messages"]
+        reply = MagicMock()
+        reply.choices = [MagicMock()]
+        reply.choices[0].message.content = "## Goal\nUnderstand DB schema."
+        return reply
+
+    conversation = [
+        {"role": "user", "content": "Tell me about the database schema"},
+        {"role": "assistant", "content": "The schema has tables: users, orders, products."},
+    ]
+
+    with patch("agent.context_compressor.call_llm", fake_call_llm):
+        summary = compressor._generate_summary(conversation, focus_topic="database schema")
+
+    assert summary is not None
+    prompt_text = seen["messages"][0]["content"]
+    for expected_fragment in ('FOCUS TOPIC: "database schema"', "PRIORITISE", "60-70%"):
+        assert expected_fragment in prompt_text
+
+
+def test_no_focus_topic_no_injection():
+    """Without focus_topic, the prompt doesn't contain focus guidance."""
+    compressor = _make_compressor()
+    turns = [
+        {"role": "user", "content": "Hello"},
+        {"role": "assistant", "content": "Hi"},
+    ]
+
+    captured_prompt = {}
+
+    def mock_call_llm(**kwargs):
+        # Capture the messages the compressor sends so the prompt can be inspected.
+        captured_prompt["messages"] = kwargs["messages"]
+        resp = MagicMock()
+        resp.choices = [MagicMock()]
+        resp.choices[0].message.content = "## Goal\nGreeting."
+        return resp
+
+    with patch("agent.context_compressor.call_llm", mock_call_llm):
+        result = compressor._generate_summary(turns)
+
+    # Same sanity check as the focused variant: a summary must actually be produced.
+    assert result is not None
+    prompt_text = captured_prompt["messages"][0]["content"]
+    assert "FOCUS TOPIC" not in prompt_text
+
+
+def test_compress_passes_focus_to_generate_summary():
+    """compress() passes focus_topic through to _generate_summary."""
+    compressor = _make_compressor()
+
+    # Track the keyword arguments _generate_summary receives.
+    received_kwargs = {}
+
+    def tracking_generate(turns, **kwargs):
+        received_kwargs.update(kwargs)
+        return "## Goal\nTest."
+
+    # Replace the summarizer on the instance so no LLM call is made.
+    compressor._generate_summary = tracking_generate
+
+    messages = [
+        {"role": "system", "content": "System prompt"},
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "reply1"},
+        {"role": "user", "content": "second"},
+        {"role": "assistant", "content": "reply2"},
+        {"role": "user", "content": "third"},
+        {"role": "assistant", "content": "reply3"},
+        {"role": "user", "content": "fourth"},
+        {"role": "assistant", "content": "reply4"},
+    ]
+
+    compressor.compress(messages, current_tokens=100000, focus_topic="authentication flow")
+
+    assert received_kwargs.get("focus_topic") == "authentication flow"
+
+
+def test_compress_none_focus_by_default():
+    """compress() passes None focus_topic by default."""
+    compressor = _make_compressor()
+
+    received_kwargs = {}
+    call_count = 0
+
+    def tracking_generate(turns, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        received_kwargs.update(kwargs)
+        return "## Goal\nTest."
+
+    # Replace the summarizer on the instance so no LLM call is made.
+    compressor._generate_summary = tracking_generate
+
+    messages = [
+        {"role": "system", "content": "System prompt"},
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "reply1"},
+        {"role": "user", "content": "second"},
+        {"role": "assistant", "content": "reply2"},
+        {"role": "user", "content": "third"},
+        {"role": "assistant", "content": "reply3"},
+        {"role": "user", "content": "fourth"},
+        {"role": "assistant", "content": "reply4"},
+    ]
+
+    compressor.compress(messages, current_tokens=100000)
+
+    # Without this guard the check below would also pass if the summarizer were never called.
+    assert call_count >= 1
+    assert received_kwargs.get("focus_topic") is None
@@ -191,6 +191,37 @@ class TestNonStringContent:
        kwargs = mock_call.call_args.kwargs
        assert "temperature" not in kwargs

+    def test_summary_call_passes_live_main_runtime(self):
+        """_generate_summary hands the compressor's runtime settings to call_llm as main_runtime."""
+        live_runtime = {
+            "model": "gpt-5.4",
+            "provider": "openai-codex",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-token",
+            "api_mode": "codex_responses",
+        }
+
+        canned_choice = MagicMock()
+        canned_choice.message.content = "ok"
+        canned_response = MagicMock()
+        canned_response.choices = [canned_choice]
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            compressor = ContextCompressor(quiet_mode=True, **live_runtime)
+
+        conversation = [
+            {"role": "user", "content": "do something"},
+            {"role": "assistant", "content": "ok"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=canned_response) as mock_call:
+            compressor._generate_summary(conversation)
+
+        forwarded = mock_call.call_args.kwargs["main_runtime"]
+        assert forwarded == live_runtime
+

 class TestSummaryFailureCooldown:
    def test_summary_failure_enters_cooldown_and_skips_retry(self):
@@ -308,6 +308,34 @@ class TestMinimaxPreserveDots:
        from run_agent import AIAgent
        assert AIAgent._anthropic_preserve_dots(agent) is False

+    def test_opencode_zen_provider_preserves_dots(self):
+        from run_agent import AIAgent
+        from types import SimpleNamespace
+        stub = SimpleNamespace(provider="opencode-zen", base_url="")
+        assert AIAgent._anthropic_preserve_dots(stub) is True
+
+    def test_opencode_zen_url_preserves_dots(self):
+        from run_agent import AIAgent
+        from types import SimpleNamespace
+        stub = SimpleNamespace(provider="custom", base_url="https://opencode.ai/zen/v1")
+        assert AIAgent._anthropic_preserve_dots(stub) is True
+
+    def test_zai_provider_preserves_dots(self):
+        from run_agent import AIAgent
+        from types import SimpleNamespace
+        stub = SimpleNamespace(provider="zai", base_url="")
+        assert AIAgent._anthropic_preserve_dots(stub) is True
+
+    def test_bigmodel_cn_url_preserves_dots(self):
+        from run_agent import AIAgent
+        from types import SimpleNamespace
+        stub = SimpleNamespace(provider="custom", base_url="https://open.bigmodel.cn/api/paas/v4")
+        assert AIAgent._anthropic_preserve_dots(stub) is True
+
+    def test_normalize_preserves_m25_free_dot(self):
+        from agent.anthropic_adapter import normalize_model_name as _normalize
+        assert "minimax-m2.5-free" == _normalize("minimax-m2.5-free", preserve_dots=True)
+
    def test_normalize_preserves_m27_dot(self):
        from agent.anthropic_adapter import normalize_model_name
        assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7"
@@ -70,6 +70,44 @@ class TestQueryLocalContextLengthOllama:

        assert result == 32768

+    def test_ollama_num_ctx_wins_over_model_info(self):
+        """When both num_ctx (Modelfile) and model_info (GGUF) are present,
+        num_ctx wins because it's the *runtime* context Ollama actually
+        allocates KV cache for. The GGUF model_info.context_length is the
+        training max — using it would let Hermes grow conversations past
+        the runtime limit and Ollama would silently truncate.
+
+        Concrete example: hermes-brain:qwen3-14b-ctx32k is a Modelfile
+        derived from qwen3:14b with `num_ctx 32768`, but the underlying
+        GGUF reports `qwen3.context_length: 40960` (training max). If
+        Hermes used 40960 it would let the conversation grow past 32768
+        before compressing, and Ollama would truncate the prefix.
+        """
+        from agent.model_metadata import _query_local_context_length
+
+        show_resp = self._make_resp(200, {
+            "model_info": {"qwen3.context_length": 40960},
+            "parameters": "num_ctx                        32768\ntemperature                    0.6\n",
+        })
+        models_resp = self._make_resp(404, {})  # the /v1/models lookup is made to fail
+
+        client_mock = MagicMock()
+        client_mock.__enter__.return_value = client_mock  # `with httpx.Client(...) as c` yields the mock
+        client_mock.__exit__ = MagicMock(return_value=False)
+        client_mock.post.return_value = show_resp
+        client_mock.get.return_value = models_resp
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"), \
+             patch("httpx.Client", return_value=client_mock):
+            result = _query_local_context_length(
+                "hermes-brain:qwen3-14b-ctx32k", "http://localhost:11434/v1"  # HTTP is mocked; no real host needed
+            )
+
+        assert result == 32768, (
+            f"Expected num_ctx (32768) to win over model_info (40960), got {result}. "
+            "If Hermes uses the GGUF training max, conversations will silently truncate."
+        )
+
    def test_ollama_show_404_falls_through(self):
        """When /api/show returns 404, falls through to /v1/models/{model}."""
        from agent.model_metadata import _query_local_context_length
@@ -87,7 +87,10 @@ class TestProviderMapping:

    def test_unmapped_provider_not_in_dict(self):
        assert "nous" not in PROVIDER_TO_MODELS_DEV
-        assert "openai-codex" not in PROVIDER_TO_MODELS_DEV
+
+    def test_openai_codex_mapped_to_openai(self):
+        for provider_id in ("openai", "openai-codex"):
+            assert PROVIDER_TO_MODELS_DEV[provider_id] == "openai"


 class TestExtractContext:
@@ -18,6 +18,7 @@ from agent.prompt_builder import (
    build_skills_system_prompt,
    build_nous_subscription_prompt,
    build_context_files_prompt,
+    build_environment_hints,
    CONTEXT_FILE_MAX_CHARS,
    DEFAULT_AGENT_IDENTITY,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
@@ -26,6 +27,7 @@ from agent.prompt_builder import (
    MEMORY_GUIDANCE,
    SESSION_SEARCH_GUIDANCE,
    PLATFORM_HINTS,
+    WSL_ENVIRONMENT_HINT,
 )
 from hermes_cli.nous_subscription import NousFeatureState, NousSubscriptionFeatures

@@ -770,6 +772,29 @@ class TestPromptBuilderConstants:
        assert "cli" in PLATFORM_HINTS


+# =========================================================================
+# Environment hints
+# =========================================================================
+
+class TestEnvironmentHints:
+    def test_wsl_hint_constant_mentions_mnt(self):
+        assert "WSL" in WSL_ENVIRONMENT_HINT
+        assert "/mnt/c/" in WSL_ENVIRONMENT_HINT
+
+    def test_build_environment_hints_on_wsl(self, monkeypatch):
+        import agent.prompt_builder as pb_module
+        monkeypatch.setattr(pb_module, "is_wsl", lambda: True)
+        hints = pb_module.build_environment_hints()
+        assert "WSL" in hints
+        assert "/mnt/" in hints
+
+    def test_build_environment_hints_not_wsl(self, monkeypatch):
+        import agent.prompt_builder as pb_module
+        monkeypatch.setattr(pb_module, "is_wsl", lambda: False)
+        hints = pb_module.build_environment_hints()
+        assert hints == ""
+
+
 # =========================================================================
 # Conditional skill activation
 # =========================================================================
@@ -51,10 +51,10 @@ class TestSaveConfigValueAtomic:
    def test_creates_nested_keys(self, config_env):
        """Dot-separated paths create intermediate dicts as needed."""
        from cli import save_config_value
-        save_config_value("compression.summary_model", "google/gemini-3-flash-preview")
+        save_config_value("auxiliary.compression.model", "google/gemini-3-flash-preview")

        result = yaml.safe_load(config_env.read_text())
-        assert result["compression"]["summary_model"] == "google/gemini-3-flash-preview"
+        assert result["auxiliary"]["compression"]["model"] == "google/gemini-3-flash-preview"

    def test_overwrites_existing_value(self, config_env):
        """Updating an existing key replaces the value."""
@@ -0,0 +1,118 @@
+"""Tests for /compress <focus> — guided compression with focus topic.
+
+Inspired by Claude Code's /compact <focus> feature.
+"""
+
+from unittest.mock import MagicMock, patch
+
+from tests.cli.test_cli_init import _make_cli
+
+
+def _make_history() -> list[dict[str, str]]:
+    """Return a fresh four-message history: two user/assistant exchanges."""
+    roles = ("user", "assistant", "user", "assistant")
+    texts = ("one", "two", "three", "four")
+    return [
+        {"role": role, "content": text} for role, text in zip(roles, texts)
+    ]
+
+
+def test_focus_topic_extracted_and_passed(capsys):
+    """`/compress <topic>` prints the topic and forwards it as ``focus_topic``."""
+    cli_shell = _make_cli()
+    turns = _make_history()
+    trimmed = [turns[0], turns[-1]]
+    cli_shell.conversation_history = turns
+    agent = cli_shell.agent = MagicMock()
+    agent.compression_enabled = True
+    agent._cached_system_prompt = ""
+    agent._compress_context.return_value = (trimmed, "")
+
+    def _fake_estimate(msgs):
+        # The untouched history object counts as 100 tokens;
+        # any other argument counts as 50.
+        return 100 if msgs is turns else 50
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_fake_estimate):
+        cli_shell._manual_compress("/compress database schema")
+
+    printed = capsys.readouterr().out
+    assert 'focus: "database schema"' in printed
+
+    # The topic must also reach the agent as a keyword argument.
+    agent._compress_context.assert_called_once()
+    compress_call = agent._compress_context.call_args
+    assert compress_call.kwargs.get("focus_topic") == "database schema"
+
+
+def test_no_focus_topic_when_bare_command(capsys):
+    """A bare `/compress` passes no focus topic (``focus_topic`` is None or absent)."""
+    cli_shell = _make_cli()
+    turns = _make_history()
+    cli_shell.conversation_history = turns
+    agent = cli_shell.agent = MagicMock()
+    agent.compression_enabled = True
+    agent._cached_system_prompt = ""
+    agent._compress_context.return_value = (list(turns), "")
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
+        cli_shell._manual_compress("/compress")
+
+    agent._compress_context.assert_called_once()
+    compress_call = agent._compress_context.call_args
+    assert compress_call.kwargs.get("focus_topic") is None
+
+
+def test_empty_focus_after_command_treated_as_none(capsys):
+    """Whitespace-only text after `/compress` yields no focus topic."""
+    cli_shell = _make_cli()
+    turns = _make_history()
+    cli_shell.conversation_history = turns
+    agent = cli_shell.agent = MagicMock()
+    agent.compression_enabled = True
+    agent._cached_system_prompt = ""
+    agent._compress_context.return_value = (list(turns), "")
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
+        cli_shell._manual_compress("/compress   ")
+
+    agent._compress_context.assert_called_once()
+    compress_call = agent._compress_context.call_args
+    assert compress_call.kwargs.get("focus_topic") is None
+
+
+def test_focus_topic_printed_in_compression_banner(capsys):
+    """The printed output of a focused `/compress` contains the quoted topic."""
+    cli_shell = _make_cli()
+    turns = _make_history()
+    trimmed = [turns[0], turns[-1]]
+    cli_shell.conversation_history = turns
+    agent = cli_shell.agent = MagicMock()
+    agent.compression_enabled = True
+    agent._cached_system_prompt = ""
+    agent._compress_context.return_value = (trimmed, "")
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
+        cli_shell._manual_compress("/compress API endpoints")
+
+    printed = capsys.readouterr().out
+    assert 'focus: "API endpoints"' in printed
+
+
+def test_no_focus_prints_standard_banner(capsys):
+    """A bare `/compress` prints "Compressing" and no `focus:` text."""
+    cli_shell = _make_cli()
+    turns = _make_history()
+    trimmed = [turns[0], turns[-1]]
+    cli_shell.conversation_history = turns
+    agent = cli_shell.agent = MagicMock()
+    agent.compression_enabled = True
+    agent._cached_system_prompt = ""
+    agent._compress_context.return_value = (trimmed, "")
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100):
+        cli_shell._manual_compress("/compress")
+
+    printed = capsys.readouterr().out
+    assert "focus:" not in printed
+    assert "Compressing" in printed
@@ -180,33 +180,71 @@ class TestDisplayResumedHistory:
        assert 200 <= a_count <= 310  # roughly 300 chars (±panel padding)

    def test_long_assistant_message_truncated(self):
+        """Non-last assistant messages are still truncated."""
        cli = _make_cli()
        long_text = "B" * 400
        cli.conversation_history = [
            {"role": "user", "content": "Tell me a lot."},
            {"role": "assistant", "content": long_text},
+            {"role": "user", "content": "And more?"},
+            {"role": "assistant", "content": "Short final reply."},
        ]
        output = self._capture_display(cli)

-        assert "..." in output
+        # The non-last assistant message should be truncated
        assert "B" * 400 not in output
+        # The last assistant message is shown in full
+        assert "Short final reply." in output

    def test_multiline_assistant_truncated(self):
+        """Non-last multiline assistant messages are truncated to 3 lines."""
        cli = _make_cli()
        multi = "\n".join([f"Line {i}" for i in range(20)])
        cli.conversation_history = [
            {"role": "user", "content": "Show me lines."},
            {"role": "assistant", "content": multi},
+            {"role": "user", "content": "What else?"},
+            {"role": "assistant", "content": "Done."},
        ]
        output = self._capture_display(cli)

-        # First 3 lines should be there
+        # The first 3 lines of the non-last assistant message should be present
        assert "Line 0" in output
        assert "Line 1" in output
        assert "Line 2" in output
-        # Line 19 should NOT be there (truncated after 3 lines)
+        # Line 19 belongs to the truncated (non-last) message, so it must be absent
        assert "Line 19" not in output

+    def test_last_assistant_response_shown_in_full(self):
+        """A long final assistant reply is displayed without truncation, so
+        the user can see where the conversation left off."""
+        shell = _make_cli()
+        final_reply = 500 * "X"
+        shell.conversation_history = [
+            {"role": "user", "content": "Tell me everything."},
+            {"role": "assistant", "content": final_reply},
+        ]
+        rendered = self._capture_display(shell)
+
+        # Rich may wrap the text across lines, so count characters instead
+        # of searching for one contiguous 500-char run (small variance allowed).
+        assert rendered.count("X") >= 490
+
+    def test_last_assistant_multiline_shown_in_full(self):
+        """A multiline final assistant reply is displayed beyond the first 3 lines."""
+        shell = _make_cli()
+        final_reply = "\n".join(f"Line {n}" for n in range(20))
+        shell.conversation_history = [
+            {"role": "user", "content": "Show me everything."},
+            {"role": "assistant", "content": final_reply},
+        ]
+        rendered = self._capture_display(shell)
+
+        # Spot-check the first, a middle, and the final line of the reply
+        assert "Line 0" in rendered
+        assert "Line 10" in rendered
+        assert "Line 19" in rendered
+
    def test_large_history_shows_truncation_indicator(self):
        cli = _make_cli()
        cli.conversation_history = _large_history(n_exchanges=15)
@@ -0,0 +1,189 @@
+"""Tests for stacked tool progress scrollback lines in the CLI TUI.
+
+When tool_progress_mode is "all" or "new", _on_tool_progress should print
+persistent lines to scrollback on tool.completed, restoring the stacked
+tool history that was lost when the TUI switched to a single-line spinner.
+"""
+
+import os
+import sys
+import importlib
+from unittest.mock import MagicMock, patch
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+# Module-level reference to the cli module (refreshed by every _make_cli call)
+_cli_mod = None
+
+
+def _make_cli(tool_progress="all"):
+    """Reload ``cli`` under stubbed prompt_toolkit modules and return a HermesCLI built from a fixed config."""
+    global _cli_mod
+    env_overrides = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
+    base_config = {
+        "model": {
+            "default": "anthropic/claude-opus-4.6",
+            "base_url": "https://openrouter.ai/api/v1",
+            "provider": "auto",
+        },
+        "display": {"compact": False, "tool_progress": tool_progress},
+        "agent": {},
+        "terminal": {"env_type": "local"},
+    }
+    stubbed_modules = {module_name: MagicMock() for module_name in (
+        "prompt_toolkit",
+        "prompt_toolkit.history",
+        "prompt_toolkit.styles",
+        "prompt_toolkit.patch_stdout",
+        "prompt_toolkit.application",
+        "prompt_toolkit.layout",
+        "prompt_toolkit.layout.processors",
+        "prompt_toolkit.filters",
+        "prompt_toolkit.layout.dimension",
+        "prompt_toolkit.layout.menus",
+        "prompt_toolkit.widgets",
+        "prompt_toolkit.key_binding",
+        "prompt_toolkit.completion",
+        "prompt_toolkit.formatted_text",
+        "prompt_toolkit.auto_suggest",
+    )}
+    with patch.dict(sys.modules, stubbed_modules), \
+         patch.dict("os.environ", env_overrides, clear=False):
+        import cli as cli_module
+        cli_module = importlib.reload(cli_module)
+        _cli_mod = cli_module
+        with patch.object(cli_module, "get_tool_definitions", return_value=[]), \
+             patch.dict(cli_module.__dict__, {"CLI_CONFIG": base_config}):
+            return cli_module.HermesCLI()
+
+
+class TestToolProgressScrollback:
+    """Stacked scrollback lines for 'all' and 'new' modes."""
+
+    def test_all_mode_prints_scrollback_on_completed(self):
+        """In 'all' mode, tool.completed prints a stacked line."""
+        cli = _make_cli(tool_progress="all")
+        # Simulate tool.started
+        cli._on_tool_progress("tool.started", "terminal", "git log", {"command": "git log"})
+        # Simulate tool.completed
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=1.5, is_error=False)
+
+        mock_print.assert_called_once()
+        line = mock_print.call_args[0][0]
+        # Should contain tool info (the cute message format has "git log" for terminal)
+        assert "git log" in line or "$" in line
+
+    def test_all_mode_prints_every_call(self):
+        """In 'all' mode, consecutive calls to the same tool each get a line."""
+        cli = _make_cli(tool_progress="all")
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            # First call
+            cli._on_tool_progress("tool.started", "read_file", "cli.py", {"path": "cli.py"})
+            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.1, is_error=False)
+            # Second call (same tool)
+            cli._on_tool_progress("tool.started", "read_file", "run_agent.py", {"path": "run_agent.py"})
+            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.2, is_error=False)
+
+        assert mock_print.call_count == 2
+
+    def test_new_mode_skips_consecutive_repeats(self):
+        """In 'new' mode, consecutive calls to the same tool only print once."""
+        cli = _make_cli(tool_progress="new")
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            cli._on_tool_progress("tool.started", "read_file", "cli.py", {"path": "cli.py"})
+            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.1, is_error=False)
+            cli._on_tool_progress("tool.started", "read_file", "run_agent.py", {"path": "run_agent.py"})
+            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.2, is_error=False)
+
+        assert mock_print.call_count == 1  # Only the first read_file
+
+    def test_new_mode_prints_when_tool_changes(self):
+        """In 'new' mode, a different tool name triggers a new line."""
+        cli = _make_cli(tool_progress="new")
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            cli._on_tool_progress("tool.started", "read_file", "cli.py", {"path": "cli.py"})
+            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.1, is_error=False)
+            cli._on_tool_progress("tool.started", "search_files", "pattern", {"pattern": "test"})
+            cli._on_tool_progress("tool.completed", "search_files", None, None, duration=0.3, is_error=False)
+            cli._on_tool_progress("tool.started", "read_file", "run_agent.py", {"path": "run_agent.py"})
+            cli._on_tool_progress("tool.completed", "read_file", None, None, duration=0.2, is_error=False)
+
+        # read_file, search_files, read_file (3rd prints because search_files broke the streak)
+        assert mock_print.call_count == 3
+
+    def test_off_mode_no_scrollback(self):
+        """In 'off' mode, no stacked lines are printed."""
+        cli = _make_cli(tool_progress="off")
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
+            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=False)
+
+        mock_print.assert_not_called()
+
+    def test_error_suffix_on_failed_tool(self):
+        """When is_error=True, the stacked line includes [error]."""
+        cli = _make_cli(tool_progress="all")
+        cli._on_tool_progress("tool.started", "terminal", "bad cmd", {"command": "bad cmd"})
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=True)
+
+        mock_print.assert_called_once()  # otherwise call_args is None and the next line raises TypeError
+        assert "[error]" in mock_print.call_args[0][0]
+
+    def test_spinner_still_updates_on_started(self):
+        """tool.started still updates the spinner text for live display."""
+        cli = _make_cli(tool_progress="all")
+        cli._on_tool_progress("tool.started", "terminal", "git status", {"command": "git status"})
+        assert "git status" in cli._spinner_text
+
+    def test_spinner_timer_clears_on_completed(self):
+        """tool.completed still clears the tool timer."""
+        cli = _make_cli(tool_progress="all")
+        cli._on_tool_progress("tool.started", "terminal", "git status", {"command": "git status"})
+        assert cli._tool_start_time > 0
+        with patch.object(_cli_mod, "_cprint"):
+            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=False)
+        assert cli._tool_start_time == 0.0
+
+    def test_concurrent_tools_produce_stacked_lines(self):
+        """Multiple tool.started followed by multiple tool.completed all produce lines."""
+        cli = _make_cli(tool_progress="all")
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            # All start first (concurrent pattern)
+            cli._on_tool_progress("tool.started", "web_search", "query 1", {"query": "test 1"})
+            cli._on_tool_progress("tool.started", "web_search", "query 2", {"query": "test 2"})
+            # All complete
+            cli._on_tool_progress("tool.completed", "web_search", None, None, duration=1.0, is_error=False)
+            cli._on_tool_progress("tool.completed", "web_search", None, None, duration=1.5, is_error=False)
+
+        assert mock_print.call_count == 2
+
+    def test_verbose_mode_no_duplicate_scrollback(self):
+        """In 'verbose' mode, scrollback lines are NOT printed (run_agent handles verbose output)."""
+        cli = _make_cli(tool_progress="verbose")
+        with patch.object(_cli_mod, "_cprint") as mock_print:
+            cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
+            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.5, is_error=False)
+
+        mock_print.assert_not_called()
+
+    def test_pending_info_stores_on_started(self):
+        """tool.started stores args for later use by tool.completed."""
+        cli = _make_cli(tool_progress="all")
+        cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
+        assert "terminal" in cli._pending_tool_info
+        assert len(cli._pending_tool_info["terminal"]) == 1
+        assert cli._pending_tool_info["terminal"][0] == {"command": "ls"}
+
+    def test_pending_info_consumed_on_completed(self):
+        """tool.completed consumes stored args (FIFO for concurrent)."""
+        cli = _make_cli(tool_progress="all")
+        cli._on_tool_progress("tool.started", "terminal", "ls", {"command": "ls"})
+        cli._on_tool_progress("tool.started", "terminal", "pwd", {"command": "pwd"})
+        assert len(cli._pending_tool_info["terminal"]) == 2
+        with patch.object(_cli_mod, "_cprint"):
+            cli._on_tool_progress("tool.completed", "terminal", None, None, duration=0.1, is_error=False)
+        # First entry consumed, second remains
+        assert len(cli._pending_tool_info.get("terminal", [])) == 1
+        assert cli._pending_tool_info["terminal"][0] == {"command": "pwd"}
@@ -0,0 +1,87 @@
+"""Tests for _normalize_chat_content in the API server adapter."""
+
+from gateway.platforms.api_server import _normalize_chat_content
+
+
+class TestNormalizeChatContent:
+    """_normalize_chat_content flattens array-style content parts into plain text."""
+
+    def test_none_returns_empty_string(self):
+        assert _normalize_chat_content(None) == ""
+
+    def test_plain_string_returned_as_is(self):
+        assert _normalize_chat_content("hello world") == "hello world"
+
+    def test_empty_string_returned_as_is(self):
+        assert _normalize_chat_content("") == ""
+
+    def test_text_content_part(self):
+        parts = [{"type": "text", "text": "hello"}]
+        assert _normalize_chat_content(parts) == "hello"
+
+    def test_input_text_content_part(self):
+        parts = [{"type": "input_text", "text": "user input"}]
+        assert _normalize_chat_content(parts) == "user input"
+
+    def test_output_text_content_part(self):
+        parts = [{"type": "output_text", "text": "assistant output"}]
+        assert _normalize_chat_content(parts) == "assistant output"
+
+    def test_multiple_text_parts_joined_with_newline(self):
+        parts = [
+            {"type": "text", "text": "first"},
+            {"type": "text", "text": "second"},
+        ]
+        assert _normalize_chat_content(parts) == "first\nsecond"
+
+    def test_mixed_string_and_dict_parts(self):
+        parts = ["plain string", {"type": "text", "text": "dict part"}]
+        assert _normalize_chat_content(parts) == "plain string\ndict part"
+
+    def test_image_url_parts_silently_skipped(self):
+        parts = [
+            {"type": "text", "text": "check this:"},
+            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
+        ]
+        assert _normalize_chat_content(parts) == "check this:"
+
+    def test_integer_content_converted(self):
+        assert _normalize_chat_content(42) == "42"
+
+    def test_boolean_content_converted(self):
+        assert _normalize_chat_content(True) == "True"
+
+    def test_deeply_nested_list_respects_depth_limit(self):
+        """A 12-level nested list is handled without raising and yields a str."""
+        nested = [[[[[[[[[[[["deep"]]]]]]]]]]]]
+        normalized = _normalize_chat_content(nested)
+        # Only the return type is pinned here, not the exact text produced
+        assert isinstance(normalized, str)
+
+    def test_large_list_capped(self):
+        """A 2000-part list yields at most 1000 "item" occurrences in the output."""
+        parts = [{"type": "text", "text": f"item{idx}"} for idx in range(2000)]
+        normalized = _normalize_chat_content(parts)
+        # Should not contain all 2000 items
+        assert normalized.count("item") <= 1000
+
+    def test_oversized_string_truncated(self):
+        """A 100,000-character string comes back exactly 65,536 characters long."""
+        oversized = "x" * 100_000
+        normalized = _normalize_chat_content(oversized)
+        assert len(normalized) == 65_536
+
+    def test_empty_text_parts_filtered(self):
+        parts = [
+            {"type": "text", "text": ""},
+            {"type": "text", "text": "actual"},
+            {"type": "text", "text": ""},
+        ]
+        assert _normalize_chat_content(parts) == "actual"
+
+    def test_dict_without_type_skipped(self):
+        parts = [{"foo": "bar"}, {"type": "text", "text": "real"}]
+        assert _normalize_chat_content(parts) == "real"
+
+    def test_empty_list_returns_empty(self):
+        assert _normalize_chat_content([]) == ""
@@ -0,0 +1,226 @@
+"""Tests for the clean shutdown marker that prevents unwanted session auto-resets.
+
+When the gateway shuts down gracefully (hermes update, gateway restart, /restart),
+it writes a .clean_shutdown marker.  On the next startup, if the marker exists,
+suspend_recently_active() is skipped so users don't lose their sessions.
+
+After a crash (no marker), suspension still fires as a safety net for stuck sessions.
+"""
+
+import os
+from datetime import datetime, timedelta
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig, SessionResetPolicy
+from gateway.session import SessionEntry, SessionSource, SessionStore
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_source(platform=Platform.TELEGRAM, chat_id="123", user_id="u1"):
+    """Build a SessionSource for tests from the given platform, chat id and user id."""
+    source_fields = {"platform": platform, "chat_id": chat_id, "user_id": user_id}
+    return SessionSource(**source_fields)
+
+
+def _make_store(tmp_path, policy=None):
+    """Create a SessionStore rooted at ``tmp_path`` with a fresh GatewayConfig.
+
+    A truthy ``policy`` replaces the config's ``default_reset_policy``;
+    otherwise the config is used as constructed.
+    """
+    gateway_config = GatewayConfig()
+    if policy:
+        gateway_config.default_reset_policy = policy
+    store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
+    return store
+
+
+# ---------------------------------------------------------------------------
+# SessionStore.suspend_recently_active
+# ---------------------------------------------------------------------------
+
+class TestSuspendRecentlyActive:
+    """Exercise SessionStore.suspend_recently_active on fresh and backdated sessions."""
+
+    def test_suspends_recently_active_sessions(self, tmp_path):
+        """A just-created session is counted, and the next fetch reports an auto-reset."""
+        session_store = _make_store(tmp_path)
+        origin = _make_source()
+        created = session_store.get_or_create_session(origin)
+        assert not created.suspended
+
+        suspended_total = session_store.suspend_recently_active()
+        assert suspended_total == 1
+
+        # Fetching the same source again must surface the auto-reset flag.
+        refetched = session_store.get_or_create_session(origin)
+        assert refetched.was_auto_reset
+
+    def test_does_not_suspend_old_sessions(self, tmp_path):
+        """A session last updated 300s ago is ignored by a 120s suspension window."""
+        session_store = _make_store(tmp_path)
+        origin = _make_source()
+        created = session_store.get_or_create_session(origin)
+
+        # Push updated_at past the cutoff and persist it under the store lock.
+        with session_store._lock:
+            created.updated_at = datetime.now() - timedelta(seconds=300)
+            session_store._save()
+
+        suspended_total = session_store.suspend_recently_active(max_age_seconds=120)
+        assert suspended_total == 0
+
+    def test_already_suspended_not_double_counted(self, tmp_path):
+        """Two suspension passes with an access in between each report one session."""
+        session_store = _make_store(tmp_path)
+        origin = _make_source()
+        session_store.get_or_create_session(origin)
+
+        # First pass suspends the only session.
+        first_pass = session_store.suspend_recently_active()
+        assert first_pass == 1
+
+        # Accessing the source again yields a new session (the old one is
+        # reset on this access), which is recent and not yet suspended.
+        session_store.get_or_create_session(origin)
+
+        second_pass = session_store.suspend_recently_active()
+        assert second_pass == 1
+
+
+# ---------------------------------------------------------------------------
+# Clean shutdown marker integration
+# ---------------------------------------------------------------------------
+
+class TestCleanShutdownMarker:
+    """Test that the marker file controls session suspension on startup."""
+
+    @staticmethod
+    def _make_bare_runner():
+        """Return a GatewayRunner created without running ``__init__``.
+
+        Only the attributes these tests assign before calling ``stop()`` are
+        populated; nothing else on the runner is initialized.
+        """
+        from gateway.run import GatewayRunner
+
+        runner = object.__new__(GatewayRunner)
+        runner._restart_requested = False
+        runner._restart_detached = False
+        runner._restart_via_service = False
+        runner._restart_task_started = False
+        runner._running = True
+        runner._draining = False
+        runner._stop_task = None
+        runner._running_agents = {}
+        runner._pending_messages = {}
+        runner._pending_approvals = {}
+        runner._background_tasks = set()
+        runner._shutdown_event = MagicMock()
+        runner._restart_drain_timeout = 5
+        runner._exit_code = None
+        runner._exit_reason = None
+        runner.adapters = {}
+        runner.config = GatewayConfig()
+        return runner
+
+    @staticmethod
+    def _run_stop(runner, **stop_kwargs):
+        """Drive ``runner.stop(**stop_kwargs)`` to completion with heavy dependencies mocked.
+
+        Agent draining, shutdown finalization, runtime-status updates, PID-file
+        removal, the process registry and terminal/browser cleanup are patched
+        out for the duration of the call.
+        """
+        import asyncio
+
+        with patch("gateway.run.GatewayRunner._drain_active_agents", new_callable=AsyncMock, return_value=([], False)), \
+             patch("gateway.run.GatewayRunner._finalize_shutdown_agents"), \
+             patch("gateway.run.GatewayRunner._update_runtime_status"), \
+             patch("gateway.status.remove_pid_file"), \
+             patch("tools.process_registry.process_registry") as mock_proc_reg, \
+             patch("tools.terminal_tool.cleanup_all_environments"), \
+             patch("tools.browser_tool.cleanup_all_browsers"):
+            mock_proc_reg.kill_all = MagicMock()
+
+            # Run on a private loop rather than asyncio.get_event_loop(): the
+            # implicit "current loop" lookup is deprecated outside a running
+            # loop and breaks when an earlier test closed or cleared that loop.
+            loop = asyncio.new_event_loop()
+            try:
+                loop.run_until_complete(runner.stop(**stop_kwargs))
+            finally:
+                loop.close()
+
+    def test_marker_written_on_graceful_stop(self, tmp_path, monkeypatch):
+        """stop() should write .clean_shutdown marker."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        marker = tmp_path / ".clean_shutdown"
+        assert not marker.exists()
+
+        # Create a minimal runner and call the shutdown logic directly
+        self._run_stop(self._make_bare_runner())
+
+        assert marker.exists(), ".clean_shutdown marker should exist after graceful stop"
+
+    def test_marker_skips_suspension_on_startup(self, tmp_path, monkeypatch):
+        """If .clean_shutdown exists, suspend_recently_active should NOT be called."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+
+        # Create the marker
+        marker = tmp_path / ".clean_shutdown"
+        marker.touch()
+
+        # Create a store with a recently active session
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        assert not entry.suspended
+
+        # Simulate what start() does:
+        if marker.exists():
+            marker.unlink()
+            # Should NOT call suspend_recently_active
+        else:
+            store.suspend_recently_active()
+
+        # Session should NOT be suspended
+        with store._lock:
+            store._ensure_loaded_locked()
+            for e in store._entries.values():
+                assert not e.suspended, "Session should NOT be suspended after clean shutdown"
+
+        assert not marker.exists(), "Marker should be cleaned up"
+
+    def test_no_marker_triggers_suspension(self, tmp_path, monkeypatch):
+        """Without .clean_shutdown marker (crash), suspension should fire."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+
+        marker = tmp_path / ".clean_shutdown"
+        assert not marker.exists()
+
+        # Create a store with a recently active session
+        store = _make_store(tmp_path)
+        source = _make_source()
+        entry = store.get_or_create_session(source)
+        assert not entry.suspended
+
+        # Simulate what start() does:
+        if marker.exists():
+            marker.unlink()
+        else:
+            store.suspend_recently_active()
+
+        # Session SHOULD be suspended (crash recovery)
+        with store._lock:
+            store._ensure_loaded_locked()
+            suspended_count = sum(1 for e in store._entries.values() if e.suspended)
+        assert suspended_count == 1, "Session should be suspended after crash (no marker)"
+
+    def test_marker_written_on_restart_stop(self, tmp_path, monkeypatch):
+        """stop(restart=True) should also write the marker."""
+        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
+        marker = tmp_path / ".clean_shutdown"
+
+        self._run_stop(self._make_bare_runner(), restart=True)
+
+        assert marker.exists(), ".clean_shutdown marker should exist after restart-stop too"
@@ -0,0 +1,118 @@
+"""Tests for gateway /compress <focus> — focus topic on the gateway side."""
+
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    """Return the Telegram DM source (user u1, chat c1) shared by these tests."""
+    source_fields = dict(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+    return SessionSource(**source_fields)
+
+
+def _make_event(text: str = "/compress") -> MessageEvent:
+    """Wrap ``text`` in a MessageEvent with id "m1" originating from the shared test source."""
+    origin = _make_source()
+    return MessageEvent(text=text, source=origin, message_id="m1")
+
+
+def _make_history() -> list[dict[str, str]]:
+    """Return a fresh four-message transcript alternating user and assistant turns."""
+    turns = (
+        ("user", "one"),
+        ("assistant", "two"),
+        ("user", "three"),
+        ("assistant", "four"),
+    )
+    return [{"role": role, "content": content} for role, content in turns]
+
+
+def _make_runner(history: list[dict[str, str]]):
+    """Build a GatewayRunner without running ``__init__``, backed by a mocked session store.
+
+    The mocked store hands back a fixed Telegram DM SessionEntry ("sess-1")
+    from ``get_or_create_session`` and ``history`` from ``load_transcript``.
+    """
+    from gateway.run import GatewayRunner
+
+    telegram_config = PlatformConfig(enabled=True, token="***")
+    entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+
+    store = MagicMock()
+    store.get_or_create_session.return_value = entry
+    store.load_transcript.return_value = history
+    store.rewrite_transcript = MagicMock()
+    store.update_session = MagicMock()
+    store._save = MagicMock()
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: telegram_config})
+    runner.session_store = store
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_compress_focus_topic_passed_to_agent():
+    """The text after /compress reaches _compress_context as focus_topic and is echoed back."""
+    transcript = _make_history()
+    runner = _make_runner(transcript)
+
+    fake_agent = MagicMock()
+    compressor = fake_agent.context_compressor
+    compressor.protect_first_n = 0
+    compressor._align_boundary_forward.return_value = 0
+    compressor._find_tail_cut_by_tokens.return_value = 2
+    fake_agent.session_id = "sess-1"
+    fake_agent._compress_context.return_value = ([transcript[0], transcript[-1]], "")
+
+    def _flat_estimate(messages):
+        # Constant token estimate regardless of the messages passed in.
+        return 100
+
+    with (
+        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
+        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+        patch("run_agent.AIAgent", return_value=fake_agent),
+        patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_flat_estimate),
+    ):
+        reply = await runner._handle_compress_command(_make_event("/compress database schema"))
+
+    # The focus topic must arrive as a keyword argument, exactly once.
+    fake_agent._compress_context.assert_called_once()
+    passed_focus = fake_agent._compress_context.call_args.kwargs.get("focus_topic")
+    assert passed_focus == "database schema"
+
+    # The reply text must mention the focus.
+    assert 'Focus: "database schema"' in reply
+
+
+@pytest.mark.asyncio
+async def test_compress_no_focus_passes_none():
+    """A bare /compress yields focus_topic=None and no "Focus:" line in the reply."""
+    transcript = _make_history()
+    runner = _make_runner(transcript)
+
+    fake_agent = MagicMock()
+    compressor = fake_agent.context_compressor
+    compressor.protect_first_n = 0
+    compressor._align_boundary_forward.return_value = 0
+    compressor._find_tail_cut_by_tokens.return_value = 2
+    fake_agent.session_id = "sess-1"
+    fake_agent._compress_context.return_value = (list(transcript), "")
+
+    with (
+        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
+        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+        patch("run_agent.AIAgent", return_value=fake_agent),
+        patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100),
+    ):
+        reply = await runner._handle_compress_command(_make_event("/compress"))
+
+    fake_agent._compress_context.assert_called_once()
+    passed_focus = fake_agent._compress_context.call_args.kwargs.get("focus_topic")
+    assert passed_focus is None
+
+    # Without a focus topic the reply carries no focus line.
+    assert "Focus:" not in reply
@@ -74,6 +74,26 @@ class FakeBot:
        return None


+class SlowSyncTree(FakeTree):
+    """FakeTree whose ``sync`` announces that it started, then blocks until released."""
+
+    def __init__(self):
+        super().__init__()
+        # ``started`` is set as soon as sync() begins running;
+        # sync() only returns after ``allow_finish`` has been set.
+        self.allow_finish = asyncio.Event()
+        self.started = asyncio.Event()
+
+        async def _blocking_sync():
+            self.started.set()
+            await self.allow_finish.wait()
+            return []
+
+        self.sync = AsyncMock(side_effect=_blocking_sync)
+
+
+class SlowSyncBot(FakeBot):
+    """FakeBot variant whose ``tree`` attribute is a SlowSyncTree."""
+
+    def __init__(self, *, intents, proxy=None):
+        super().__init__(intents=intents, proxy=proxy)
+        blocking_tree = SlowSyncTree()
+        self.tree = blocking_tree
+
+
@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("allowed_users", "expected_members_intent"),
@@ -138,3 +158,36 @@ async def test_connect_releases_token_lock_on_timeout(monkeypatch):
    assert ok is False
    assert released == [("discord-bot-token", "test-token")]
    assert adapter._platform_lock_identity is None
+
+
+@pytest.mark.asyncio
+async def test_connect_does_not_wait_for_slash_sync(monkeypatch):
+    """connect() must return with the adapter ready while slash-command sync is still blocked."""
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
+
+    monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
+    monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)
+
+    intents = SimpleNamespace(message_content=False, dm_messages=False, guild_messages=False, members=False, voice_states=False)
+    monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents)
+
+    # Captures the bot built by the factory so the test can reach its tree.
+    created = {}
+
+    def fake_bot_factory(*, command_prefix, intents, proxy=None):
+        bot = SlowSyncBot(intents=intents, proxy=proxy)
+        created["bot"] = bot
+        return bot
+
+    monkeypatch.setattr(discord_platform.commands, "Bot", fake_bot_factory)
+    monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock())
+
+    try:
+        ok = await asyncio.wait_for(adapter.connect(), timeout=1.0)
+
+        assert ok is True
+        assert adapter._ready_event.is_set()
+
+        # The sync must have been started exactly once, yet still be blocked.
+        await asyncio.wait_for(created["bot"].tree.started.wait(), timeout=1.0)
+        assert created["bot"].tree.sync.await_count == 1
+    finally:
+        # Always release the blocked sync and disconnect, even when an
+        # assertion or timeout above fails, so nothing stays blocked on
+        # allow_finish after this test ends.
+        bot = created.get("bot")
+        if bot is not None:
+            bot.tree.allow_finish.set()
+        await asyncio.sleep(0)
+        await adapter.disconnect()
@@ -359,3 +359,44 @@ async def test_discord_thread_participation_tracked_on_dispatch(adapter, monkeyp
    await adapter._handle_message(message)

    assert "777" in adapter._threads
+
+
+@pytest.mark.asyncio
+async def test_discord_voice_linked_channel_skips_mention_requirement_and_auto_thread(adapter, monkeypatch):
+    """A message in an active voice-linked text channel is dispatched without a mention or auto-thread."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    for env_name in ("DISCORD_FREE_RESPONSE_CHANNELS", "DISCORD_AUTO_THREAD"):
+        monkeypatch.delenv(env_name, raising=False)
+
+    # Mark text channel 789 as voice-linked (the entry is keyed by 111).
+    adapter._voice_text_channels[111] = 789
+    adapter._auto_create_thread = AsyncMock()
+
+    linked_channel = FakeTextChannel(channel_id=789)
+    message = make_message(channel=linked_channel, content="follow-up from voice text chat")
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_not_awaited()
+    adapter.handle_message.assert_awaited_once()
+    dispatched = adapter.handle_message.await_args.args[0]
+    assert dispatched.text == "follow-up from voice text chat"
+    assert dispatched.source.chat_type == "group"
+
+
+@pytest.mark.asyncio
+async def test_discord_voice_linked_parent_thread_still_requires_mention(adapter, monkeypatch):
+    """A mention-less message in a thread under a voice-linked channel is not dispatched."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+
+    # Channel 789 is voice-linked; the message arrives in thread 790 beneath it.
+    adapter._voice_text_channels[111] = 789
+    parent_channel = FakeTextChannel(channel_id=789)
+    child_thread = FakeThread(channel_id=790, parent=parent_channel)
+    message = make_message(channel=child_thread, content="thread reply without mention")
+
+    await adapter._handle_message(message)
+
+    adapter.handle_message.assert_not_awaited()
@@ -124,7 +124,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_off_mode_no_reply_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("off")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -137,7 +137,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_first_mode_only_first_chunk_references(self):
        adapter, channel, ref_msg = _make_discord_adapter("first")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -152,7 +152,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_all_mode_all_chunks_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("all")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -165,7 +165,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_no_reply_to_param_no_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("all")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]

        await adapter.send("12345", "test content", reply_to=None)

@@ -176,7 +176,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_single_chunk_respects_first_mode(self):
        adapter, channel, ref_msg = _make_discord_adapter("first")
-        adapter.truncate_message = lambda content, max_len: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -187,7 +187,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_single_chunk_off_mode(self):
        adapter, channel, ref_msg = _make_discord_adapter("off")
-        adapter.truncate_message = lambda content, max_len: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -200,7 +200,7 @@ class TestSendWithReplyToMode:
    async def test_invalid_mode_falls_back_to_first_behavior(self):
        """Invalid mode behaves like 'first' — only first chunk gets reference."""
        adapter, channel, ref_msg = _make_discord_adapter("banana")
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]

        await adapter.send("12345", "test", reply_to="999")

@@ -189,14 +189,14 @@ class TestPlatformDefaults:
        """Slack, Mattermost, Matrix default to 'new' tool progress."""
        from gateway.display_config import resolve_display_setting

-        for plat in ("slack", "mattermost", "matrix", "feishu"):
+        for plat in ("slack", "mattermost", "matrix", "feishu", "whatsapp"):
            assert resolve_display_setting({}, plat, "tool_progress") == "new", plat

    def test_low_tier_platforms(self):
-        """Signal, WhatsApp, etc. default to 'off' tool progress."""
+        """Signal, BlueBubbles, etc. default to 'off' tool progress."""
        from gateway.display_config import resolve_display_setting

-        for plat in ("signal", "whatsapp", "bluebubbles", "weixin", "wecom", "dingtalk"):
+        for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk"):
            assert resolve_display_setting({}, plat, "tool_progress") == "off", plat

    def test_minimal_tier_platforms(self):
@@ -0,0 +1,438 @@
+"""Tests for gateway.platforms.feishu — Feishu scan-to-create registration."""
+
+import json
+from unittest.mock import patch, MagicMock
+import pytest
+
+
+def _mock_urlopen(response_data, status=200):
+    """Build a context-manager mock standing in for a urlopen() response.
+
+    ``read()`` returns ``response_data`` serialized as UTF-8 JSON, ``status``
+    is exposed as an attribute, and entering the mock yields the mock itself.
+    """
+    payload = json.dumps(response_data).encode("utf-8")
+    response = MagicMock()
+    response.status = status
+    response.read.return_value = payload
+    response.__exit__ = MagicMock(return_value=False)
+    response.__enter__ = lambda entered: entered
+    return response
+
+
+class TestPostRegistration:
+    """Cover _post_registration, the low-level HTTP helper."""
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_post_registration_returns_parsed_json(self, mock_urlopen_fn):
+        """The JSON body of the response is handed back as a parsed mapping."""
+        from gateway.platforms.feishu import _post_registration
+
+        server_reply = {"nonce": "abc", "supported_auth_methods": ["client_secret"]}
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        parsed = _post_registration("https://accounts.feishu.cn", {"action": "init"})
+
+        assert parsed["nonce"] == "abc"
+        assert "client_secret" in parsed["supported_auth_methods"]
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_post_registration_sends_form_encoded_body(self, mock_urlopen_fn):
+        """Fields are sent as key=value pairs with a form-urlencoded content type."""
+        from gateway.platforms.feishu import _post_registration
+
+        mock_urlopen_fn.return_value = _mock_urlopen({})
+        _post_registration("https://accounts.feishu.cn", {"action": "init", "key": "val"})
+
+        # The first positional argument of the urlopen call is the request object.
+        sent_request = mock_urlopen_fn.call_args[0][0]
+        encoded_body = sent_request.data.decode("utf-8")
+        for expected_pair in ("action=init", "key=val"):
+            assert expected_pair in encoded_body
+        assert sent_request.get_header("Content-type") == "application/x-www-form-urlencoded"
+
+
+class TestInitRegistration:
+    """Cover _init_registration: auth-method validation and the per-domain URL."""
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_init_succeeds_when_client_secret_supported(self, mock_urlopen_fn):
+        """No exception is raised when the server lists client_secret."""
+        from gateway.platforms.feishu import _init_registration
+
+        server_reply = {"nonce": "abc", "supported_auth_methods": ["client_secret"]}
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        _init_registration("feishu")
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_init_raises_when_client_secret_not_supported(self, mock_urlopen_fn):
+        """A RuntimeError mentioning client_secret is raised when it is not offered."""
+        from gateway.platforms.feishu import _init_registration
+
+        server_reply = {"nonce": "abc", "supported_auth_methods": ["other_method"]}
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        with pytest.raises(RuntimeError, match="client_secret"):
+            _init_registration("feishu")
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_init_uses_lark_url_for_lark_domain(self, mock_urlopen_fn):
+        """With domain "lark" the request goes to a larksuite.com URL."""
+        from gateway.platforms.feishu import _init_registration
+
+        server_reply = {"nonce": "abc", "supported_auth_methods": ["client_secret"]}
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        _init_registration("lark")
+
+        sent_request = mock_urlopen_fn.call_args[0][0]
+        assert "larksuite.com" in sent_request.full_url
+
+
+class TestBeginRegistration:
+    """Cover _begin_registration: the returned fields and the request body."""
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_begin_returns_device_code_and_qr_url(self, mock_urlopen_fn):
+        """The response fields are surfaced, with the verification URI exposed as qr_url."""
+        from gateway.platforms.feishu import _begin_registration
+
+        server_reply = {
+            "device_code": "dc_123",
+            "verification_uri_complete": "https://accounts.feishu.cn/qr/abc",
+            "user_code": "ABCD-1234",
+            "interval": 5,
+            "expire_in": 600,
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        begun = _begin_registration("feishu")
+
+        assert begun["device_code"] == "dc_123"
+        assert "qr_url" in begun
+        assert "accounts.feishu.cn" in begun["qr_url"]
+        assert begun["user_code"] == "ABCD-1234"
+        assert begun["interval"] == 5
+        assert begun["expire_in"] == 600
+
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_begin_sends_correct_archetype(self, mock_urlopen_fn):
+        """The request body names the PersonalAgent archetype and client_secret auth."""
+        from gateway.platforms.feishu import _begin_registration
+
+        server_reply = {
+            "device_code": "dc_123",
+            "verification_uri_complete": "https://example.com/qr",
+            "user_code": "X",
+            "interval": 5,
+            "expire_in": 600,
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(server_reply)
+
+        _begin_registration("feishu")
+
+        sent_request = mock_urlopen_fn.call_args[0][0]
+        encoded_body = sent_request.data.decode("utf-8")
+        assert "archetype=PersonalAgent" in encoded_body
+        assert "auth_method=client_secret" in encoded_body
+
+
+class TestPollRegistration:
+    """Cover _poll_registration with the clock and HTTP layer mocked out."""
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_returns_credentials_on_success(self, mock_urlopen_fn, mock_time):
+        """A response carrying client_id/client_secret is mapped to app credentials."""
+        from gateway.platforms.feishu import _poll_registration
+
+        mock_time.sleep = MagicMock()
+        mock_time.time.side_effect = [0, 1]
+
+        granted = {
+            "client_id": "cli_app123",
+            "client_secret": "secret456",
+            "user_info": {"open_id": "ou_owner", "tenant_brand": "feishu"},
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(granted)
+
+        credentials = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
+        )
+
+        assert credentials is not None
+        assert credentials["app_id"] == "cli_app123"
+        assert credentials["app_secret"] == "secret456"
+        assert credentials["domain"] == "feishu"
+        assert credentials["open_id"] == "ou_owner"
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_switches_domain_on_lark_tenant_brand(self, mock_urlopen_fn, mock_time):
+        """A pending reply branded "lark" followed by success reports domain "lark"."""
+        from gateway.platforms.feishu import _poll_registration
+
+        mock_time.sleep = MagicMock()
+        mock_time.time.side_effect = [0, 1, 2]
+
+        still_pending = _mock_urlopen({
+            "error": "authorization_pending",
+            "user_info": {"tenant_brand": "lark"},
+        })
+        granted = _mock_urlopen({
+            "client_id": "cli_lark",
+            "client_secret": "secret_lark",
+            "user_info": {"open_id": "ou_lark", "tenant_brand": "lark"},
+        })
+        # First poll is pending, second poll succeeds.
+        mock_urlopen_fn.side_effect = [still_pending, granted]
+
+        credentials = _poll_registration(
+            device_code="dc_123", interval=0, expire_in=60, domain="feishu"
+        )
+
+        assert credentials is not None
+        assert credentials["domain"] == "lark"
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_success_with_lark_brand_in_same_response(self, mock_urlopen_fn, mock_time):
+        """Credentials and lark tenant_brand in one response must not be discarded."""
+        from gateway.platforms.feishu import _poll_registration
+
+        mock_time.sleep = MagicMock()
+        mock_time.time.side_effect = [0, 1]
+
+        granted = {
+            "client_id": "cli_lark_direct",
+            "client_secret": "secret_lark_direct",
+            "user_info": {"open_id": "ou_lark_direct", "tenant_brand": "lark"},
+        }
+        mock_urlopen_fn.return_value = _mock_urlopen(granted)
+
+        credentials = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
+        )
+
+        assert credentials is not None
+        assert credentials["app_id"] == "cli_lark_direct"
+        assert credentials["domain"] == "lark"
+        assert credentials["open_id"] == "ou_lark_direct"
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_returns_none_on_access_denied(self, mock_urlopen_fn, mock_time):
+        """An access_denied error ends polling with None."""
+        from gateway.platforms.feishu import _poll_registration
+
+        mock_time.sleep = MagicMock()
+        mock_time.time.side_effect = [0, 1]
+
+        mock_urlopen_fn.return_value = _mock_urlopen({"error": "access_denied"})
+
+        outcome = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
+        )
+
+        assert outcome is None
+
+    @patch("gateway.platforms.feishu.time")
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_poll_returns_none_on_timeout(self, mock_urlopen_fn, mock_time):
+        """With the mocked clock jumping from 0 to 999 and expire_in=1, polling returns None."""
+        from gateway.platforms.feishu import _poll_registration
+
+        mock_time.sleep = MagicMock()
+        mock_time.time.side_effect = [0, 999]
+
+        mock_urlopen_fn.return_value = _mock_urlopen({"error": "authorization_pending"})
+
+        outcome = _poll_registration(
+            device_code="dc_123", interval=1, expire_in=1, domain="feishu"
+        )
+
+        assert outcome is None
+
+
+class TestRenderQr:
+    """Cover _render_qr with the qrcode module mocked or absent."""
+
+    @patch("gateway.platforms.feishu._qrcode_mod", create=True)
+    def test_render_qr_returns_true_on_success(self, mock_qrcode_mod):
+        """With a qrcode module present, the URL is added, fitted, printed, and True returned."""
+        from gateway.platforms.feishu import _render_qr
+
+        qr_url = "https://example.com/qr"
+        fake_qr = MagicMock()
+        mock_qrcode_mod.QRCode.return_value = fake_qr
+
+        rendered = _render_qr(qr_url)
+
+        assert rendered is True
+        fake_qr.add_data.assert_called_once_with("https://example.com/qr")
+        fake_qr.make.assert_called_once_with(fit=True)
+        fake_qr.print_ascii.assert_called_once()
+
+    def test_render_qr_returns_false_when_qrcode_missing(self):
+        """With the module-level _qrcode_mod set to None, rendering reports False."""
+        from gateway.platforms.feishu import _render_qr
+
+        with patch("gateway.platforms.feishu._qrcode_mod", None):
+            rendered = _render_qr("https://example.com/qr")
+            assert rendered is False
+
+
+class TestProbeBot:
+    """Tests for bot connectivity verification."""
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True)
+    def test_probe_returns_bot_info_on_success(self):
+        """With FEISHU_AVAILABLE true, probe_bot passes through what _probe_bot_sdk returns."""
+        from gateway.platforms.feishu import probe_bot
+
+        sdk_answer = {"bot_name": "TestBot", "bot_open_id": "ou_bot123"}
+        with patch("gateway.platforms.feishu._probe_bot_sdk") as mock_sdk:
+            mock_sdk.return_value = sdk_answer
+            bot_info = probe_bot("cli_app", "secret", "feishu")
+
+        assert bot_info is not None
+        assert bot_info["bot_name"] == "TestBot"
+        assert bot_info["bot_open_id"] == "ou_bot123"
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True)
+    def test_probe_returns_none_on_failure(self):
+        from gateway.platforms.feishu import probe_bot
+
+        with patch("gateway.platforms.feishu._probe_bot_sdk") as mock_sdk:
+            mock_sdk.return_value = None
+            result = probe_bot("bad_id", "bad_secret", "feishu")
+
+        assert result is None
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", False)
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_http_fallback_when_sdk_unavailable(self, mock_urlopen_fn):
+        """Without lark_oapi, probe falls back to raw HTTP."""
+        from gateway.platforms.feishu import probe_bot
+
+        token_resp = _mock_urlopen({"code": 0, "tenant_access_token": "t-123"})
+        bot_resp = _mock_urlopen({"code": 0, "bot": {"bot_name": "HttpBot", "open_id": "ou_http"}})
+        mock_urlopen_fn.side_effect = [token_resp, bot_resp]
+
+        result = probe_bot("cli_app", "secret", "feishu")
+        assert result is not None
+        assert result["bot_name"] == "HttpBot"
+
+    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", False)
+    @patch("gateway.platforms.feishu.urlopen")
+    def test_http_fallback_returns_none_on_network_error(self, mock_urlopen_fn):
+        from gateway.platforms.feishu import probe_bot
+        from urllib.error import URLError
+
+        mock_urlopen_fn.side_effect = URLError("connection refused")
+        result = probe_bot("cli_app", "secret", "feishu")
+        assert result is None
+
+
+class TestQrRegister:
+    """Checks of the qr_register entry point with its collaborators mocked out."""
+
+    @patch("gateway.platforms.feishu.probe_bot")
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._poll_registration")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_success_flow(
+        self, mock_init, mock_begin, mock_poll, mock_render, mock_probe
+    ):
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.return_value = dict(
+            device_code="dc_123",
+            qr_url="https://example.com/qr",
+            user_code="ABCD",
+            interval=1,
+            expire_in=60,
+        )
+        mock_poll.return_value = dict(
+            app_id="cli_app",
+            app_secret="secret",
+            domain="feishu",
+            open_id="ou_owner",
+        )
+        mock_probe.return_value = dict(bot_name="MyBot", bot_open_id="ou_bot")
+
+        registration = qr_register()
+        assert registration is not None
+        assert registration["app_id"] == "cli_app"
+        assert registration["app_secret"] == "secret"
+        assert registration["bot_name"] == "MyBot"
+        mock_init.assert_called_once()
+        mock_render.assert_called_once()
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_init_failure(self, mock_init):
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = RuntimeError("not supported")
+        outcome = qr_register()
+        assert outcome is None
+
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._poll_registration")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_poll_failure(
+        self, mock_init, mock_begin, mock_poll, mock_render
+    ):
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.return_value = dict(
+            device_code="dc_123",
+            qr_url="https://example.com/qr",
+            user_code="ABCD",
+            interval=1,
+            expire_in=60,
+        )
+        mock_poll.return_value = None
+
+        outcome = qr_register()
+        assert outcome is None
+
+    # -- Contract: expected errors yield None, unexpected errors propagate --
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_network_error(self, mock_init):
+        """A URLError from the init step counts as an expected failure: None."""
+        from urllib.error import URLError
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = URLError("DNS resolution failed")
+        outcome = qr_register()
+        assert outcome is None
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_on_json_error(self, mock_init):
+        """A JSONDecodeError from the init step counts as an expected failure: None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = json.JSONDecodeError("bad json", "", 0)
+        outcome = qr_register()
+        assert outcome is None
+
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_propagates_unexpected_errors(self, mock_init):
+        """An AttributeError (i.e. a bug) must escape qr_register, not become None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_init.side_effect = AttributeError("some internal bug")
+        with pytest.raises(AttributeError, match="some internal bug"):
+            qr_register()
+
+    # -- Negative paths: partial or malformed server responses --
+
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_returns_none_when_begin_missing_device_code(
+        self, mock_init, mock_begin, mock_render
+    ):
+        """The begin step raising RuntimeError (no device_code) results in None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.side_effect = RuntimeError("Feishu registration did not return a device_code")
+        outcome = qr_register()
+        assert outcome is None
+
+    @patch("gateway.platforms.feishu.probe_bot")
+    @patch("gateway.platforms.feishu._render_qr")
+    @patch("gateway.platforms.feishu._poll_registration")
+    @patch("gateway.platforms.feishu._begin_registration")
+    @patch("gateway.platforms.feishu._init_registration")
+    def test_qr_register_succeeds_even_when_probe_fails(
+        self, mock_init, mock_begin, mock_poll, mock_render, mock_probe
+    ):
+        """A None probe still yields credentials, with both bot fields set to None."""
+        from gateway.platforms.feishu import qr_register
+
+        mock_begin.return_value = dict(
+            device_code="dc_123",
+            qr_url="https://example.com/qr",
+            user_code="ABCD",
+            interval=1,
+            expire_in=60,
+        )
+        mock_poll.return_value = dict(
+            app_id="cli_app",
+            app_secret="secret",
+            domain="feishu",
+            open_id="ou_owner",
+        )
+        mock_probe.return_value = None  # probe failed
+
+        registration = qr_register()
+        assert registration is not None
+        assert registration["app_id"] == "cli_app"
+        assert registration["bot_name"] is None
+        assert registration["bot_open_id"] is None
@@ -28,12 +28,16 @@ class _FakeRegistry:

    def __init__(self, sessions):
        self._sessions = list(sessions)
+        self._completion_consumed: set = set()

    def get(self, session_id):
        if self._sessions:
            return self._sessions.pop(0)
        return None

+    def is_completion_consumed(self, session_id):
+        return session_id in self._completion_consumed
+

 def _build_runner(monkeypatch, tmp_path) -> GatewayRunner:
    """Create a GatewayRunner with notifications set to 'all'."""
@@ -157,12 +157,44 @@ def _make_fake_mautrix():
    mautrix_crypto_store = types.ModuleType("mautrix.crypto.store")

    class MemoryCryptoStore:
-        def __init__(self, account_id="", pickle_key=""):
+        def __init__(self, account_id="", pickle_key=""):  # noqa: S301
            self.account_id = account_id
            self.pickle_key = pickle_key

    mautrix_crypto_store.MemoryCryptoStore = MemoryCryptoStore

+    # --- mautrix.crypto.store.asyncpg ---
+    mautrix_crypto_store_asyncpg = types.ModuleType("mautrix.crypto.store.asyncpg")
+
+    class PgCryptoStore:
+        """Fake PgCryptoStore that only records its constructor arguments."""
+        upgrade_table = MagicMock()
+
+        def __init__(self, account_id="", pickle_key="", db=None):  # noqa: S301
+            self.account_id, self.pickle_key, self.db = account_id, pickle_key, db
+
+        async def open(self):
+            """No-op coroutine; the fake has nothing to open."""
+            return None
+
+    mautrix_crypto_store_asyncpg.PgCryptoStore = PgCryptoStore
+
+    # --- mautrix.util ---
+    mautrix_util = types.ModuleType("mautrix.util")
+
+    # --- mautrix.util.async_db ---
+    mautrix_util_async_db = types.ModuleType("mautrix.util.async_db")
+
+    class Database:
+        """Fake for mautrix.util.async_db.Database exposing only create()."""
+
+        @classmethod
+        def create(cls, url, upgrade_table=None):
+            # Arguments are ignored; the mock returned has awaitable start/stop.
+            return MagicMock(start=AsyncMock(), stop=AsyncMock())
+
+    mautrix_util_async_db.Database = Database
+
    return {
        "mautrix": mautrix,
        "mautrix.api": mautrix_api,
@@ -171,6 +203,9 @@ def _make_fake_mautrix():
        "mautrix.client.state_store": mautrix_client_state_store,
        "mautrix.crypto": mautrix_crypto,
        "mautrix.crypto.store": mautrix_crypto_store,
+        "mautrix.crypto.store.asyncpg": mautrix_crypto_store_asyncpg,
+        "mautrix.util": mautrix_util,
+        "mautrix.util.async_db": mautrix_util_async_db,
    }


@@ -740,6 +775,12 @@ class TestMatrixAccessTokenAuth:
        mock_client.whoami = AsyncMock(return_value=FakeWhoamiResponse("@bot:example.org", "DEV123"))
        mock_client.sync = AsyncMock(return_value={"rooms": {"join": {"!room:server": {}}}})
        mock_client.add_event_handler = MagicMock()
+        mock_client.handle_sync = MagicMock(return_value=[])
+        mock_client.query_keys = AsyncMock(return_value={
+            "device_keys": {"@bot:example.org": {"DEV123": {
+                "keys": {"ed25519:DEV123": "fake_ed25519_key"},
+            }}},
+        })
        mock_client.api = MagicMock()
        mock_client.api.token = "syt_test_access_token"
        mock_client.api.session = MagicMock()
@@ -751,6 +792,8 @@ class TestMatrixAccessTokenAuth:
        mock_olm.share_keys = AsyncMock()
        mock_olm.share_keys_min_trust = None
        mock_olm.send_keys_min_trust = None
+        mock_olm.account = MagicMock()
+        mock_olm.account.identity_keys = {"ed25519": "fake_ed25519_key"}

        # Patch Client constructor to return our mock
        fake_mautrix_mods["mautrix.client"].Client = MagicMock(return_value=mock_client)
@@ -924,6 +967,12 @@ class TestMatrixDeviceId:
        mock_client.whoami = AsyncMock(return_value=MagicMock(user_id="@bot:example.org", device_id="WHOAMI_DEV"))
        mock_client.sync = AsyncMock(return_value={"rooms": {"join": {"!room:server": {}}}})
        mock_client.add_event_handler = MagicMock()
+        mock_client.handle_sync = MagicMock(return_value=[])
+        mock_client.query_keys = AsyncMock(return_value={
+            "device_keys": {"@bot:example.org": {"MY_STABLE_DEVICE": {
+                "keys": {"ed25519:MY_STABLE_DEVICE": "fake_ed25519_key"},
+            }}},
+        })
        mock_client.api = MagicMock()
        mock_client.api.token = "syt_test_access_token"
        mock_client.api.session = MagicMock()
@@ -934,6 +983,8 @@ class TestMatrixDeviceId:
        mock_olm.share_keys = AsyncMock()
        mock_olm.share_keys_min_trust = None
        mock_olm.send_keys_min_trust = None
+        mock_olm.account = MagicMock()
+        mock_olm.account.identity_keys = {"ed25519": "fake_ed25519_key"}

        fake_mautrix_mods["mautrix.client"].Client = MagicMock(return_value=mock_client)
        fake_mautrix_mods["mautrix.crypto"].OlmMachine = MagicMock(return_value=mock_olm)
@@ -1030,8 +1081,8 @@ class TestMatrixDeviceIdConfig:

 class TestMatrixSyncLoop:
    @pytest.mark.asyncio
-    async def test_sync_loop_shares_keys_when_encryption_enabled(self):
-        """_sync_loop should call crypto.share_keys() after each sync."""
+    async def test_sync_loop_dispatches_events_and_stores_token(self):
+        """_sync_loop should call handle_sync() and persist next_batch."""
        adapter = _make_adapter()
        adapter._encryption = True
        adapter._closing = False
@@ -1046,7 +1097,6 @@ class TestMatrixSyncLoop:
            return {"rooms": {"join": {"!room:example.org": {}}}, "next_batch": "s1234"}

        mock_crypto = MagicMock()
-        mock_crypto.share_keys = AsyncMock()

        mock_sync_store = MagicMock()
        mock_sync_store.get_next_batch = AsyncMock(return_value=None)
@@ -1062,7 +1112,6 @@ class TestMatrixSyncLoop:
        await adapter._sync_loop()

        fake_client.sync.assert_awaited_once()
-        mock_crypto.share_keys.assert_awaited_once()
        fake_client.handle_sync.assert_called_once()
        mock_sync_store.put_next_batch.assert_awaited_once_with("s1234")

@@ -1248,6 +1297,12 @@ class TestMatrixEncryptedEventHandler:
        mock_client.whoami = AsyncMock(return_value=MagicMock(user_id="@bot:example.org", device_id="DEV123"))
        mock_client.sync = AsyncMock(return_value={"rooms": {"join": {"!room:server": {}}}})
        mock_client.add_event_handler = MagicMock()
+        mock_client.handle_sync = MagicMock(return_value=[])
+        mock_client.query_keys = AsyncMock(return_value={
+            "device_keys": {"@bot:example.org": {"DEV123": {
+                "keys": {"ed25519:DEV123": "fake_ed25519_key"},
+            }}},
+        })
        mock_client.api = MagicMock()
        mock_client.api.token = "syt_test_token"
        mock_client.api.session = MagicMock()
@@ -1258,6 +1313,8 @@ class TestMatrixEncryptedEventHandler:
        mock_olm.share_keys = AsyncMock()
        mock_olm.share_keys_min_trust = None
        mock_olm.send_keys_min_trust = None
+        mock_olm.account = MagicMock()
+        mock_olm.account.identity_keys = {"ed25519": "fake_ed25519_key"}

        fake_mautrix_mods["mautrix.client"].Client = MagicMock(return_value=mock_client)
        fake_mautrix_mods["mautrix.crypto"].OlmMachine = MagicMock(return_value=mock_olm)
@@ -48,6 +48,7 @@ def _make_event(
    room_id="!room1:example.org",
    formatted_body=None,
    thread_id=None,
+    mention_user_ids=None,
 ):
    """Create a fake room message event.

@@ -60,6 +61,9 @@ def _make_event(
        content["formatted_body"] = formatted_body
        content["format"] = "org.matrix.custom.html"

+    if mention_user_ids is not None:
+        content["m.mentions"] = {"user_ids": mention_user_ids}
+
    relates_to = {}
    if thread_id:
        relates_to["rel_type"] = "m.thread"
@@ -108,6 +112,44 @@ class TestIsBotMentioned:
        # "hermesbot" should not match word-boundary check for "hermes"
        assert not self.adapter._is_bot_mentioned("hermesbot is here")

+    # m.mentions.user_ids — MSC3952 / Matrix v1.7 authoritative mentions
+    # Ported from openclaw/openclaw#64796
+
+    def test_m_mentions_user_ids_authoritative(self):
+        """Listing the bot in m.mentions.user_ids suffices without body text."""
+        # The body below never names @hermes.
+        ids = ["@hermes:example.org"]
+        mentioned = self.adapter._is_bot_mentioned("please reply", mention_user_ids=ids)
+        assert mentioned
+
+    def test_m_mentions_user_ids_with_body_mention(self):
+        """A mention present in both the body and m.mentions is still detected."""
+        # The same user ID appears in the text and in the structured list.
+        body, ids = "hey @hermes:example.org help", ["@hermes:example.org"]
+        mentioned = self.adapter._is_bot_mentioned(body, mention_user_ids=ids)
+        assert mentioned
+
+    def test_m_mentions_user_ids_other_user_only(self):
+        """When m.mentions lists only someone else, the bot is not mentioned."""
+        # Neither the body nor the list refers to the bot.
+        ids = ["@alice:example.org"]
+        mentioned = self.adapter._is_bot_mentioned("hello", mention_user_ids=ids)
+        assert not mentioned
+
+    def test_m_mentions_user_ids_empty_list(self):
+        """An empty user_ids list plus a mention-free body gives a falsy result."""
+        # Stated intent: an empty list defers to text-based detection.
+        no_ids = []
+        mentioned = self.adapter._is_bot_mentioned("hello everyone", mention_user_ids=no_ids)
+        assert not mentioned
+
+    def test_m_mentions_user_ids_none(self):
+        """mention_user_ids=None plus a mention-free body gives a falsy result."""
+        # Stated intent: None defers to text-based detection.
+        body = "hello everyone"
+        mentioned = self.adapter._is_bot_mentioned(body, mention_user_ids=None)
+        assert not mentioned
+

 class TestStripMention:
    def setup_method(self):
@@ -176,6 +218,44 @@ async def test_require_mention_html_pill(monkeypatch):
    adapter.handle_message.assert_awaited_once()


+@pytest.mark.asyncio
+async def test_require_mention_m_mentions_user_ids(monkeypatch):
+    """An event naming the bot only in m.mentions.user_ids is still handled.
+
+    Authoritative per MSC3952; ported from openclaw/openclaw#64796.
+    """
+    for var in ("MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS"):
+        monkeypatch.delenv(var, raising=False)
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    # The text itself has no mention; only the structured list names the bot.
+    mentions = ["@hermes:example.org"]
+    event = _make_event(
+        "please reply", mention_user_ids=mentions
+    )
+
+    await adapter._on_room_message(event)
+    adapter.handle_message.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_require_mention_m_mentions_other_user_ignored(monkeypatch):
+    """An event whose m.mentions.user_ids names someone else is not handled."""
+    for var in ("MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS"):
+        monkeypatch.delenv(var, raising=False)
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    # Both the text and the structured mention list
+    # point at alice rather than at the bot.
+    others = ["@alice:example.org"]
+    event = _make_event("hey alice check this", mention_user_ids=others)
+
+    await adapter._on_room_message(event)
+    adapter.handle_message.assert_not_awaited()
+
+
@pytest.mark.asyncio
 async def test_require_mention_dm_always_responds(monkeypatch):
    """DMs always respond regardless of mention setting."""
@@ -9,6 +9,8 @@ from gateway.platforms.base import (
    MessageEvent,
    MessageType,
    safe_url_for_log,
+    utf16_len,
+    _prefix_within_utf16_limit,
 )


@@ -448,3 +450,135 @@ class TestGetHumanDelay:
        with patch.dict(os.environ, env):
            delay = BasePlatformAdapter._get_human_delay()
            assert 0.1 <= delay <= 0.2
+
+
+# ---------------------------------------------------------------------------
+# utf16_len / _prefix_within_utf16_limit / truncate_message with len_fn
+# ---------------------------------------------------------------------------
+# Ported from nearai/ironclaw#2304 — Telegram counts message length in UTF-16
+# code units, not Unicode code-points.  Astral-plane characters (emoji, CJK
+# Extension B) are surrogate pairs: 1 Python char but 2 UTF-16 units.
+
+
+class TestUtf16Len:
+    """Checks utf16_len against strings with known UTF-16 code-unit counts."""
+
+    def test_ascii(self):
+        assert utf16_len("hello") == 5
+
+    def test_bmp_cjk(self):
+        """CJK ideographs inside the BMP take one code unit apiece."""
+        assert utf16_len("你好") == 2
+
+    def test_emoji_surrogate_pair(self):
+        """😀 (U+1F600) lies outside the BMP, so it needs two code units."""
+        assert utf16_len("😀") == 2
+
+    def test_mixed(self):
+        """Two ASCII letters plus one emoji: 2 + 2 = 4 code units."""
+        assert utf16_len("hi😀") == 4
+
+    def test_musical_symbol(self):
+        """𝄞 (U+1D11E, Musical Symbol G Clef) is also a surrogate pair."""
+        assert utf16_len("𝄞") == 2
+
+    def test_empty(self):
+        assert utf16_len("") == 0
+
+
+class TestPrefixWithinUtf16Limit:
+    """Checks _prefix_within_utf16_limit on ASCII, emoji and empty input."""
+
+    def test_fits_entirely(self):
+        assert _prefix_within_utf16_limit("hello", 10) == "hello"
+
+    def test_ascii_truncation(self):
+        prefix = _prefix_within_utf16_limit("hello world", 5)
+        assert prefix == "hello"
+        assert utf16_len(prefix) <= 5
+
+    def test_does_not_split_surrogate_pair(self):
+        """"a😀b" is 1 + 2 + 1 units; a limit of 2 must stop after "a"."""
+        prefix = _prefix_within_utf16_limit("a😀b", 2)
+        assert prefix == "a"
+        assert utf16_len(prefix) <= 2
+
+    def test_emoji_at_limit(self):
+        """A 2-unit emoji exactly fills a limit of 2 and is kept whole."""
+        prefix = _prefix_within_utf16_limit("😀x", 2)
+        assert prefix == "😀"
+
+    def test_all_emoji(self):
+        limit = 6  # room for three emoji out of the ten (20 units) supplied
+        prefix = _prefix_within_utf16_limit("😀" * 10, limit)
+        assert prefix == "😀" * 3
+        assert utf16_len(prefix) == limit
+
+    def test_empty(self):
+        assert _prefix_within_utf16_limit("", 5) == ""
+
+
+class TestTruncateMessageUtf16:
+    """Verify truncate_message respects UTF-16 lengths when len_fn=utf16_len."""
+
+    def test_short_emoji_message_no_split(self):
+        """A short message under the UTF-16 limit should not be split."""
+        msg = "Hello 😀 world"
+        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
+        assert len(chunks) == 1
+        assert chunks[0] == msg
+
+    def test_emoji_near_limit_triggers_split(self):
+        """A message under 4096 code points but over 4096 UTF-16 units must split."""
+        # 2049 emoji = 2049 codepoints but 4098 UTF-16 units → exceeds 4096
+        msg = "😀" * 2049
+        assert len(msg) == 2049  # Python len sees 2049 chars
+        assert utf16_len(msg) == 4098  # but it's 4098 UTF-16 units
+
+        # Without UTF-16 awareness, this would NOT split (2049 < 4096)
+        chunks_naive = BasePlatformAdapter.truncate_message(msg, 4096)
+        assert len(chunks_naive) == 1, "Without len_fn, no split expected"
+
+        # With UTF-16 awareness, it MUST split
+        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
+        assert len(chunks) > 1, "With utf16_len, message should be split"
+
+        # Each chunk must fit within the UTF-16 limit
+        for i, chunk in enumerate(chunks):
+            assert utf16_len(chunk) <= 4096, (
+                f"Chunk {i} exceeds 4096 UTF-16 units: {utf16_len(chunk)}"
+            )
+
+    def test_each_utf16_chunk_within_limit(self):
+        """Every chunk produced with utf16_len stays within max_len plus slack."""
+        # Mix of BMP and astral-plane characters
+        msg = ("Hello 😀 world 🎵 test 𝄞 " * 200).strip()
+        max_len, slack = 200, 20  # NOTE(review): reason for the 20-unit slack not shown here
+        chunks = BasePlatformAdapter.truncate_message(msg, max_len, len_fn=utf16_len)
+        for i, chunk in enumerate(chunks):
+            u16_len = utf16_len(chunk)
+            assert u16_len <= max_len + slack, (
+                f"Chunk {i} UTF-16 length {u16_len} exceeds {max_len + slack}"
+            )
+
+    def test_all_content_preserved(self):
+        """Every distinct word must still appear somewhere after a UTF-16 split."""
+        words = ["emoji😀", "music🎵", "cjk你好", "plain"] * 100
+        msg = " ".join(words)
+        chunks = BasePlatformAdapter.truncate_message(msg, 200, len_fn=utf16_len)
+        reassembled = " ".join(chunks)
+        for word in words:
+            assert word in reassembled, f"Word '{word}' lost during UTF-16 split"
+
+    def test_code_blocks_preserved_with_utf16(self):
+        """Code block fence handling should work with utf16_len too."""
+        msg = "Before\n```python\n" + "x = '😀'\n" * 200 + "```\nAfter"
+        chunks = BasePlatformAdapter.truncate_message(msg, 300, len_fn=utf16_len)
+        assert len(chunks) > 1
+        # Each chunk should have balanced fences
+        for i, chunk in enumerate(chunks):
+            fence_count = chunk.count("```")
+            assert fence_count % 2 == 0, (
+                f"Chunk {i} has unbalanced fences ({fence_count})"
+            )
+
@@ -0,0 +1,215 @@
+"""Tests for /restart notification — the gateway notifies the requester on comeback."""
+
+import asyncio
+import json
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+import gateway.run as gateway_run
+from gateway.config import Platform
+from gateway.platforms.base import MessageEvent, MessageType
+from gateway.session import build_session_key
+from tests.gateway.restart_test_helpers import (
+    make_restart_runner,
+    make_restart_source,
+)
+
+
+# ── _handle_restart_command writes .restart_notify.json ──────────────────
+
+
+@pytest.mark.asyncio
+async def test_restart_command_writes_notify_file(tmp_path, monkeypatch):
+    """Handling /restart leaves the requester's platform and chat in a JSON file."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    runner, _ = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # A /restart message from chat 42; no thread_id is set on the source here.
+    restart_event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=make_restart_source(chat_id="42"),
+        message_id="m1",
+    )
+
+    reply = await runner._handle_restart_command(restart_event)
+    assert "Restarting" in reply
+
+    marker = tmp_path / ".restart_notify.json"
+    assert marker.exists()
+    saved = json.loads(marker.read_text())
+    assert saved["platform"] == "telegram"
+    assert saved["chat_id"] == "42"
+    assert "thread_id" not in saved  # no thread → omitted
+
+
+@pytest.mark.asyncio
+async def test_restart_command_uses_service_restart_under_systemd(tmp_path, monkeypatch):
+    """With INVOCATION_ID set, request_restart is asked for a service restart."""
+    monkeypatch.setenv("INVOCATION_ID", "abc123")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    runner, _ = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # Build the /restart message inline; the source helper supplies chat 42.
+    restart_event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=make_restart_source(chat_id="42"),
+        message_id="m1",
+    )
+
+    await runner._handle_restart_command(restart_event)
+    runner.request_restart.assert_called_once_with(detached=False, via_service=True)
+
+
+@pytest.mark.asyncio
+async def test_restart_command_uses_detached_without_systemd(tmp_path, monkeypatch):
+    """With INVOCATION_ID unset, request_restart is asked for a detached restart."""
+    monkeypatch.delenv("INVOCATION_ID", raising=False)
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    runner, _ = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    # Build the /restart message inline; the source helper supplies chat 42.
+    restart_event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=make_restart_source(chat_id="42"),
+        message_id="m1",
+    )
+
+    await runner._handle_restart_command(restart_event)
+    runner.request_restart.assert_called_once_with(detached=True, via_service=False)
+
+
+@pytest.mark.asyncio
+async def test_restart_command_preserves_thread_id(tmp_path, monkeypatch):
+    """A thread_id on the requesting source is written into the notify file."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    runner, _ = make_restart_runner()
+    runner.request_restart = MagicMock(return_value=True)
+
+    threaded_source = make_restart_source(chat_id="99")
+    threaded_source.thread_id = "topic_7"
+
+    restart_event = MessageEvent(
+        text="/restart",
+        message_type=MessageType.TEXT,
+        source=threaded_source,
+        message_id="m2",
+    )
+
+    await runner._handle_restart_command(restart_event)
+
+    saved = json.loads((tmp_path / ".restart_notify.json").read_text())
+    assert saved["thread_id"] == "topic_7"
+
+
+# ── _send_restart_notification ───────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkeypatch):
+    """A pending notify file triggers one send to its chat and is then deleted."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    # Pending notice for telegram chat 42; it carries
+    # no thread_id, so no metadata is expected below.
+    pending = {"platform": "telegram", "chat_id": "42"}
+    notify_path.write_text(json.dumps(pending))
+
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock()
+
+    await runner._send_restart_notification()
+
+    adapter.send.assert_called_once()
+    positional, keyword = adapter.send.call_args
+    assert positional[0] == "42"  # chat_id
+    assert "restarted" in positional[1].lower()
+    assert keyword.get("metadata") is None  # no thread
+    assert not notify_path.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_with_thread(tmp_path, monkeypatch):
+    """A stored thread_id is forwarded to adapter.send() via the metadata kwarg."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    # Pending notice that additionally names a thread.
+    pending = dict(
+        platform="telegram", chat_id="99", thread_id="topic_7",
+    )
+    notify_path.write_text(json.dumps(pending))
+
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock()
+
+    await runner._send_restart_notification()
+
+    _, keyword = adapter.send.call_args
+    assert keyword["metadata"] == {"thread_id": "topic_7"}
+    assert not notify_path.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_noop_when_no_file(tmp_path, monkeypatch):
+    """Without a .restart_notify.json in the home dir, no message goes out."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    runner, telegram = make_restart_runner()
+    telegram.send = AsyncMock()
+
+    await runner._send_restart_notification()
+
+    telegram.send.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_skips_when_adapter_missing(tmp_path, monkeypatch):
+    """A notice for a platform without an adapter is dropped without raising."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    # The notice targets discord, while the helper-built
+    # runner only has a telegram adapter.
+    pending = {"platform": "discord", "chat_id": "42"}
+    notify_path.write_text(json.dumps(pending))
+
+    runner, _ = make_restart_runner()
+
+    await runner._send_restart_notification()
+
+    # The file must be gone even though nothing could be sent.
+    assert not notify_path.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_cleans_up_on_send_failure(tmp_path, monkeypatch):
+    """A failing adapter.send() must not leave the notify file behind."""
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    notify_path = tmp_path / ".restart_notify.json"
+    # Pending notice addressed to telegram chat 42.
+    pending = {
+        "platform": "telegram",
+        "chat_id": "42",
+    }
+    notify_path.write_text(json.dumps(pending))
+
+    runner, adapter = make_restart_runner()
+    adapter.send = AsyncMock(side_effect=RuntimeError("network down"))
+
+    await runner._send_restart_notification()
+
+    assert not notify_path.exists()  # cleaned up despite error
@@ -396,6 +396,27 @@ class QueuedCommentaryAgent:
        }


+class VerboseAgent:
+    """Fake agent that reports one execute_code start with a 300-char argument."""
+    LONG_CODE = "x" * 300
+
+    def __init__(self, **kwargs):
+        self.tools = []
+        self.tool_progress_callback = kwargs.get("tool_progress_callback")
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        # Announce the tool start with the oversized payload, pause briefly,
+        # then hand back a canned result.
+        tool_args = {"code": self.LONG_CODE}
+        self.tool_progress_callback("tool.started", "execute_code", None, tool_args)
+        time.sleep(0.35)
+        return dict(
+            final_response="done",
+            messages=[],
+            api_calls=1,
+        )
+
+
 async def _run_with_agent(
    monkeypatch,
    tmp_path,
@@ -575,3 +596,45 @@ async def test_run_agent_queued_message_does_not_treat_commentary_as_final(monke
    assert result["final_response"] == "final response 2"
    assert "I'll inspect the repo first." in sent_texts
    assert "final response 1" in sent_texts
+
+
+@pytest.mark.asyncio
+async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path):
+    """Verbose mode with default tool_preview_length (0) should NOT truncate args.
+
+    Previously, verbose mode capped args at 200 chars when tool_preview_length
+    was 0 (default).  The user explicitly opted into verbose — show full detail.
+    """
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        VerboseAgent,
+        session_id="sess-verbose-no-truncate",
+        config_data={"display": {"tool_progress": "verbose", "tool_preview_length": 0}},
+    )
+
+    assert result["final_response"] == "done"
+    # Collect every delivered text (initial sends and later edits) and join
+    # them with a single delimiter, so the tail of the last sent message can
+    # never fuse with the head of the first edit into text nobody sent.
+    delivered = [call["content"] for call in adapter.sent]
+    delivered += [call["content"] for call in adapter.edits]
+    all_content = " ".join(delivered)
+    # The full 300-char 'x' string should be present, not truncated to 200
+    assert VerboseAgent.LONG_CODE in all_content
+
+
+@pytest.mark.asyncio
+async def test_verbose_mode_respects_explicit_tool_preview_length(monkeypatch, tmp_path):
+    """When tool_preview_length is set to a positive value, verbose truncates to that."""
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        VerboseAgent,
+        session_id="sess-verbose-explicit-cap",
+        config_data={"display": {"tool_progress": "verbose", "tool_preview_length": 50}},
+    )
+
+    assert result["final_response"] == "done"
+    # Join sends and edits with one delimiter between every piece; gluing the
+    # two groups together without a separator could fuse adjacent messages
+    # and fabricate a long 'x' run or a "..." that no single message held.
+    delivered = [call["content"] for call in adapter.sent]
+    delivered += [call["content"] for call in adapter.edits]
+    all_content = " ".join(delivered)
+    # Should be truncated — full 300-char string NOT present
+    assert VerboseAgent.LONG_CODE not in all_content
+    # But should still contain the truncated portion with "..."
+    assert "..." in all_content
@@ -1,4 +1,5 @@
 import pytest
+from unittest.mock import AsyncMock

 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.platforms.base import BasePlatformAdapter
@@ -45,6 +46,23 @@ class _DisabledAdapter(BasePlatformAdapter):
        return {"id": chat_id}


+class _SuccessfulAdapter(BasePlatformAdapter):
+    """Discord adapter stub whose ``connect()`` always reports success."""
+
+    def __init__(self):
+        platform_config = PlatformConfig(enabled=True, token="***")
+        super().__init__(platform_config, Platform.DISCORD)
+
+    async def connect(self) -> bool:
+        # Connecting never fails for this stub.
+        return True
+
+    async def disconnect(self) -> None:
+        self._mark_disconnected()
+
+    async def get_chat_info(self, chat_id):
+        # Echo the id back as the only chat attribute.
+        return {"id": chat_id}
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        # Message delivery is deliberately left unimplemented in this stub.
+        raise NotImplementedError
+
+
@pytest.mark.asyncio
 async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -65,7 +83,7 @@ async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch,
    state = read_runtime_status()
    assert state["gateway_state"] == "startup_failed"
    assert "temporary DNS resolution failure" in state["exit_reason"]
-    assert state["platforms"]["telegram"]["state"] == "fatal"
+    assert state["platforms"]["telegram"]["state"] == "retrying"
    assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error"


@@ -89,6 +107,64 @@ async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkey
    assert state["gateway_state"] == "running"


+@pytest.mark.asyncio
+async def test_runner_records_connected_platform_state_on_success(monkeypatch, tmp_path):
+    """A successful start records discord as connected with no error details."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    discord_config = PlatformConfig(enabled=True, token="***")
+    config = GatewayConfig(
+        platforms={Platform.DISCORD: discord_config},
+        sessions_dir=tmp_path / "sessions",
+    )
+    runner = GatewayRunner(config)
+
+    def _build_adapter(platform, platform_config):
+        # Whatever platform is requested, hand back the always-connecting stub.
+        return _SuccessfulAdapter()
+
+    monkeypatch.setattr(runner, "_create_adapter", _build_adapter)
+    # Neutralise hook discovery and emission for the duration of the test.
+    monkeypatch.setattr(runner.hooks, "discover_and_load", lambda: None)
+    monkeypatch.setattr(runner.hooks, "emit", AsyncMock())
+
+    started = await runner.start()
+    assert started is True
+
+    status = read_runtime_status()
+    assert status["gateway_state"] == "running"
+    discord_status = status["platforms"]["discord"]
+    assert discord_status["state"] == "connected"
+    assert discord_status["error_code"] is None
+    assert discord_status["error_message"] is None
+
+
+@pytest.mark.asyncio
+async def test_start_gateway_verbosity_imports_redacting_formatter(monkeypatch, tmp_path):
+    """Verbosity != None must not crash with NameError on RedactingFormatter (#8044)."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    class _CleanExitRunner:
+        """Stand-in runner: starts successfully and is flagged for a clean exit."""
+
+        def __init__(self, config):
+            self.adapters = {}
+            self.exit_reason = None
+            self.should_exit_cleanly = True
+            self.config = config
+
+        async def start(self):
+            return True
+
+        async def stop(self):
+            return None
+
+    # Replace process, skill-sync and logging collaborators with inert stubs,
+    # and swap the real runner for the stand-in above.
+    replacements = (
+        ("gateway.status.get_running_pid", lambda: None),
+        ("tools.skills_sync.sync_skills", lambda quiet=True: None),
+        ("hermes_logging.setup_logging", lambda hermes_home, mode: tmp_path),
+        ("hermes_logging._add_rotating_handler", lambda *args, **kwargs: None),
+        ("gateway.run.GatewayRunner", _CleanExitRunner),
+    )
+    for target, replacement in replacements:
+        monkeypatch.setattr(target, replacement)
+
+    from gateway.run import start_gateway
+
+    # verbosity=1 triggers the code path that uses RedactingFormatter.
+    # Before the fix this raised NameError.
+    ok = await start_gateway(config=GatewayConfig(), replace=False, verbosity=1)
+
+    assert ok is True
+
+
@pytest.mark.asyncio
 async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_path):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
--- a/Show More
+++ b/Show More