refactor: update github-code-review skill to use MCP tools

Replace gh CLI and curl-based GitHub API interactions with native GitHub MCP tools (mcp_github_*). This modernizes the skill to use the agent's built-in MCP integration for all GitHub operations.

Key changes:
- Replace gh CLI commands with mcp_github_pull_request_read(), mcp_github_pull_request_review_write(), etc.
- Replace curl API calls with mcp_github_add_issue_comment(), mcp_github_add_comment_to_pending_review(), etc.
- Add mcp_github_run_secret_scanning() to security checklist
- Add mcp_github_request_copilot_review() as optional step
- Add quick reference table mapping tasks to MCP tools
- Keep git CLI for local diff operations (unchanged)
- Bump version to 2.0.0
2026-04-12 18:12:35 +10:00
162 changed files with 1089 additions and 22212 deletions
@@ -1,2 +0,0 @@
-# Auto-generated files — collapse diffs and exclude from language stats
-web/package-lock.json linguist-generated=true
@@ -41,19 +41,11 @@ jobs:
          python-version: '3.11'

      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml httpx
+        run: pip install pyyaml

      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

-      - name: Build skills index (if not already present)
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          if [ ! -f website/static/api/skills-index.json ]; then
-            python3 scripts/build_skills_index.py || echo "Skills index build failed (non-fatal)"
-          fi
-
      - name: Install dependencies
        run: npm ci
        working-directory: website
@@ -69,7 +69,9 @@ jobs:
          file: Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
-          tags: nousresearch/hermes-agent:latest
+          tags: |
+            nousresearch/hermes-agent:latest
+            nousresearch/hermes-agent:${{ github.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

@@ -81,6 +83,9 @@ jobs:
          file: Dockerfile
          push: true
          platforms: linux/amd64,linux/arm64
-          tags: nousresearch/hermes-agent:${{ github.event.release.tag_name }}
+          tags: |
+            nousresearch/hermes-agent:latest
+            nousresearch/hermes-agent:${{ github.event.release.tag_name }}
+            nousresearch/hermes-agent:${{ github.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
@@ -1,101 +0,0 @@
-name: Build Skills Index
-
-on:
-  schedule:
-    # Run twice daily: 6 AM and 6 PM UTC
-    - cron: '0 6,18 * * *'
-  workflow_dispatch:  # Manual trigger
-  push:
-    branches: [main]
-    paths:
-      - 'scripts/build_skills_index.py'
-      - '.github/workflows/skills-index.yml'
-
-permissions:
-  contents: read
-
-jobs:
-  build-index:
-    # Only run on the upstream repository, not on forks
-    if: github.repository == 'NousResearch/hermes-agent'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Install dependencies
-        run: pip install httpx pyyaml
-
-      - name: Build skills index
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: python scripts/build_skills_index.py
-
-      - name: Upload index artifact
-        uses: actions/upload-artifact@v4
-        with:
-          name: skills-index
-          path: website/static/api/skills-index.json
-          retention-days: 7
-
-  deploy-with-index:
-    needs: build-index
-    runs-on: ubuntu-latest
-    permissions:
-      pages: write
-      id-token: write
-    environment:
-      name: github-pages
-      url: ${{ steps.deploy.outputs.page_url }}
-    # Only deploy on schedule or manual trigger (not on every push to the script)
-    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/download-artifact@v4
-        with:
-          name: skills-index
-          path: website/static/api/
-
-      - uses: actions/setup-node@v4
-        with:
-          node-version: 20
-          cache: npm
-          cache-dependency-path: website/package-lock.json
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml
-
-      - name: Extract skill metadata for dashboard
-        run: python3 website/scripts/extract-skills.py
-
-      - name: Install dependencies
-        run: npm ci
-        working-directory: website
-
-      - name: Build Docusaurus
-        run: npm run build
-        working-directory: website
-
-      - name: Stage deployment
-        run: |
-          mkdir -p _site/docs
-          cp -r landingpage/* _site/
-          cp -r website/build/* _site/docs/
-          echo "hermes-agent.nousresearch.com" > _site/CNAME
-
-      - name: Upload artifact
-        uses: actions/upload-pages-artifact@v3
-        with:
-          path: _site
-
-      - name: Deploy to GitHub Pages
-        id: deploy
-        uses: actions/deploy-pages@v4
@@ -51,9 +51,6 @@ ignored/
 .worktrees/
 environments/benchmarks/evals/

-# Web UI build output
-hermes_cli/web_dist/
-
 # Release script temp files
 .release_notes.md
 mini-swe-agent/
@@ -61,4 +58,3 @@ mini-swe-agent/
 # Nix
 .direnv/
 result
-website/static/api/skills-index.json
@@ -167,7 +167,6 @@ python -m pytest tests/ -q
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
 - 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---

@@ -1021,23 +1021,6 @@ _AUTO_PROVIDER_LABELS = {

 _AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})

-_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
-
-
-def _normalize_main_runtime(main_runtime: Optional[Dict[str, Any]]) -> Dict[str, str]:
-    """Return a sanitized copy of a live main-runtime override."""
-    if not isinstance(main_runtime, dict):
-        return {}
-    normalized: Dict[str, str] = {}
-    for field in _MAIN_RUNTIME_FIELDS:
-        value = main_runtime.get(field)
-        if isinstance(value, str) and value.strip():
-            normalized[field] = value.strip()
-    provider = normalized.get("provider")
-    if provider:
-        normalized["provider"] = provider.lower()
-    return normalized
-

 def _get_provider_chain() -> List[tuple]:
    """Return the ordered provider detection chain.
@@ -1147,7 +1130,7 @@ def _try_payment_fallback(
    return None, None, ""


-def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Optional[OpenAI], Optional[str]]:
+def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Full auto-detection chain.

    Priority:
@@ -1159,12 +1142,6 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    """
    global auxiliary_is_nous, _stale_base_url_warned
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
-    runtime = _normalize_main_runtime(main_runtime)
-    runtime_provider = runtime.get("provider", "")
-    runtime_model = runtime.get("model", "")
-    runtime_base_url = runtime.get("base_url", "")
-    runtime_api_key = runtime.get("api_key", "")
-    runtime_api_mode = runtime.get("api_mode", "")

    # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
    #    provider (not 'custom').  This catches the common "env poisoning"
@@ -1172,7 +1149,7 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    #    old OPENAI_BASE_URL lingers in ~/.hermes/.env. ──
    if not _stale_base_url_warned:
        _env_base = os.getenv("OPENAI_BASE_URL", "").strip()
-        _cfg_provider = runtime_provider or _read_main_provider()
+        _cfg_provider = _read_main_provider()
        if (_env_base and _cfg_provider
                and _cfg_provider != "custom"
                and not _cfg_provider.startswith("custom:")):
@@ -1186,25 +1163,12 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
            _stale_base_url_warned = True

    # ── Step 1: non-aggregator main provider → use main model directly ──
-    main_provider = runtime_provider or _read_main_provider()
-    main_model = runtime_model or _read_main_model()
+    main_provider = _read_main_provider()
+    main_model = _read_main_model()
    if (main_provider and main_model
            and main_provider not in _AGGREGATOR_PROVIDERS
            and main_provider not in ("auto", "")):
-        resolved_provider = main_provider
-        explicit_base_url = None
-        explicit_api_key = None
-        if runtime_base_url and (main_provider == "custom" or main_provider.startswith("custom:")):
-            resolved_provider = "custom"
-            explicit_base_url = runtime_base_url
-            explicit_api_key = runtime_api_key or None
-        client, resolved = resolve_provider_client(
-            resolved_provider,
-            main_model,
-            explicit_base_url=explicit_base_url,
-            explicit_api_key=explicit_api_key,
-            api_mode=runtime_api_mode or None,
-        )
+        client, resolved = resolve_provider_client(main_provider, main_model)
        if client is not None:
            logger.info("Auxiliary auto-detect: using main provider %s (%s)",
                        main_provider, resolved or main_model)
@@ -1285,7 +1249,6 @@ def resolve_provider_client(
    explicit_base_url: str = None,
    explicit_api_key: str = None,
    api_mode: str = None,
-    main_runtime: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1356,7 +1319,7 @@ def resolve_provider_client(

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
-        client, resolved = _resolve_auto(main_runtime=main_runtime)
+        client, resolved = _resolve_auto()
        if client is None:
            return None, None
        # When auto-detection lands on a non-OpenRouter provider (e.g. a
@@ -1580,11 +1543,7 @@ def resolve_provider_client(

 # ── Public API ──────────────────────────────────────────────────────────────

-def get_text_auxiliary_client(
-    task: str = "",
-    *,
-    main_runtime: Optional[Dict[str, Any]] = None,
-) -> Tuple[Optional[OpenAI], Optional[str]]:
+def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, default_model_slug) for text-only auxiliary tasks.

    Args:
@@ -1601,11 +1560,10 @@ def get_text_auxiliary_client(
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
-        main_runtime=main_runtime,
    )


-def get_async_text_auxiliary_client(task: str = "", *, main_runtime: Optional[Dict[str, Any]] = None):
+def get_async_text_auxiliary_client(task: str = ""):
    """Return (async_client, model_slug) for async consumers.

    For standard providers returns (AsyncOpenAI, model). For Codex returns
@@ -1620,7 +1578,6 @@ def get_async_text_auxiliary_client(task: str = "", *, main_runtime: Optional[Di
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
-        main_runtime=main_runtime,
    )


@@ -1935,7 +1892,6 @@ def _get_cached_client(
    base_url: str = None,
    api_key: str = None,
    api_mode: str = None,
-    main_runtime: Optional[Dict[str, Any]] = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -1959,9 +1915,7 @@ def _get_cached_client(
            loop_id = id(current_loop)
        except RuntimeError:
            pass
-    runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key)
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id)
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -1986,7 +1940,6 @@ def _get_cached_client(
        explicit_base_url=base_url,
        explicit_api_key=api_key,
        api_mode=api_mode,
-        main_runtime=runtime,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -2112,75 +2065,6 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
    return default


-# ---------------------------------------------------------------------------
-# Anthropic-compatible endpoint detection + image block conversion
-# ---------------------------------------------------------------------------
-
-# Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
-# Their image content blocks must use Anthropic format, not OpenAI format.
-_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
-
-
-def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
-    """Detect if an endpoint expects Anthropic-format content blocks.
-
-    Returns True for known Anthropic-compatible providers (MiniMax) and
-    any endpoint whose URL contains ``/anthropic`` in the path.
-    """
-    if provider in _ANTHROPIC_COMPAT_PROVIDERS:
-        return True
-    url_lower = (base_url or "").lower()
-    return "/anthropic" in url_lower
-
-
-def _convert_openai_images_to_anthropic(messages: list) -> list:
-    """Convert OpenAI ``image_url`` content blocks to Anthropic ``image`` blocks.
-
-    Only touches messages that have list-type content with ``image_url`` blocks;
-    plain text messages pass through unchanged.
-    """
-    converted = []
-    for msg in messages:
-        content = msg.get("content")
-        if not isinstance(content, list):
-            converted.append(msg)
-            continue
-        new_content = []
-        changed = False
-        for block in content:
-            if block.get("type") == "image_url":
-                image_url_val = (block.get("image_url") or {}).get("url", "")
-                if image_url_val.startswith("data:"):
-                    # Parse data URI: data:<media_type>;base64,<data>
-                    header, _, b64data = image_url_val.partition(",")
-                    media_type = "image/png"
-                    if ":" in header and ";" in header:
-                        media_type = header.split(":", 1)[1].split(";", 1)[0]
-                    new_content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "base64",
-                            "media_type": media_type,
-                            "data": b64data,
-                        },
-                    })
-                else:
-                    # URL-based image
-                    new_content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "url",
-                            "url": image_url_val,
-                        },
-                    })
-                changed = True
-            else:
-                new_content.append(block)
-        converted.append({**msg, "content": new_content} if changed else msg)
-    return converted
-
-
-
 def _build_call_kwargs(
    provider: str,
    model: str,
@@ -2265,7 +2149,6 @@ def call_llm(
    model: str = None,
    base_url: str = None,
    api_key: str = None,
-    main_runtime: Optional[Dict[str, Any]] = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
@@ -2331,7 +2214,6 @@ def call_llm(
            base_url=resolved_base_url,
            api_key=resolved_api_key,
            api_mode=resolved_api_mode,
-            main_runtime=main_runtime,
        )
        if client is None:
            # When the user explicitly chose a non-OpenRouter provider but no
@@ -2352,7 +2234,7 @@ def call_llm(
            if not resolved_base_url:
                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
                            task or "call", resolved_provider)
-                client, final_model = _get_cached_client("auto", main_runtime=main_runtime)
+                client, final_model = _get_cached_client("auto")
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -2373,11 +2255,6 @@ def call_llm(
        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

-    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
-    _client_base = str(getattr(client, "base_url", "") or "")
-    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
-        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
-
    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
    try:
        return _validate_llm_response(
@@ -2454,9 +2331,9 @@ def extract_content_or_reasoning(response) -> str:
    if content:
        # Strip inline think/reasoning blocks (mirrors _strip_think_blocks)
        cleaned = re.sub(
-            r"<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>"
+            r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>"
            r".*?"
-            r"</(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>",
+            r"</(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>",
            "", content, flags=re.DOTALL | re.IGNORECASE,
        ).strip()
        if cleaned:
@@ -2566,11 +2443,6 @@ async def async_call_llm(
        tools=tools, timeout=effective_timeout, extra_body=extra_body,
        base_url=resolved_base_url)

-    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
-    _client_base = str(getattr(client, "base_url", "") or "")
-    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
-        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
-
    try:
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
@@ -86,14 +86,12 @@ class ContextCompressor(ContextEngine):
        base_url: str = "",
        api_key: str = "",
        provider: str = "",
-        api_mode: str = "",
    ) -> None:
        """Update model info after a model switch or fallback activation."""
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.provider = provider
-        self.api_mode = api_mode
        self.context_length = context_length
        self.threshold_tokens = max(
            int(context_length * self.threshold_percent),
@@ -113,13 +111,11 @@ class ContextCompressor(ContextEngine):
        api_key: str = "",
        config_context_length: int | None = None,
        provider: str = "",
-        api_mode: str = "",
    ):
        self.model = model
        self.base_url = base_url
        self.api_key = api_key
        self.provider = provider
-        self.api_mode = api_mode
        self.threshold_percent = threshold_percent
        self.protect_first_n = protect_first_n
        self.protect_last_n = protect_last_n
@@ -442,13 +438,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        try:
            call_kwargs = {
                "task": "compression",
-                "main_runtime": {
-                    "model": self.model,
-                    "provider": self.provider,
-                    "base_url": self.base_url,
-                    "api_key": self.api_key,
-                    "api_mode": self.api_mode,
-                },
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": summary_budget * 2,
                # timeout resolved from auxiliary.compression.timeout config by call_llm
@@ -24,7 +24,6 @@ from hermes_cli.auth import (
    _codex_access_token_is_expiring,
    _decode_jwt_claims,
    _import_codex_cli_tokens,
-    _write_codex_cli_tokens,
    _load_auth_store,
    _load_provider_state,
    _resolve_kimi_base_url,
@@ -694,14 +693,6 @@ class CredentialPool:
                        self._replace_entry(synced, updated)
                        self._persist()
                        self._sync_device_code_entry_to_auth_store(updated)
-                        try:
-                            _write_codex_cli_tokens(
-                                updated.access_token,
-                                updated.refresh_token,
-                                last_refresh=updated.last_refresh,
-                            )
-                        except Exception as wexc:
-                            logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc)
                        return updated
                    except Exception as retry_exc:
                        logger.debug("Codex retry refresh also failed: %s", retry_exc)
@@ -727,17 +718,6 @@ class CredentialPool:
        # _seed_from_singletons() on the next load_pool() sees fresh state
        # instead of re-seeding stale/consumed tokens.
        self._sync_device_code_entry_to_auth_store(updated)
-        # Write refreshed tokens back to ~/.codex/auth.json so Codex CLI
-        # and VS Code don't hit "refresh_token_reused" on their next refresh.
-        if self.provider == "openai-codex":
-            try:
-                _write_codex_cli_tokens(
-                    updated.access_token,
-                    updated.refresh_token,
-                    last_refresh=updated.last_refresh,
-                )
-            except Exception as wexc:
-                logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc)
        return updated

    def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
@@ -775,12 +775,12 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
                resp = client.post(f"{server_url}/api/show", json={"name": model})
                if resp.status_code == 200:
                    data = resp.json()
-                    # Prefer explicit num_ctx from Modelfile parameters: this is
-                    # the *runtime* context Ollama will actually allocate KV cache
-                    # for. The GGUF model_info.context_length is the training max,
-                    # which can be larger than num_ctx — using it here would let
-                    # Hermes grow conversations past the runtime limit and Ollama
-                    # would silently truncate. Matches query_ollama_num_ctx().
+                    # Check model_info for context length
+                    model_info = data.get("model_info", {})
+                    for key, value in model_info.items():
+                        if "context_length" in key and isinstance(value, (int, float)):
+                            return int(value)
+                    # Check parameters string for num_ctx
                    params = data.get("parameters", "")
                    if "num_ctx" in params:
                        for line in params.split("\n"):
@@ -791,11 +791,6 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
                                        return int(parts[-1])
                                    except ValueError:
                                        pass
-                    # Fall back to GGUF model_info context_length (training max)
-                    model_info = data.get("model_info", {})
-                    for key, value in model_info.items():
-                        if "context_length" in key and isinstance(value, (int, float)):
-                            return int(value)

            # LM Studio native API: /api/v1/models returns max_context_length.
            # This is more reliable than the OpenAI-compat /v1/models which
@@ -144,8 +144,6 @@ class ProviderInfo:
 PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openrouter": "openrouter",
    "anthropic": "anthropic",
-    "openai": "openai",
-    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
    "minimax": "minimax",
@@ -12,7 +12,7 @@ import threading
 from collections import OrderedDict
 from pathlib import Path

-from hermes_constants import get_hermes_home, get_skills_dir, is_wsl
+from hermes_constants import get_hermes_home, get_skills_dir
 from typing import Optional

 from agent.skill_utils import (
@@ -366,36 +366,6 @@ PLATFORM_HINTS = {
    ),
 }

-# ---------------------------------------------------------------------------
-# Environment hints — execution-environment awareness for the agent.
-# Unlike PLATFORM_HINTS (which describe the messaging channel), these describe
-# the machine/OS the agent's tools actually run on.
-# ---------------------------------------------------------------------------
-
-WSL_ENVIRONMENT_HINT = (
-    "You are running inside WSL (Windows Subsystem for Linux). "
-    "The Windows host filesystem is mounted under /mnt/ — "
-    "/mnt/c/ is the C: drive, /mnt/d/ is D:, etc. "
-    "The user's Windows files are typically at "
-    "/mnt/c/Users/<username>/Desktop/, Documents/, Downloads/, etc. "
-    "When the user references Windows paths or desktop files, translate "
-    "to the /mnt/c/ equivalent. You can list /mnt/c/Users/ to discover "
-    "the Windows username if needed."
-)
-
-
-def build_environment_hints() -> str:
-    """Return environment-specific guidance for the system prompt.
-
-    Detects WSL, and can be extended for Termux, Docker, etc.
-    Returns an empty string when no special environment is detected.
-    """
-    hints: list[str] = []
-    if is_wsl():
-        hints.append(WSL_ENVIRONMENT_HINT)
-    return "\n\n".join(hints)
-
-
 CONTEXT_FILE_MAX_CHARS = 20_000
 CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
 CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
@@ -756,16 +726,8 @@ def build_skills_system_prompt(

        result = (
            "## Skills (mandatory)\n"
-            "Before replying, scan the skills below. If a skill matches or is even partially relevant "
-            "to your task, you MUST load it with skill_view(name) and follow its instructions. "
-            "Err on the side of loading — it is always better to have context you don't need "
-            "than to miss critical steps, pitfalls, or established workflows. "
-            "Skills contain specialized knowledge — API endpoints, tool-specific commands, "
-            "and proven workflows that outperform general-purpose approaches. Load the skill "
-            "even if you think you could handle the task with basic tools like web_search or terminal. "
-            "Skills also encode the user's preferred approach, conventions, and quality standards "
-            "for tasks like code review, planning, and testing — load them even for tasks you "
-            "already know how to do, because the skill defines how it should be done here.\n"
+            "Before replying, scan the skills below. If one clearly matches your task, "
+            "load it with skill_view(name) and follow its instructions. "
            "If a skill has issues, fix it with skill_manage(action='patch').\n"
            "After difficult/iterative tasks, offer to save as a skill. "
            "If a skill you loaded was missing steps, had wrong commands, or needed "
@@ -775,7 +737,7 @@ def build_skills_system_prompt(
            + "\n".join(index_lines) + "\n"
            "</available_skills>\n"
            "\n"
-            "Only proceed without loading a skill if genuinely none are relevant to the task."
+            "If none match, proceed normally without loading a skill."
        )

    # ── Store in LRU cache ────────────────────────────────────────────
@@ -36,7 +36,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =

    try:
        response = call_llm(
-            task="title_generation",
+            task="compression",  # reuse compression task config (cheap/fast model)
            messages=messages,
            max_tokens=30,
            temperature=0.3,
@@ -2420,8 +2420,8 @@ class HermesCLI:
        # suppress them during streaming too — unless show_reasoning is
        # enabled, in which case we route the inner content to the
        # reasoning display box instead of discarding it.
-        _OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>", "<thinking>", "<thought>")
-        _CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>", "</thinking>", "</thought>")
+        _OPEN_TAGS = ("<REASONING_SCRATCHPAD>", "<think>", "<reasoning>", "<THINKING>", "<thinking>")
+        _CLOSE_TAGS = ("</REASONING_SCRATCHPAD>", "</think>", "</reasoning>", "</THINKING>", "</thinking>")

        # Append to a pre-filter buffer first
        self._stream_prefilt = getattr(self, "_stream_prefilt", "") + text
@@ -2735,22 +2735,6 @@ class HermesCLI:
        if runtime_model and isinstance(runtime_model, str):
            self.model = runtime_model

-        # If model is still empty (e.g. user ran `hermes auth add openai-codex`
-        # without `hermes model`), fall back to the provider's first catalog
-        # model so the API call doesn't fail with "model must be non-empty".
-        if not self.model and resolved_provider:
-            try:
-                from hermes_cli.models import get_default_model_for_provider
-                _default = get_default_model_for_provider(resolved_provider)
-                if _default:
-                    self.model = _default
-                    logger.info(
-                        "No model configured — defaulting to %s for provider %s",
-                        _default, resolved_provider,
-                    )
-            except Exception:
-                pass
-
        # Normalize model for the resolved provider (e.g. swap non-Codex
        # models when provider is openai-codex).  Fixes #651.
        model_changed = self._normalize_model_for_provider(resolved_provider)
@@ -2999,10 +2983,8 @@ class HermesCLI:
                )

        # Warn if the configured model is a Nous Hermes LLM (not agentic)
-        from hermes_cli.model_switch import is_nous_hermes_non_agentic
-
        model_name = getattr(self, "model", "") or ""
-        if is_nous_hermes_non_agentic(model_name):
+        if "hermes" in model_name.lower():
            self.console.print()
            self.console.print(
                "[bold yellow]⚠  Nous Research Hermes 3 & 4 models are NOT agentic and are not "
@@ -3116,8 +3098,6 @@ class HermesCLI:

        # Collect displayable entries (skip system, tool-result messages)
        entries = []  # list of (role, display_text)
-        _last_asst_idx = None       # index of last assistant entry
-        _last_asst_full = None      # un-truncated display text for last assistant
        for msg in self.conversation_history:
            role = msg.get("role", "")
            content = msg.get("content")
@@ -3147,9 +3127,7 @@ class HermesCLI:
                text = "" if content is None else str(content)
                text = _strip_reasoning(text)
                parts = []
-                full_parts = []  # un-truncated version
                if text:
-                    full_parts.append(text)
                    lines = text.splitlines()
                    if len(lines) > MAX_ASST_LINES:
                        text = "\n".join(lines[:MAX_ASST_LINES]) + " ..."
@@ -3169,15 +3147,11 @@ class HermesCLI:
                    if len(names) > 4:
                        names_str += ", ..."
                    noun = "call" if tc_count == 1 else "calls"
-                    tc_summary = f"[{tc_count} tool {noun}: {names_str}]"
-                    parts.append(tc_summary)
-                    full_parts.append(tc_summary)
+                    parts.append(f"[{tc_count} tool {noun}: {names_str}]")
                if not parts:
                    # Skip pure-reasoning messages that have no visible output
                    continue
                entries.append(("assistant", " ".join(parts)))
-                _last_asst_idx = len(entries) - 1
-                _last_asst_full = " ".join(full_parts)

        if not entries:
            return
@@ -3188,13 +3162,6 @@ class HermesCLI:
            skipped = len(entries) - MAX_DISPLAY_EXCHANGES * 2
            entries = entries[skipped:]

-        # Replace last assistant entry with full (un-truncated) text
-        # so the user can see where they left off without wasting tokens.
-        if _last_asst_idx is not None and _last_asst_full:
-            adj_idx = _last_asst_idx - skipped
-            if 0 <= adj_idx < len(entries):
-                entries[adj_idx] = ("assistant_last", _last_asst_full)
-
        # Build the display using Rich
        from rich.panel import Panel
        from rich.text import Text
@@ -3227,13 +3194,6 @@ class HermesCLI:
                lines.append(msg_lines[0] + "\n", style="dim")
                for ml in msg_lines[1:]:
                    lines.append(f"         {ml}\n", style="dim")
-            elif role == "assistant_last":
-                # Last assistant response shown in full, non-dim
-                lines.append("  ◆ Hermes: ", style=f"bold {_assistant_label_c}")
-                msg_lines = text.splitlines()
-                lines.append(msg_lines[0] + "\n", style="")
-                for ml in msg_lines[1:]:
-                    lines.append(f"            {ml}\n", style="")
            else:
                lines.append("  ◆ Hermes: ", style=f"dim bold {_assistant_label_c}")
                msg_lines = text.splitlines()
@@ -5415,16 +5375,10 @@ class HermesCLI:
            self._show_usage()
        elif canonical == "insights":
            self._show_insights(cmd_original)
-        elif canonical == "debug":
-            self._handle_debug_command()
        elif canonical == "paste":
            self._handle_paste_command()
        elif canonical == "image":
            self._handle_image_command(cmd_original)
-        elif canonical == "reload":
-            from hermes_cli.config import reload_env
-            count = reload_env()
-            print(f"  Reloaded .env ({count} var(s) updated)")
        elif canonical == "reload-mcp":
            with self._busy_command(self._slow_command_status(cmd_original)):
                self._reload_mcp()
@@ -6335,14 +6289,6 @@ class HermesCLI:
        except Exception as e:
            print(f"  ❌ Compression failed: {e}")

-    def _handle_debug_command(self):
-        """Handle /debug — upload debug report + logs and print paste URLs."""
-        from hermes_cli.debug import run_debug_share
-        from types import SimpleNamespace
-
-        args = SimpleNamespace(lines=200, expire=7, local=False)
-        run_debug_share(args)
-
    def _show_usage(self):
        """Show rate limits (if available) and session token usage."""
        if not self.agent:
@@ -7655,10 +7601,8 @@ class HermesCLI:
                        "error": _summary,
                    }

-            # Start agent in background thread (daemon so it cannot keep the
-            # process alive when the user closes the terminal tab — SIGHUP
-            # exits the main thread and daemon threads are reaped automatically).
-            agent_thread = threading.Thread(target=run_agent, daemon=True)
+            # Start agent in background thread
+            agent_thread = threading.Thread(target=run_agent)
            agent_thread.start()

            # Monitor the dedicated interrupt queue while the agent runs.
@@ -7844,17 +7788,6 @@ class HermesCLI:
                sys.stdout.write("\a")
                sys.stdout.flush()

-            # Notify when iteration budget was hit
-            if result and not result.get("completed") and not result.get("interrupted"):
-                _api_calls = result.get("api_calls", 0)
-                if _api_calls >= getattr(self.agent, "max_iterations", 90):
-                    _max_iter = getattr(self.agent, "max_iterations", 90)
-                    _cprint(
-                        f"\n{_DIM}⚠ Iteration budget reached "
-                        f"({_api_calls}/{_max_iter}) — "
-                        f"response may be incomplete{_RST}"
-                    )
-
            # Speak response aloud if voice TTS is enabled
            # Skip batch TTS when streaming TTS already handled it
            if self._voice_tts and response and not use_streaming_tts:
@@ -9620,37 +9553,17 @@ class HermesCLI:
            pass  # Signal handlers may fail in restricted environments
        
        # Install a custom asyncio exception handler that suppresses the
-        # "Event loop is closed" RuntimeError from httpx transport cleanup
-        # and the "0 is not registered" KeyError from broken stdin (#6393).
-        # The RuntimeError fix is defense-in-depth — the primary fix is
-        # neuter_async_httpx_del which disables __del__ entirely.  The
-        # KeyError fix handles macOS + uv-managed Python environments where
-        # fd 0 is not reliably available to the asyncio selector.
+        # "Event loop is closed" RuntimeError from httpx transport cleanup.
+        # This is defense-in-depth — the primary fix is neuter_async_httpx_del
+        # which disables __del__ entirely, but older clients or SDK upgrades
+        # could bypass it.
        def _suppress_closed_loop_errors(loop, context):
            exc = context.get("exception")
            if isinstance(exc, RuntimeError) and "Event loop is closed" in str(exc):
                return  # silently suppress
-            if isinstance(exc, KeyError) and "is not registered" in str(exc):
-                return  # suppress selector registration failures (#6393)
            # Fall back to default handler for everything else
            loop.default_exception_handler(context)

-        # Validate stdin before launching prompt_toolkit — on macOS with
-        # uv-managed Python, fd 0 can be invalid or unregisterable with the
-        # asyncio selector, causing "KeyError: '0 is not registered'" (#6393).
-        try:
-            import os as _os
-            _os.fstat(0)
-        except OSError:
-            print(
-                "Error: stdin (fd 0) is not available.\n"
-                "This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
-                "Try reinstalling Python via pyenv or Homebrew, then re-run: hermes setup"
-            )
-            _run_cleanup()
-            self._print_exit_summary()
-            return
-
        # Run the application with patch_stdout for proper output handling
        try:
            with patch_stdout():
@@ -9664,28 +9577,8 @@ class HermesCLI:
                app.run()
        except (EOFError, KeyboardInterrupt, BrokenPipeError):
            pass
-        except (KeyError, OSError) as _stdin_err:
-            # Catch selector registration failures from broken stdin (#6393).
-            # This is the fallback for cases that slip past the fstat() guard.
-            if "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
-                print(
-                    f"\nError: stdin is not usable ({_stdin_err}).\n"
-                    "This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
-                    "Try reinstalling Python via pyenv or Homebrew, then re-run: hermes setup"
-                )
-            else:
-                raise
        finally:
            self._should_exit = True
-            # Interrupt the agent immediately so its daemon thread stops making
-            # API calls and exits promptly (agent_thread is daemon, so the
-            # process will exit once the main thread finishes, but interrupting
-            # avoids wasted API calls and lets run_conversation clean up).
-            if self.agent and getattr(self, '_agent_running', False):
-                try:
-                    self.agent.interrupt()
-                except Exception:
-                    pass
            # Flush memories before exit (only for substantial conversations)
            if self.agent and self.conversation_history:
                try:
@@ -145,49 +145,12 @@ def _build_slack(adapter) -> List[Dict[str, str]]:


 def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
-    """Pull known channels/contacts from state.db gateway metadata.
-
-    Falls back to sessions.json for pre-migration databases.
-    """
-    entries = []
-
-    # Primary: query state.db
-    try:
-        from hermes_state import SessionDB
-        db = SessionDB()
-        try:
-            rows = db.list_gateway_sessions(platform=platform_name)
-        finally:
-            db.close()
-        if rows:
-            seen_ids = set()
-            for row in rows:
-                origin_json = row.get("origin_json")
-                if not origin_json:
-                    continue
-                try:
-                    origin = json.loads(origin_json)
-                except (json.JSONDecodeError, TypeError):
-                    continue
-                entry_id = _session_entry_id(origin)
-                if not entry_id or entry_id in seen_ids:
-                    continue
-                seen_ids.add(entry_id)
-                entries.append({
-                    "id": entry_id,
-                    "name": _session_entry_name(origin),
-                    "type": row.get("chat_type", "dm"),
-                    "thread_id": origin.get("thread_id"),
-                })
-            return entries
-    except Exception as e:
-        logger.debug("Channel directory: state.db lookup failed, falling back: %s", e)
-
-    # Fallback: read sessions.json
+    """Pull known channels/contacts from sessions.json origin data."""
    sessions_path = get_hermes_home() / "sessions" / "sessions.json"
    if not sessions_path.exists():
        return []

+    entries = []
    try:
        with open(sessions_path, encoding="utf-8") as f:
            data = json.load(f)
@@ -665,17 +665,6 @@ def load_gateway_config() -> GatewayConfig:
    _apply_env_overrides(config)
    
    # --- Validate loaded values ---
-    _validate_gateway_config(config)
-
-    return config
-
-
-def _validate_gateway_config(config: "GatewayConfig") -> None:
-    """Validate and sanitize a loaded GatewayConfig in place.
-
-    Called by ``load_gateway_config()`` after all config sources are merged.
-    Extracted as a separate function for testability.
-    """
    policy = config.default_reset_policy

    if not (0 <= policy.at_hour <= 23):
@@ -712,31 +701,7 @@ def _validate_gateway_config(config: "GatewayConfig") -> None:
                platform.value, env_name,
            )

-    # Reject known-weak placeholder tokens.
-    # Ported from openclaw/openclaw#64586: users who copy .env.example
-    # without changing placeholder values get a clear startup error instead
-    # of a confusing "auth failed" from the platform API.
-    try:
-        from hermes_cli.auth import has_usable_secret
-    except ImportError:
-        has_usable_secret = None  # type: ignore[assignment]
-
-    if has_usable_secret is not None:
-        for platform, pconfig in config.platforms.items():
-            if not pconfig.enabled:
-                continue
-            env_name = _token_env_names.get(platform)
-            if not env_name:
-                continue
-            token = pconfig.token
-            if token and token.strip() and not has_usable_secret(token, min_length=4):
-                logger.error(
-                    "%s is enabled but %s is set to a placeholder value ('%s'). "
-                    "Set a real bot token before starting the gateway. "
-                    "The adapter will NOT be started.",
-                    platform.value, env_name, token.strip()[:6] + "...",
-                )
-                pconfig.enabled = False
+    return config


 def _apply_env_overrides(config: GatewayConfig) -> None:
@@ -82,7 +82,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {

    # Tier 3 — no edit support, progress messages are permanent
    "signal":          _TIER_LOW,
-    "whatsapp":        _TIER_MEDIUM,  # Baileys bridge supports /edit
+    "whatsapp":        _TIER_LOW,
    "bluebubbles":     _TIER_LOW,
    "weixin":          _TIER_LOW,
    "wecom":           _TIER_LOW,
@@ -67,23 +67,10 @@ def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = Non
    """
    Find the active session_id for a platform + chat_id pair.

-    Queries state.db for matching sessions.  Falls back to sessions.json
-    for pre-migration databases.
+    Scans sessions.json entries and matches where origin.chat_id == chat_id
+    on the right platform.  DM session keys don't embed the chat_id
+    (e.g. "agent:main:telegram:dm"), so we check the origin dict.
    """
-    # Primary: query state.db
-    try:
-        from hermes_state import SessionDB
-        db = SessionDB()
-        try:
-            row = db.find_session_by_origin(platform, chat_id, thread_id=thread_id)
-        finally:
-            db.close()
-        if row:
-            return row.get("id")
-    except Exception as e:
-        logger.debug("Mirror: state.db lookup failed, falling back to sessions.json: %s", e)
-
-    # Fallback: read sessions.json
    if not _SESSIONS_INDEX.exists():
        return None

@@ -54,66 +54,6 @@ DEFAULT_PORT = 8642
 MAX_STORED_RESPONSES = 100
 MAX_REQUEST_BYTES = 1_000_000  # 1 MB default limit for POST bodies
 CHAT_COMPLETIONS_SSE_KEEPALIVE_SECONDS = 30.0
-MAX_NORMALIZED_TEXT_LENGTH = 65_536  # 64 KB cap for normalized content parts
-MAX_CONTENT_LIST_SIZE = 1_000  # Max items when content is an array
-
-
-def _normalize_chat_content(
-    content: Any, *, _max_depth: int = 10, _depth: int = 0,
-) -> str:
-    """Normalize OpenAI chat message content into a plain text string.
-
-    Some clients (Open WebUI, LobeChat, etc.) send content as an array of
-    typed parts instead of a plain string::
-
-        [{"type": "text", "text": "hello"}, {"type": "input_text", "text": "..."}]
-
-    This function flattens those into a single string so the agent pipeline
-    (which expects strings) doesn't choke.
-
-    Defensive limits prevent abuse: recursion depth, list size, and output
-    length are all bounded.
-    """
-    if _depth > _max_depth:
-        return ""
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
-
-    if isinstance(content, list):
-        parts: List[str] = []
-        items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
-        for item in items:
-            if isinstance(item, str):
-                if item:
-                    parts.append(item[:MAX_NORMALIZED_TEXT_LENGTH])
-            elif isinstance(item, dict):
-                item_type = str(item.get("type") or "").strip().lower()
-                if item_type in {"text", "input_text", "output_text"}:
-                    text = item.get("text", "")
-                    if text:
-                        try:
-                            parts.append(str(text)[:MAX_NORMALIZED_TEXT_LENGTH])
-                        except Exception:
-                            pass
-                # Silently skip image_url / other non-text parts
-            elif isinstance(item, list):
-                nested = _normalize_chat_content(item, _max_depth=_max_depth, _depth=_depth + 1)
-                if nested:
-                    parts.append(nested)
-            # Check accumulated size
-            if sum(len(p) for p in parts) >= MAX_NORMALIZED_TEXT_LENGTH:
-                break
-        result = "\n".join(parts)
-        return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result
-
-    # Fallback for unexpected types (int, float, bool, etc.)
-    try:
-        result = str(content)
-        return result[:MAX_NORMALIZED_TEXT_LENGTH] if len(result) > MAX_NORMALIZED_TEXT_LENGTH else result
-    except Exception:
-        return ""


 def check_api_server_requirements() -> bool:
@@ -613,7 +553,7 @@ class APIServerAdapter(BasePlatformAdapter):

        for msg in messages:
            role = msg.get("role", "")
-            content = _normalize_chat_content(msg.get("content", ""))
+            content = msg.get("content", "")
            if role == "system":
                # Accumulate system messages
                if system_prompt is None:
@@ -986,7 +926,18 @@ class APIServerAdapter(BasePlatformAdapter):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    content = _normalize_chat_content(item.get("content", ""))
+                    content = item.get("content", "")
+                    # Handle content that may be a list of content parts
+                    if isinstance(content, list):
+                        text_parts = []
+                        for part in content:
+                            if isinstance(part, dict) and part.get("type") == "input_text":
+                                text_parts.append(part.get("text", ""))
+                            elif isinstance(part, dict) and part.get("type") == "output_text":
+                                text_parts.append(part.get("text", ""))
+                            elif isinstance(part, str):
+                                text_parts.append(part)
+                        content = "\n".join(text_parts)
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1819,23 +1770,6 @@ class APIServerAdapter(BasePlatformAdapter):
                )
                return False

-            # Refuse to start network-accessible with a placeholder key.
-            # Ported from openclaw/openclaw#64586.
-            if is_network_accessible(self._host) and self._api_key:
-                try:
-                    from hermes_cli.auth import has_usable_secret
-                    if not has_usable_secret(self._api_key, min_length=8):
-                        logger.error(
-                            "[%s] Refusing to start: API_SERVER_KEY is set to a "
-                            "placeholder value. Generate a real secret "
-                            "(e.g. `openssl rand -hex 32`) and set API_SERVER_KEY "
-                            "before exposing the API server on %s.",
-                            self.name, self._host,
-                        )
-                        return False
-                except ImportError:
-                    pass
-
            # Port conflict detection — fail fast if port is already in use
            try:
                with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as _s:
@@ -21,59 +21,6 @@ from urllib.parse import urlsplit
 logger = logging.getLogger(__name__)


-def utf16_len(s: str) -> int:
-    """Count UTF-16 code units in *s*.
-
-    Telegram's message-length limit (4 096) is measured in UTF-16 code units,
-    **not** Unicode code-points.  Characters outside the Basic Multilingual
-    Plane (emoji like 😀, CJK Extension B, musical symbols, …) are encoded as
-    surrogate pairs and therefore consume **two** UTF-16 code units each, even
-    though Python's ``len()`` counts them as one.
-
-    Ported from nearai/ironclaw#2304 which discovered the same discrepancy in
-    Rust's ``chars().count()``.
-    """
-    return len(s.encode("utf-16-le")) // 2
-
-
-def _prefix_within_utf16_limit(s: str, limit: int) -> str:
-    """Return the longest prefix of *s* whose UTF-16 length ≤ *limit*.
-
-    Unlike a plain ``s[:limit]``, this respects surrogate-pair boundaries so
-    we never slice a multi-code-unit character in half.
-    """
-    if utf16_len(s) <= limit:
-        return s
-    # Binary search for the longest safe prefix
-    lo, hi = 0, len(s)
-    while lo < hi:
-        mid = (lo + hi + 1) // 2
-        if utf16_len(s[:mid]) <= limit:
-            lo = mid
-        else:
-            hi = mid - 1
-    return s[:lo]
-
-
-def _custom_unit_to_cp(s: str, budget: int, len_fn) -> int:
-    """Return the largest codepoint offset *n* such that ``len_fn(s[:n]) <= budget``.
-
-    Used by :meth:`BasePlatformAdapter.truncate_message` when *len_fn* measures
-    length in units different from Python codepoints (e.g. UTF-16 code units).
-    Falls back to binary search which is O(log n) calls to *len_fn*.
-    """
-    if len_fn(s) <= budget:
-        return len(s)
-    lo, hi = 0, len(s)
-    while lo < hi:
-        mid = (lo + hi + 1) // 2
-        if len_fn(s[:mid]) <= budget:
-            lo = mid
-        else:
-            hi = mid - 1
-    return lo
-
-
 def is_network_accessible(host: str) -> bool:
    """Return True if *host* would expose the server beyond loopback.

@@ -1939,11 +1886,7 @@ class BasePlatformAdapter(ABC):
        return content
    
    @staticmethod
-    def truncate_message(
-        content: str,
-        max_length: int = 4096,
-        len_fn: Optional["Callable[[str], int]"] = None,
-    ) -> List[str]:
+    def truncate_message(content: str, max_length: int = 4096) -> List[str]:
        """
        Split a long message into chunks, preserving code block boundaries.

@@ -1955,16 +1898,11 @@ class BasePlatformAdapter(ABC):
        Args:
            content: The full message content
            max_length: Maximum length per chunk (platform-specific)
-            len_fn: Optional length function for measuring string length.
-                     Defaults to ``len`` (Unicode code-points).  Pass
-                     ``utf16_len`` for platforms that measure message
-                     length in UTF-16 code units (e.g. Telegram).

        Returns:
            List of message chunks
        """
-        _len = len_fn or len
-        if _len(content) <= max_length:
+        if len(content) <= max_length:
            return [content]

        INDICATOR_RESERVE = 10   # room for " (XX/XX)"
@@ -1983,33 +1921,22 @@ class BasePlatformAdapter(ABC):

            # How much body text we can fit after accounting for the prefix,
            # a potential closing fence, and the chunk indicator.
-            headroom = max_length - INDICATOR_RESERVE - _len(prefix) - _len(FENCE_CLOSE)
+            headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
            if headroom < 1:
                headroom = max_length // 2

            # Everything remaining fits in one final chunk
-            if _len(prefix) + _len(remaining) <= max_length - INDICATOR_RESERVE:
+            if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
                chunks.append(prefix + remaining)
                break

-            # Find a natural split point (prefer newlines, then spaces).
-            # When _len != len (e.g. utf16_len for Telegram), headroom is
-            # measured in the custom unit.  We need codepoint-based slice
-            # positions that stay within the custom-unit budget.
-            #
-            # _safe_slice_pos() maps a custom-unit budget to the largest
-            # codepoint offset whose custom length ≤ budget.
-            if _len is not len:
-                # Map headroom (custom units) → codepoint slice length
-                _cp_limit = _custom_unit_to_cp(remaining, headroom, _len)
-            else:
-                _cp_limit = headroom
-            region = remaining[:_cp_limit]
+            # Find a natural split point (prefer newlines, then spaces)
+            region = remaining[:headroom]
            split_at = region.rfind("\n")
-            if split_at < _cp_limit // 2:
+            if split_at < headroom // 2:
                split_at = region.rfind(" ")
            if split_at < 1:
-                split_at = _cp_limit
+                split_at = headroom

            # Avoid splitting inside an inline code span (`...`).
            # If the text before split_at has an odd number of unescaped
@@ -2029,7 +1956,7 @@ class BasePlatformAdapter(ABC):
                    safe_split = candidate.rfind(" ", 0, last_bt)
                    nl_split = candidate.rfind("\n", 0, last_bt)
                    safe_split = max(safe_split, nl_split)
-                    if safe_split > _cp_limit // 4:
+                    if safe_split > headroom // 4:
                        split_at = safe_split

            chunk_body = remaining[:split_at]
@@ -34,9 +34,6 @@ from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional
-from urllib.error import HTTPError, URLError
-from urllib.parse import urlencode
-from urllib.request import Request, urlopen

 # aiohttp/websockets are independent optional deps — import outside lark_oapi
 # so they remain available for tests and webhook mode even if lark_oapi is missing.
@@ -172,19 +169,6 @@ _FEISHU_CARD_ACTION_DEDUP_TTL_SECONDS = 15 * 60    # card action token dedup win
 _FEISHU_BOT_MSG_TRACK_SIZE = 512                   # LRU size for tracking sent message IDs
 _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003})  # reply target withdrawn/missing → create fallback
 _FEISHU_ACK_EMOJI = "OK"
-
-# QR onboarding constants
-_ONBOARD_ACCOUNTS_URLS = {
-    "feishu": "https://accounts.feishu.cn",
-    "lark": "https://accounts.larksuite.com",
-}
-_ONBOARD_OPEN_URLS = {
-    "feishu": "https://open.feishu.cn",
-    "lark": "https://open.larksuite.com",
-}
-_REGISTRATION_PATH = "/oauth/v1/app/registration"
-_ONBOARD_REQUEST_TIMEOUT_S = 10
-
 # ---------------------------------------------------------------------------
 # Fallback display strings
 # ---------------------------------------------------------------------------
@@ -3637,328 +3621,3 @@ class FeishuAdapter(BasePlatformAdapter):
            return _FEISHU_FILE_UPLOAD_TYPE, "file"

        return _FEISHU_FILE_UPLOAD_TYPE, "file"
-
-
-# =============================================================================
-# QR scan-to-create onboarding
-#
-# Device-code flow: user scans a QR code with Feishu/Lark mobile app and the
-# platform creates a fully configured bot application automatically.
-# Called by `hermes gateway setup` via _setup_feishu() in hermes_cli/gateway.py.
-# =============================================================================
-
-
-def _accounts_base_url(domain: str) -> str:
-    return _ONBOARD_ACCOUNTS_URLS.get(domain, _ONBOARD_ACCOUNTS_URLS["feishu"])
-
-
-def _onboard_open_base_url(domain: str) -> str:
-    return _ONBOARD_OPEN_URLS.get(domain, _ONBOARD_OPEN_URLS["feishu"])
-
-
-def _post_registration(base_url: str, body: Dict[str, str]) -> dict:
-    """POST form-encoded data to the registration endpoint, return parsed JSON.
-
-    The registration endpoint returns JSON even on 4xx (e.g. poll returns
-    authorization_pending as a 400). We always parse the body regardless of
-    HTTP status.
-    """
-    url = f"{base_url}{_REGISTRATION_PATH}"
-    data = urlencode(body).encode("utf-8")
-    req = Request(url, data=data, headers={"Content-Type": "application/x-www-form-urlencoded"})
-    try:
-        with urlopen(req, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
-            return json.loads(resp.read().decode("utf-8"))
-    except HTTPError as exc:
-        body_bytes = exc.read()
-        if body_bytes:
-            try:
-                return json.loads(body_bytes.decode("utf-8"))
-            except (ValueError, json.JSONDecodeError):
-                raise exc from None
-        raise
-
-
-def _init_registration(domain: str = "feishu") -> None:
-    """Verify the environment supports client_secret auth.
-
-    Raises RuntimeError if not supported.
-    """
-    base_url = _accounts_base_url(domain)
-    res = _post_registration(base_url, {"action": "init"})
-    methods = res.get("supported_auth_methods") or []
-    if "client_secret" not in methods:
-        raise RuntimeError(
-            f"Feishu / Lark registration environment does not support client_secret auth. "
-            f"Supported: {methods}"
-        )
-
-
-def _begin_registration(domain: str = "feishu") -> dict:
-    """Start the device-code flow. Returns device_code, qr_url, user_code, interval, expire_in."""
-    base_url = _accounts_base_url(domain)
-    res = _post_registration(base_url, {
-        "action": "begin",
-        "archetype": "PersonalAgent",
-        "auth_method": "client_secret",
-        "request_user_info": "open_id",
-    })
-    device_code = res.get("device_code")
-    if not device_code:
-        raise RuntimeError("Feishu / Lark registration did not return a device_code")
-    qr_url = res.get("verification_uri_complete", "")
-    if "?" in qr_url:
-        qr_url += "&from=hermes&tp=hermes"
-    else:
-        qr_url += "?from=hermes&tp=hermes"
-    return {
-        "device_code": device_code,
-        "qr_url": qr_url,
-        "user_code": res.get("user_code", ""),
-        "interval": res.get("interval") or 5,
-        "expire_in": res.get("expire_in") or 600,
-    }
-
-
-def _poll_registration(
-    *,
-    device_code: str,
-    interval: int,
-    expire_in: int,
-    domain: str = "feishu",
-) -> Optional[dict]:
-    """Poll until the user scans the QR code, or timeout/denial.
-
-    Returns dict with app_id, app_secret, domain, open_id on success.
-    Returns None on failure.
-    """
-    deadline = time.time() + expire_in
-    current_domain = domain
-    domain_switched = False
-    poll_count = 0
-
-    while time.time() < deadline:
-        base_url = _accounts_base_url(current_domain)
-        try:
-            res = _post_registration(base_url, {
-                "action": "poll",
-                "device_code": device_code,
-                "tp": "ob_app",
-            })
-        except (URLError, OSError, json.JSONDecodeError):
-            time.sleep(interval)
-            continue
-
-        poll_count += 1
-        if poll_count == 1:
-            print("  Fetching configuration results...", end="", flush=True)
-        elif poll_count % 6 == 0:
-            print(".", end="", flush=True)
-
-        # Domain auto-detection
-        user_info = res.get("user_info") or {}
-        tenant_brand = user_info.get("tenant_brand")
-        if tenant_brand == "lark" and not domain_switched:
-            current_domain = "lark"
-            domain_switched = True
-            # Fall through — server may return credentials in this same response.
-
-        # Success
-        if res.get("client_id") and res.get("client_secret"):
-            if poll_count > 0:
-                print()  # newline after "Fetching configuration results..." dots
-            return {
-                "app_id": res["client_id"],
-                "app_secret": res["client_secret"],
-                "domain": current_domain,
-                "open_id": user_info.get("open_id"),
-            }
-
-        # Terminal errors
-        error = res.get("error", "")
-        if error in ("access_denied", "expired_token"):
-            if poll_count > 0:
-                print()
-            logger.warning("[Feishu onboard] Registration %s", error)
-            return None
-
-        # authorization_pending or unknown — keep polling
-        time.sleep(interval)
-
-    if poll_count > 0:
-        print()
-    logger.warning("[Feishu onboard] Poll timed out after %ds", expire_in)
-    return None
-
-
-try:
-    import qrcode as _qrcode_mod
-except (ImportError, TypeError):
-    _qrcode_mod = None  # type: ignore[assignment]
-
-
-def _render_qr(url: str) -> bool:
-    """Try to render a QR code in the terminal. Returns True if successful."""
-    if _qrcode_mod is None:
-        return False
-    try:
-        qr = _qrcode_mod.QRCode()
-        qr.add_data(url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        return True
-    except Exception:
-        return False
-
-
-def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
-    """Verify bot connectivity via /open-apis/bot/v3/info.
-
-    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
-    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
-    """
-    if FEISHU_AVAILABLE:
-        return _probe_bot_sdk(app_id, app_secret, domain)
-    return _probe_bot_http(app_id, app_secret, domain)
-
-
-def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:
-    """Build a lark Client for the given credentials and domain."""
-    sdk_domain = LARK_DOMAIN if domain == "lark" else FEISHU_DOMAIN
-    return (
-        lark.Client.builder()
-        .app_id(app_id)
-        .app_secret(app_secret)
-        .domain(sdk_domain)
-        .log_level(lark.LogLevel.WARNING)
-        .build()
-    )
-
-
-def _parse_bot_response(data: dict) -> Optional[dict]:
-    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
-    if data.get("code") != 0:
-        return None
-    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
-    return {
-        "bot_name": bot.get("bot_name"),
-        "bot_open_id": bot.get("open_id"),
-    }
-
-
-def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
-    """Probe bot info using lark_oapi SDK."""
-    try:
-        client = _build_onboard_client(app_id, app_secret, domain)
-        resp = client.request(
-            method="GET",
-            url="/open-apis/bot/v3/info",
-            body=None,
-            raw_response=True,
-        )
-        return _parse_bot_response(json.loads(resp.content))
-    except Exception as exc:
-        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
-        return None
-
-
-def _probe_bot_http(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
-    """Fallback probe using raw HTTP (when lark_oapi is not installed)."""
-    base_url = _onboard_open_base_url(domain)
-    try:
-        token_data = json.dumps({"app_id": app_id, "app_secret": app_secret}).encode("utf-8")
-        token_req = Request(
-            f"{base_url}/open-apis/auth/v3/tenant_access_token/internal",
-            data=token_data,
-            headers={"Content-Type": "application/json"},
-        )
-        with urlopen(token_req, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
-            token_res = json.loads(resp.read().decode("utf-8"))
-
-        access_token = token_res.get("tenant_access_token")
-        if not access_token:
-            return None
-
-        bot_req = Request(
-            f"{base_url}/open-apis/bot/v3/info",
-            headers={
-                "Authorization": f"Bearer {access_token}",
-                "Content-Type": "application/json",
-            },
-        )
-        with urlopen(bot_req, timeout=_ONBOARD_REQUEST_TIMEOUT_S) as resp:
-            bot_res = json.loads(resp.read().decode("utf-8"))
-
-        return _parse_bot_response(bot_res)
-    except (URLError, OSError, KeyError, json.JSONDecodeError) as exc:
-        logger.debug("[Feishu onboard] HTTP probe failed: %s", exc)
-        return None
-
-
-def qr_register(
-    *,
-    initial_domain: str = "feishu",
-    timeout_seconds: int = 600,
-) -> Optional[dict]:
-    """Run the Feishu / Lark scan-to-create QR registration flow.
-
-    Returns on success::
-
-        {
-            "app_id": str,
-            "app_secret": str,
-            "domain": "feishu" | "lark",
-            "open_id": str | None,
-            "bot_name": str | None,
-            "bot_open_id": str | None,
-        }
-
-    Returns None on expected failures (network, auth denied, timeout).
-    Unexpected errors (bugs, protocol regressions) propagate to the caller.
-    """
-    try:
-        return _qr_register_inner(initial_domain=initial_domain, timeout_seconds=timeout_seconds)
-    except (RuntimeError, URLError, OSError, json.JSONDecodeError) as exc:
-        logger.warning("[Feishu onboard] Registration failed: %s", exc)
-        return None
-
-
-def _qr_register_inner(
-    *,
-    initial_domain: str,
-    timeout_seconds: int,
-) -> Optional[dict]:
-    """Run init → begin → poll → probe. Raises on network/protocol errors."""
-    print("  Connecting to Feishu / Lark...", end="", flush=True)
-    _init_registration(initial_domain)
-    begin = _begin_registration(initial_domain)
-    print(" done.")
-
-    print()
-    qr_url = begin["qr_url"]
-    if _render_qr(qr_url):
-        print(f"\n  Scan the QR code above, or open this URL directly:\n  {qr_url}")
-    else:
-        print(f"  Open this URL in Feishu / Lark on your phone:\n\n  {qr_url}\n")
-        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
-    print()
-
-    result = _poll_registration(
-        device_code=begin["device_code"],
-        interval=begin["interval"],
-        expire_in=min(begin["expire_in"], timeout_seconds),
-        domain=initial_domain,
-    )
-    if not result:
-        return None
-
-    # Probe bot — best-effort, don't fail the registration
-    bot_info = probe_bot(result["app_id"], result["app_secret"], result["domain"])
-    if bot_info:
-        result["bot_name"] = bot_info.get("bot_name")
-        result["bot_open_id"] = bot_info.get("bot_open_id")
-    else:
-        result["bot_name"] = None
-        result["bot_open_id"] = None
-
-    return result
@@ -18,7 +18,6 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
-    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """

@@ -509,19 +508,6 @@ class MatrixAdapter(BasePlatformAdapter):
                    await api.session.close()
                    return False

-                # Import cross-signing private keys from SSSS and self-sign
-                # the current device. Required after any device-key rotation
-                # (fresh crypto.db, share_keys re-upload) — otherwise the
-                # device's self-signing signature is stale and peers refuse
-                # to share Megolm sessions with the rotated device.
-                recovery_key = os.getenv("MATRIX_RECOVERY_KEY", "").strip()
-                if recovery_key:
-                    try:
-                        await olm.verify_with_recovery_key(recovery_key)
-                        logger.info("Matrix: cross-signing verified via recovery key")
-                    except Exception as exc:
-                        logger.warning("Matrix: recovery key verification failed: %s", exc)
-
                client.crypto = olm
                logger.info(
                    "Matrix: E2EE enabled (store: %s%s)",
@@ -782,7 +768,7 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-                async with _aiohttp.ClientSession(trust_env=True) as http:
+                async with _aiohttp.ClientSession() as http:
                    async with http.get(image_url, timeout=_aiohttp.ClientTimeout(total=30)) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -1135,10 +1121,7 @@ class MatrixAdapter(BasePlatformAdapter):
            thread_id = relates_to.get("event_id")

        formatted_body = source_content.get("formatted_body")
-        # m.mentions.user_ids (MSC3952 / Matrix v1.7) — authoritative mention signal.
-        mentions_block = source_content.get("m.mentions") or {}
-        mention_user_ids = mentions_block.get("user_ids") if isinstance(mentions_block, dict) else None
-        is_mentioned = self._is_bot_mentioned(body, formatted_body, mention_user_ids)
+        is_mentioned = self._is_bot_mentioned(body, formatted_body)

        # Require-mention gating.
        if not is_dm:
@@ -1825,24 +1808,8 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

-    def _is_bot_mentioned(
-        self,
-        body: str,
-        formatted_body: Optional[str] = None,
-        mention_user_ids: Optional[list] = None,
-    ) -> bool:
-        """Return True if the bot is mentioned in the message.
-
-        Per MSC3952, ``m.mentions.user_ids`` is the authoritative mention
-        signal in the Matrix spec.  When the sender's client populates that
-        field with the bot's user-id, we trust it — even when the visible
-        body text does not contain an explicit ``@bot`` string (some clients
-        only render mention "pills" in ``formatted_body`` or use display
-        names).
-        """
-        # m.mentions.user_ids — authoritative per MSC3952 / Matrix v1.7.
-        if mention_user_ids and self._user_id and self._user_id in mention_user_ids:
-            return True
+    def _is_bot_mentioned(self, body: str, formatted_body: Optional[str] = None) -> bool:
+        """Return True if the bot is mentioned in the message."""
        if not body and not formatted_body:
            return False
        if self._user_id and self._user_id in body:
@@ -66,8 +66,6 @@ from gateway.platforms.base import (
    cache_audio_from_bytes,
    cache_document_from_bytes,
    SUPPORTED_DOCUMENT_TYPES,
-    utf16_len,
-    _prefix_within_utf16_limit,
 )
 from gateway.platforms.telegram_network import (
    TelegramFallbackTransport,
@@ -801,9 +799,7 @@ class TelegramAdapter(BasePlatformAdapter):
        try:
            # Format and split message if needed
            formatted = self.format_message(content)
-            chunks = self.truncate_message(
-                formatted, self.MAX_MESSAGE_LENGTH, len_fn=utf16_len,
-            )
+            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
            if len(chunks) > 1:
                # truncate_message appends a raw " (1/2)" suffix. Escape the
                # MarkdownV2-special parentheses so Telegram doesn't reject the
@@ -974,9 +970,7 @@ class TelegramAdapter(BasePlatformAdapter):
            # streaming).  Truncate and succeed so the stream consumer can
            # split the overflow into a new message instead of dying.
            if "message_too_long" in err_str or "too long" in err_str:
-                truncated = _prefix_within_utf16_limit(
-                    content, self.MAX_MESSAGE_LENGTH - 20
-                ) + "…"
+                truncated = content[: self.MAX_MESSAGE_LENGTH - 20] + "…"
                try:
                    await self._bot.edit_message_text(
                        chat_id=int(chat_id),
@@ -266,7 +266,7 @@ class WeComAdapter(BasePlatformAdapter):
    async def _open_connection(self) -> None:
        """Open and authenticate a websocket connection."""
        await self._cleanup_ws()
-        self._session = aiohttp.ClientSession(trust_env=True)
+        self._session = aiohttp.ClientSession()
        self._ws = await self._session.ws_connect(
            self._ws_url,
            heartbeat=HEARTBEAT_INTERVAL_SECONDS * 2,
@@ -112,7 +112,6 @@ TYPING_STOP = 2
 _HEADER_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$")
 _TABLE_RULE_RE = re.compile(r"^\s*\|?(?:\s*:?-{3,}:?\s*\|)+\s*:?-{3,}:?\s*\|?\s*$")
 _FENCE_RE = re.compile(r"^```([^\n`]*)\s*$")
-_MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")


 def check_weixin_requirements() -> bool:
@@ -399,16 +398,15 @@ async def _send_message(
    context_token: Optional[str],
    client_id: str,
 ) -> None:
-    if not text or not text.strip():
-        raise ValueError("_send_message: text must not be empty")
    message: Dict[str, Any] = {
        "from_user_id": "",
        "to_user_id": to,
        "client_id": client_id,
        "message_type": MSG_TYPE_BOT,
        "message_state": MSG_STATE_FINISH,
-        "item_list": [{"type": ITEM_TEXT, "text_item": {"text": text}}],
    }
+    if text:
+        message["item_list"] = [{"type": ITEM_TEXT, "text_item": {"text": text}}]
    if context_token:
        message["context_token"] = context_token
    await _api_post(
@@ -501,15 +499,13 @@ async def _upload_ciphertext(
    session: "aiohttp.ClientSession",
    *,
    ciphertext: bytes,
-    upload_url: str,
+    cdn_base_url: str,
+    upload_param: str,
+    filekey: str,
 ) -> str:
-    """Upload encrypted media to the CDN.
-
-    Accepts either a constructed CDN URL (from upload_param) or a direct
-    upload_full_url — both use POST with the raw ciphertext as the body.
-    """
+    url = _cdn_upload_url(cdn_base_url, upload_param, filekey)
    timeout = aiohttp.ClientTimeout(total=120)
-    async with session.post(upload_url, data=ciphertext, headers={"Content-Type": "application/octet-stream"}, timeout=timeout) as response:
+    async with session.post(url, data=ciphertext, headers={"Content-Type": "application/octet-stream"}, timeout=timeout) as response:
        if response.status == 200:
            encrypted_param = response.headers.get("x-encrypted-param")
            if encrypted_param:
@@ -653,7 +649,7 @@ def _normalize_markdown_blocks(content: str) -> str:
            result.append(_rewrite_table_block_for_weixin(table_lines))
            continue

-        result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line)))
+        result.append(_rewrite_headers_for_weixin(line))
        i += 1

    normalized = "\n".join(item.rstrip() for item in result)
@@ -815,8 +811,6 @@ def _split_text_for_weixin_delivery(
    ``platforms.weixin.extra.split_multiline_messages`` (``true`` / ``false``)
    or the env var ``WEIXIN_SPLIT_MULTILINE_MESSAGES``.
    """
-    if not content:
-        return []
    if split_per_line:
        # Legacy: one message per top-level delivery unit.
        if len(content) <= max_length and "\n" not in content:
@@ -827,14 +821,14 @@ def _split_text_for_weixin_delivery(
                chunks.append(unit)
                continue
            chunks.extend(_pack_markdown_blocks_for_weixin(unit, max_length))
-        return [c for c in chunks if c] or [content]
+        return chunks or [content]

    # Compact (default): single message when under the limit — unless the
    # content looks like a short chatty exchange, in which case split into
    # separate bubbles for a more natural chat feel.
    if len(content) <= max_length:
        return (
-            [u for u in _split_delivery_units_for_weixin(content) if u]
+            _split_delivery_units_for_weixin(content)
            if _should_split_short_chat_block_for_weixin(content)
            else [content]
        )
@@ -935,7 +929,7 @@ async def qr_login(
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError("aiohttp is required for Weixin QR login")

-    async with aiohttp.ClientSession(trust_env=True) as session:
+    async with aiohttp.ClientSession() as session:
        try:
            qr_resp = await _api_get(
                session,
@@ -1048,10 +1042,6 @@ class WeixinAdapter(BasePlatformAdapter):

    MAX_MESSAGE_LENGTH = 4000

-    # WeChat does not support editing sent messages — streaming must use the
-    # fallback "send-final-only" path so the cursor (▉) is never left visible.
-    SUPPORTS_MESSAGE_EDITING = False
-
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.WEIXIN)
        extra = config.extra or {}
@@ -1134,7 +1124,7 @@ class WeixinAdapter(BasePlatformAdapter):
        except Exception as exc:
            logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc)

-        self._session = aiohttp.ClientSession(trust_env=True)
+        self._session = aiohttp.ClientSession()
        self._token_store.restore(self._account_id)
        self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll")
        self._mark_connected()
@@ -1461,7 +1451,7 @@ class WeixinAdapter(BasePlatformAdapter):
        context_token = self._token_store.get(self._account_id, chat_id)
        last_message_id: Optional[str] = None
        try:
-            chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()]
+            chunks = self._split_text(self.format_message(content))
            for idx, chunk in enumerate(chunks):
                client_id = f"hermes-weixin-{uuid.uuid4().hex}"
                await self._send_text_chunk(
@@ -1565,33 +1555,6 @@ class WeixinAdapter(BasePlatformAdapter):
            logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
            return SendResult(success=False, error=str(exc))

-    async def send_video(
-        self,
-        chat_id: str,
-        video_path: str,
-        caption: Optional[str] = None,
-        reply_to: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        if not self._session or not self._token:
-            return SendResult(success=False, error="Not connected")
-        try:
-            message_id = await self._send_file(chat_id, video_path, caption or "")
-            return SendResult(success=True, message_id=message_id)
-        except Exception as exc:
-            logger.error("[%s] send_video failed to=%s: %s", self.name, _safe_id(chat_id), exc)
-            return SendResult(success=False, error=str(exc))
-
-    async def send_voice(
-        self,
-        chat_id: str,
-        audio_path: str,
-        caption: Optional[str] = None,
-        reply_to: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
-
    async def _download_remote_media(self, url: str) -> str:
        from tools.url_safety import is_safe_url

@@ -1614,7 +1577,6 @@ class WeixinAdapter(BasePlatformAdapter):
        filekey = secrets.token_hex(16)
        aes_key = secrets.token_bytes(16)
        rawsize = len(plaintext)
-        rawfilemd5 = hashlib.md5(plaintext).hexdigest()
        upload_response = await _get_upload_url(
            self._session,
            base_url=self._base_url,
@@ -1623,42 +1585,41 @@ class WeixinAdapter(BasePlatformAdapter):
            media_type=media_type,
            filekey=filekey,
            rawsize=rawsize,
-            rawfilemd5=rawfilemd5,
+            rawfilemd5=hashlib.md5(plaintext).hexdigest(),
            filesize=_aes_padded_size(rawsize),
            aeskey_hex=aes_key.hex(),
        )
        upload_param = str(upload_response.get("upload_param") or "")
        upload_full_url = str(upload_response.get("upload_full_url") or "")
        ciphertext = _aes128_ecb_encrypt(plaintext, aes_key)
-
-        # Prefer upload_full_url (direct CDN), fall back to constructed CDN URL
-        # from upload_param.  Both paths use POST — the old PUT for
-        # upload_full_url caused 404s on the WeChat CDN.
-        if upload_full_url:
-            upload_url = upload_full_url
-        elif upload_param:
-            upload_url = _cdn_upload_url(self._cdn_base_url, upload_param, filekey)
+        if upload_param:
+            encrypted_query_param = await _upload_ciphertext(
+                self._session,
+                ciphertext=ciphertext,
+                cdn_base_url=self._cdn_base_url,
+                upload_param=upload_param,
+                filekey=filekey,
+            )
+        elif upload_full_url:
+            timeout = aiohttp.ClientTimeout(total=120)
+            async with self._session.put(
+                upload_full_url,
+                data=ciphertext,
+                headers={"Content-Type": "application/octet-stream"},
+                timeout=timeout,
+            ) as response:
+                response.raise_for_status()
+                encrypted_query_param = response.headers.get("x-encrypted-param") or filekey
        else:
            raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}")

-        encrypted_query_param = await _upload_ciphertext(
-            self._session,
-            ciphertext=ciphertext,
-            upload_url=upload_url,
-        )
-
        context_token = self._token_store.get(self._account_id, chat_id)
-        # The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes).
-        # Sending base64(raw_bytes) causes images to show as grey boxes on the
-        # receiver side because the decryption key doesn't match.
-        aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
        media_item = item_builder(
            encrypt_query_param=encrypted_query_param,
-            aes_key_for_api=aes_key_for_api,
+            aes_key_b64=base64.b64encode(aes_key).decode("ascii"),
            ciphertext_size=len(ciphertext),
            plaintext_size=rawsize,
            filename=Path(path).name,
-            rawfilemd5=rawfilemd5,
        )

        last_message_id = None
@@ -1698,53 +1659,39 @@ class WeixinAdapter(BasePlatformAdapter):
    def _outbound_media_builder(self, path: str):
        mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
        if mime.startswith("image/"):
-            return MEDIA_IMAGE, lambda **kw: {
+            return MEDIA_IMAGE, lambda **kwargs: {
                "type": ITEM_IMAGE,
                "image_item": {
                    "media": {
-                        "encrypt_query_param": kw["encrypt_query_param"],
-                        "aes_key": kw["aes_key_for_api"],
+                        "encrypt_query_param": kwargs["encrypt_query_param"],
+                        "aes_key": kwargs["aes_key_b64"],
                        "encrypt_type": 1,
                    },
-                    "mid_size": kw["ciphertext_size"],
+                    "mid_size": kwargs["ciphertext_size"],
                },
            }
        if mime.startswith("video/"):
-            return MEDIA_VIDEO, lambda **kw: {
+            return MEDIA_VIDEO, lambda **kwargs: {
                "type": ITEM_VIDEO,
                "video_item": {
                    "media": {
-                        "encrypt_query_param": kw["encrypt_query_param"],
-                        "aes_key": kw["aes_key_for_api"],
+                        "encrypt_query_param": kwargs["encrypt_query_param"],
+                        "aes_key": kwargs["aes_key_b64"],
                        "encrypt_type": 1,
                    },
-                    "video_size": kw["ciphertext_size"],
-                    "play_length": kw.get("play_length", 0),
-                    "video_md5": kw.get("rawfilemd5", ""),
+                    "video_size": kwargs["ciphertext_size"],
                },
            }
-        if mime.startswith("audio/") or path.endswith(".silk"):
-            return MEDIA_VOICE, lambda **kw: {
-                "type": ITEM_VOICE,
-                "voice_item": {
-                    "media": {
-                        "encrypt_query_param": kw["encrypt_query_param"],
-                        "aes_key": kw["aes_key_for_api"],
-                        "encrypt_type": 1,
-                    },
-                    "playtime": kw.get("playtime", 0),
-                },
-            }
-        return MEDIA_FILE, lambda **kw: {
+        return MEDIA_FILE, lambda **kwargs: {
            "type": ITEM_FILE,
            "file_item": {
                "media": {
-                    "encrypt_query_param": kw["encrypt_query_param"],
-                    "aes_key": kw["aes_key_for_api"],
+                    "encrypt_query_param": kwargs["encrypt_query_param"],
+                    "aes_key": kwargs["aes_key_b64"],
                    "encrypt_type": 1,
                },
-                "file_name": kw["filename"],
-                "len": str(kw["plaintext_size"]),
+                "file_name": kwargs["filename"],
+                "len": str(kwargs["plaintext_size"]),
            },
        }

@@ -1784,7 +1731,7 @@ async def send_weixin_direct(
    token_store.restore(account_id)
    context_token = token_store.get(account_id, chat_id)

-    async with aiohttp.ClientSession(trust_env=True) as session:
+    async with aiohttp.ClientSession() as session:
        adapter = WeixinAdapter(
            PlatformConfig(
                enabled=True,
@@ -120,9 +120,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - session_path: Path to store WhatsApp session data
    """
    
-    # WhatsApp message limits — practical UX limit, not protocol max.
-    # WhatsApp allows ~65K but long messages are unreadable on mobile.
-    MAX_MESSAGE_LENGTH = 4096
+    # WhatsApp message limits
+    MAX_MESSAGE_LENGTH = 65536  # WhatsApp allows longer messages
    
    # Default bridge location relative to the hermes-agent install
    _DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
@@ -532,63 +531,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
        self._close_bridge_log()
        print(f"[{self.name}] Disconnected")
    
-    def format_message(self, content: str) -> str:
-        """Convert standard markdown to WhatsApp-compatible formatting.
-
-        WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```,
-        and monospaced `inline`. Standard markdown uses different syntax
-        for bold/italic/strikethrough, so we convert here.
-
-        Code blocks (``` fenced) and inline code (`) are protected from
-        conversion via placeholder substitution.
-        """
-        if not content:
-            return content
-
-        # --- 1. Protect fenced code blocks from formatting changes ---
-        _FENCE_PH = "\x00FENCE"
-        fences: list[str] = []
-
-        def _save_fence(m: re.Match) -> str:
-            fences.append(m.group(0))
-            return f"{_FENCE_PH}{len(fences) - 1}\x00"
-
-        result = re.sub(r"```[\s\S]*?```", _save_fence, content)
-
-        # --- 2. Protect inline code ---
-        _CODE_PH = "\x00CODE"
-        codes: list[str] = []
-
-        def _save_code(m: re.Match) -> str:
-            codes.append(m.group(0))
-            return f"{_CODE_PH}{len(codes) - 1}\x00"
-
-        result = re.sub(r"`[^`\n]+`", _save_code, result)
-
-        # --- 3. Convert markdown formatting to WhatsApp syntax ---
-        # Bold: **text** or __text__ → *text*
-        result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
-        result = re.sub(r"__(.+?)__", r"*\1*", result)
-        # Strikethrough: ~~text~~ → ~text~
-        result = re.sub(r"~~(.+?)~~", r"~\1~", result)
-        # Italic: *text* is already WhatsApp italic — leave as-is
-        # _text_ is already WhatsApp italic — leave as-is
-
-        # --- 4. Convert markdown headers to bold text ---
-        # # Header → *Header*
-        result = re.sub(r"^#{1,6}\s+(.+)$", r"*\1*", result, flags=re.MULTILINE)
-
-        # --- 5. Convert markdown links: [text](url) → text (url) ---
-        result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)
-
-        # --- 6. Restore protected sections ---
-        for i, fence in enumerate(fences):
-            result = result.replace(f"{_FENCE_PH}{i}\x00", fence)
-        for i, code in enumerate(codes):
-            result = result.replace(f"{_CODE_PH}{i}\x00", code)
-
-        return result
-
    async def send(
        self,
        chat_id: str,
@@ -596,57 +538,38 @@ class WhatsAppAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> SendResult:
-        """Send a message via the WhatsApp bridge.
-
-        Formats markdown for WhatsApp, splits long messages into chunks
-        that preserve code block boundaries, and sends each chunk sequentially.
-        """
+        """Send a message via the WhatsApp bridge."""
        if not self._running or not self._http_session:
            return SendResult(success=False, error="Not connected")
        bridge_exit = await self._check_managed_bridge_exit()
        if bridge_exit:
            return SendResult(success=False, error=bridge_exit)
-
-        if not content or not content.strip():
-            return SendResult(success=True, message_id=None)
-
+        
        try:
            import aiohttp

-            # Format and chunk the message
-            formatted = self.format_message(content)
-            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
-
-            last_message_id = None
-            for chunk in chunks:
-                payload: Dict[str, Any] = {
-                    "chatId": chat_id,
-                    "message": chunk,
-                }
-                if reply_to and last_message_id is None:
-                    # Only reply-to on the first chunk
-                    payload["replyTo"] = reply_to
-
-                async with self._http_session.post(
-                    f"http://127.0.0.1:{self._bridge_port}/send",
-                    json=payload,
-                    timeout=aiohttp.ClientTimeout(total=30)
-                ) as resp:
-                    if resp.status == 200:
-                        data = await resp.json()
-                        last_message_id = data.get("messageId")
-                    else:
-                        error = await resp.text()
-                        return SendResult(success=False, error=error)
-
-                # Small delay between chunks to avoid rate limiting
-                if len(chunks) > 1:
-                    await asyncio.sleep(0.3)
-
-            return SendResult(
-                success=True,
-                message_id=last_message_id,
-            )
+            payload = {
+                "chatId": chat_id,
+                "message": content,
+            }
+            if reply_to:
+                payload["replyTo"] = reply_to
+            
+            async with self._http_session.post(
+                f"http://127.0.0.1:{self._bridge_port}/send",
+                json=payload,
+                timeout=aiohttp.ClientTimeout(total=30)
+            ) as resp:
+                if resp.status == 200:
+                    data = await resp.json()
+                    return SendResult(
+                        success=True,
+                        message_id=data.get("messageId"),
+                        raw_response=data
+                    )
+                else:
+                    error = await resp.text()
+                    return SendResult(success=False, error=error)
        except Exception as e:
            return SendResult(success=False, error=str(e))

@@ -186,8 +186,6 @@ if _config_path.exists():
                os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
            if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
                os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
-            if "gateway_notify_interval" in _agent_cfg and "HERMES_AGENT_NOTIFY_INTERVAL" not in os.environ:
-                os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
            if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
                os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
        _display_cfg = _cfg.get("display", {})
@@ -878,47 +876,13 @@ class GatewayRunner:
                "api_mode": override.get("api_mode"),
            }
            if override_runtime.get("api_key"):
-                logger.debug(
-                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
-                    (resolved_session_key or "")[:30], model, override_model,
-                    override_runtime.get("provider"),
-                )
                return override_model, override_runtime
-            # Override exists but has no api_key — fall through to env-based
-            # resolution and apply model/provider from the override on top.
-            logger.debug(
-                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
-                (resolved_session_key or "")[:30], model, override_model,
-            )
-        else:
-            logger.debug(
-                "No session model override: session=%s config_model=%s override_keys=%s",
-                (resolved_session_key or "")[:30], model,
-                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
-            )

        runtime_kwargs = _resolve_runtime_agent_kwargs()
        if override and resolved_session_key:
            model, runtime_kwargs = self._apply_session_model_override(
                resolved_session_key, model, runtime_kwargs
            )
-
-        # When the config has no model.default but a provider was resolved
-        # (e.g. user ran `hermes auth add openai-codex` without `hermes model`),
-        # fall back to the provider's first catalog model so the API call
-        # doesn't fail with "model must be a non-empty string".
-        if not model and runtime_kwargs.get("provider"):
-            try:
-                from hermes_cli.models import get_default_model_for_provider
-                model = get_default_model_for_provider(runtime_kwargs["provider"])
-                if model:
-                    logger.info(
-                        "No model configured — defaulting to %s for provider %s",
-                        model, runtime_kwargs["provider"],
-                    )
-            except Exception:
-                pass
-
        return model, runtime_kwargs

    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
@@ -1537,25 +1501,12 @@ class GatewayRunner:
        # This prevents stuck sessions from being blindly resumed on restart,
        # which can create an unrecoverable loop (#7536).  Suspended sessions
        # auto-reset on the next incoming message, giving the user a clean start.
-        #
-        # SKIP suspension after a clean (graceful) shutdown — the previous
-        # process already drained active agents, so sessions aren't stuck.
-        # This prevents unwanted auto-resets after `hermes update`,
-        # `hermes gateway restart`, or `/restart`.
-        _clean_marker = _hermes_home / ".clean_shutdown"
-        if _clean_marker.exists():
-            logger.info("Previous gateway exited cleanly — skipping session suspension")
-            try:
-                _clean_marker.unlink()
-            except Exception:
-                pass
-        else:
-            try:
-                suspended = self.session_store.suspend_recently_active()
-                if suspended:
-                    logger.info("Suspended %d in-flight session(s) from previous run", suspended)
-            except Exception as e:
-                logger.warning("Session suspension on startup failed: %s", e)
+        try:
+            suspended = self.session_store.suspend_recently_active()
+            if suspended:
+                logger.info("Suspended %d in-flight session(s) from previous run", suspended)
+        except Exception as e:
+            logger.warning("Session suspension on startup failed: %s", e)

        connected_count = 0
        enabled_platform_count = 0
@@ -1717,9 +1668,6 @@ class GatewayRunner:
        ):
            self._schedule_update_notification_watch()

-        # Notify the chat that initiated /restart that the gateway is back.
-        await self._send_restart_notification()
-
        # Drain any recovered process watchers (from crash recovery checkpoint)
        try:
            from tools.process_registry import process_registry
@@ -1819,11 +1767,6 @@ class GatewayRunner:
                        with self.session_store._lock:
                            entry.memory_flushed = True
                            self.session_store._save()
-                        if self._session_db:
-                            try:
-                                self._session_db.set_memory_flushed(entry.session_id)
-                            except Exception:
-                                pass
                        logger.debug(
                            "Memory flush completed for session %s",
                            entry.session_id,
@@ -1841,11 +1784,6 @@ class GatewayRunner:
                            with self.session_store._lock:
                                entry.memory_flushed = True
                                self.session_store._save()
-                            if self._session_db:
-                                try:
-                                    self._session_db.set_memory_flushed(entry.session_id)
-                                except Exception:
-                                    pass
                            _flush_failures.pop(entry.session_id, None)
                        else:
                            logger.debug(
@@ -2094,15 +2032,6 @@ class GatewayRunner:
            from gateway.status import remove_pid_file
            remove_pid_file()

-            # Write a clean-shutdown marker so the next startup knows this
-            # wasn't a crash.  suspend_recently_active() only needs to run
-            # after unexpected exits — graceful shutdowns already drain
-            # active agents, so there's no stuck-session risk.
-            try:
-                (_hermes_home / ".clean_shutdown").touch()
-            except Exception:
-                pass
-
            if self._restart_requested and self._restart_via_service:
                self._exit_code = GATEWAY_SERVICE_RESTART_EXIT_CODE
                self._exit_reason = self._exit_reason or "Gateway restart requested"
@@ -2770,9 +2699,6 @@ class GatewayRunner:
        if canonical == "update":
            return await self._handle_update_command(event)

-        if canonical == "debug":
-            return await self._handle_debug_command(event)
-
        if canonical == "title":
            return await self._handle_title_command(event)

@@ -4160,36 +4086,11 @@ class GatewayRunner:
                return f"⏳ Draining {count} active agent(s) before restart..."
            return "⏳ Gateway restart already in progress..."

-        # Save the requester's routing info so the new gateway process can
-        # notify them once it comes back online.
-        try:
-            import json as _json
-            notify_data = {
-                "platform": event.source.platform.value if event.source.platform else None,
-                "chat_id": event.source.chat_id,
-            }
-            if event.source.thread_id:
-                notify_data["thread_id"] = event.source.thread_id
-            (_hermes_home / ".restart_notify.json").write_text(
-                _json.dumps(notify_data)
-            )
-        except Exception as e:
-            logger.debug("Failed to write restart notify file: %s", e)
-
        active_agents = self._running_agent_count()
-        # When running under a service manager (systemd/launchd), use the
-        # service restart path: exit with code 75 so the service manager
-        # restarts us.  The detached subprocess approach (setsid + bash)
-        # doesn't work under systemd because KillMode=mixed kills all
-        # processes in the cgroup, including the detached helper.
-        _under_service = bool(os.environ.get("INVOCATION_ID"))  # systemd sets this
-        if _under_service:
-            self.request_restart(detached=False, via_service=True)
-        else:
-            self.request_restart(detached=True, via_service=False)
+        self.request_restart(detached=True, via_service=False)
        if active_agents:
            return f"⏳ Draining {active_agents} active agent(s) before restart..."
-        return "♻ Restarting gateway. If you aren't notified within 60 seconds, restart from the console with `hermes gateway restart`."
+        return "♻ Restarting gateway..."

    async def _handle_help_command(self, event: MessageEvent) -> str:
        """Handle /help command - list available commands."""
@@ -4403,11 +4304,6 @@ class GatewayRunner:
                            "api_mode": result.api_mode,
                        }

-                        # Evict cached agent so the next turn creates a fresh
-                        # agent from the override rather than relying on the
-                        # stale cache signature to trigger a rebuild.
-                        _self._evict_cached_agent(_session_key)
-
                        # Build confirmation text
                        plabel = result.provider_label or result.target_provider
                        lines = [f"Model switched to `{result.new_model}`"]
@@ -4521,10 +4417,6 @@ class GatewayRunner:
            "api_mode": result.api_mode,
        }

-        # Evict cached agent so the next turn creates a fresh agent from the
-        # override rather than relying on cache signature mismatch detection.
-        self._evict_cached_agent(session_key)
-
        # Persist to config if --global
        if persist_global:
            try:
@@ -6469,61 +6361,6 @@ class GatewayRunner:
        Platform.FEISHU, Platform.WECOM, Platform.WECOM_CALLBACK, Platform.WEIXIN, Platform.BLUEBUBBLES, Platform.LOCAL,
    })

-    async def _handle_debug_command(self, event: MessageEvent) -> str:
-        """Handle /debug — upload debug report + logs and return paste URLs."""
-        import asyncio
-        from hermes_cli.debug import (
-            _capture_dump, collect_debug_report, _read_full_log,
-            upload_to_pastebin,
-        )
-
-        loop = asyncio.get_running_loop()
-
-        # Run blocking I/O (dump capture, log reads, uploads) in a thread.
-        def _collect_and_upload():
-            dump_text = _capture_dump()
-            report = collect_debug_report(log_lines=200, dump_text=dump_text)
-            agent_log = _read_full_log("agent")
-            gateway_log = _read_full_log("gateway")
-
-            if agent_log:
-                agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
-            if gateway_log:
-                gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
-
-            urls = {}
-            failures = []
-
-            try:
-                urls["Report"] = upload_to_pastebin(report)
-            except Exception as exc:
-                return f"✗ Failed to upload debug report: {exc}"
-
-            if agent_log:
-                try:
-                    urls["agent.log"] = upload_to_pastebin(agent_log)
-                except Exception:
-                    failures.append("agent.log")
-
-            if gateway_log:
-                try:
-                    urls["gateway.log"] = upload_to_pastebin(gateway_log)
-                except Exception:
-                    failures.append("gateway.log")
-
-            lines = ["**Debug report uploaded:**", ""]
-            label_width = max(len(k) for k in urls)
-            for label, url in urls.items():
-                lines.append(f"`{label:<{label_width}}`  {url}")
-
-            if failures:
-                lines.append(f"\n_(failed to upload: {', '.join(failures)})_")
-
-            lines.append("\nShare these links with the Hermes team for support.")
-            return "\n".join(lines)
-
-        return await loop.run_in_executor(None, _collect_and_upload)
-
    async def _handle_update_command(self, event: MessageEvent) -> str:
        """Handle /update command — update Hermes Agent to the latest version.

@@ -6766,12 +6603,8 @@ class GatewayRunner:
            if buffer.strip() and (loop.time() - last_stream_time) >= stream_interval:
                await _flush_buffer()

-            # Check for prompts — only forward if we haven't already sent
-            # one that's still awaiting a response.  Without this guard the
-            # watcher would re-read the same .update_prompt.json every poll
-            # cycle and spam the user with duplicate prompt messages.
-            if (prompt_path.exists() and session_key
-                    and not self._update_prompt_pending.get(session_key)):
+            # Check for prompts
+            if prompt_path.exists() and session_key:
                try:
                    prompt_data = json.loads(prompt_path.read_text())
                    prompt_text = prompt_data.get("prompt", "")
@@ -6803,11 +6636,6 @@ class GatewayRunner:
                                f"or type your answer directly."
                            )
                        self._update_prompt_pending[session_key] = True
-                        # Remove the prompt file so it isn't re-read on the
-                        # next poll cycle.  The update process only needs
-                        # .update_response to continue — it doesn't re-check
-                        # .update_prompt.json while waiting.
-                        prompt_path.unlink(missing_ok=True)
                        logger.info("Forwarded update prompt to %s: %s", session_key, prompt_text[:80])
                except (json.JSONDecodeError, OSError) as e:
                    logger.debug("Failed to read update prompt: %s", e)
@@ -6918,48 +6746,6 @@ class GatewayRunner:

        return True

-    async def _send_restart_notification(self) -> None:
-        """Notify the chat that initiated /restart that the gateway is back."""
-        import json as _json
-
-        notify_path = _hermes_home / ".restart_notify.json"
-        if not notify_path.exists():
-            return
-
-        try:
-            data = _json.loads(notify_path.read_text())
-            platform_str = data.get("platform")
-            chat_id = data.get("chat_id")
-            thread_id = data.get("thread_id")
-
-            if not platform_str or not chat_id:
-                return
-
-            platform = Platform(platform_str)
-            adapter = self.adapters.get(platform)
-            if not adapter:
-                logger.debug(
-                    "Restart notification skipped: %s adapter not connected",
-                    platform_str,
-                )
-                return
-
-            metadata = {"thread_id": thread_id} if thread_id else None
-            await adapter.send(
-                chat_id,
-                "♻ Gateway restarted successfully. Your session continues.",
-                metadata=metadata,
-            )
-            logger.info(
-                "Sent restart notification to %s:%s",
-                platform_str,
-                chat_id,
-            )
-        except Exception as e:
-            logger.warning("Restart notification failed: %s", e)
-        finally:
-            notify_path.unlink(missing_ok=True)
-
    def _set_session_env(self, context: SessionContext) -> list:
        """Set session context variables for the current async task.

@@ -7491,11 +7277,9 @@ class GatewayRunner:
                    _pl = get_tool_preview_max_len()
                    import json as _json
                    args_str = _json.dumps(args, ensure_ascii=False, default=str)
-                    # When tool_preview_length is 0 (default), don't truncate
-                    # in verbose mode — the user explicitly asked for full
-                    # detail.  Platform message-length limits handle the rest.
-                    if _pl > 0 and len(args_str) > _pl:
-                        args_str = args_str[:_pl - 3] + "..."
+                    _cap = _pl if _pl > 0 else 200
+                    if len(args_str) > _cap:
+                        args_str = args_str[:_cap - 3] + "..."
                    msg = f"{emoji} {tool_name}({list(args.keys())})\n{args_str}"
                elif preview:
                    msg = f"{emoji} {tool_name}: \"{preview}\""
@@ -7761,10 +7545,6 @@ class GatewayRunner:
                    session_key=session_key,
                    user_config=user_config,
                )
-                logger.debug(
-                    "run_agent resolved: model=%s provider=%s session=%s",
-                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
-                )
            except Exception as exc:
                return {
                    "final_response": f"⚠️ Provider authentication failed: {exc}",
@@ -7805,18 +7585,10 @@ class GatewayRunner:
                    from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
                    _adapter = self.adapters.get(source.platform)
                    if _adapter:
-                        # Platforms that don't support editing sent messages
-                        # (e.g. WeChat) must not show a cursor in intermediate
-                        # sends — the cursor would be permanently visible because
-                        # it can never be edited away.  Use an empty cursor for
-                        # such platforms so streaming still delivers the final
-                        # response, just without the typing indicator.
-                        _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
-                        _effective_cursor = _scfg.cursor if _adapter_supports_edit else ""
                        _consumer_cfg = StreamConsumerConfig(
                            edit_interval=_scfg.edit_interval,
                            buffer_threshold=_scfg.buffer_threshold,
-                            cursor=_effective_cursor,
+                            cursor=_scfg.cursor,
                        )
                        _stream_consumer = GatewayStreamConsumer(
                            adapter=_adapter,
@@ -8274,16 +8046,8 @@ class GatewayRunner:
                    if hasattr(_adapter, 'has_pending_interrupt') and _adapter.has_pending_interrupt(session_key):
                        agent = agent_holder[0]
                        if agent:
-                            # Peek at the pending message text WITHOUT consuming it.
-                            # The message must remain in _pending_messages so the
-                            # post-run dequeue at _dequeue_pending_event() can
-                            # retrieve the full MessageEvent (with media metadata).
-                            # If we pop here, a race exists: the agent may finish
-                            # before checking _interrupt_requested, and the message
-                            # is lost — neither the interrupt path nor the dequeue
-                            # path finds it.
-                            _peek_event = _adapter._pending_messages.get(session_key)
-                            pending_text = _peek_event.text if _peek_event else None
+                            pending_event = _adapter.get_pending_message(session_key)
+                            pending_text = pending_event.text if pending_event else None
                            logger.debug("Interrupt detected from adapter, signaling agent...")
                            agent.interrupt(pending_text)
                            _interrupt_detected.set()
@@ -8296,17 +8060,11 @@ class GatewayRunner:
        interrupt_monitor = asyncio.create_task(monitor_for_interrupt())

        # Periodic "still working" notifications for long-running tasks.
-        # Fires every N seconds so the user knows the agent hasn't died.
-        # Config: agent.gateway_notify_interval in config.yaml, or
-        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 600s (10 min).
-        # 0 = disable notifications.
-        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
-        _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
+        # Fires every 10 minutes so the user knows the agent hasn't died.
+        _NOTIFY_INTERVAL = 600  # 10 minutes
        _notify_start = time.time()

        async def _notify_long_running():
-            if _NOTIFY_INTERVAL is None:
-                return  # Notifications disabled (gateway_notify_interval: 0)
            _notify_adapter = self.adapters.get(source.platform)
            if not _notify_adapter:
                return
@@ -8380,7 +8138,7 @@ class GatewayRunner:
                        if (_backup_adapter and _backup_agent
                                and hasattr(_backup_adapter, 'has_pending_interrupt')
                                and _backup_adapter.has_pending_interrupt(session_key)):
-                            _bp_event = _backup_adapter._pending_messages.get(session_key)
+                            _bp_event = _backup_adapter.get_pending_message(session_key)
                            _bp_text = _bp_event.text if _bp_event else None
                            logger.info(
                                "Backup interrupt detected for session %s "
@@ -8440,7 +8198,7 @@ class GatewayRunner:
                        if (_backup_adapter and _backup_agent
                                and hasattr(_backup_adapter, 'has_pending_interrupt')
                                and _backup_adapter.has_pending_interrupt(session_key)):
-                            _bp_event = _backup_adapter._pending_messages.get(session_key)
+                            _bp_event = _backup_adapter.get_pending_message(session_key)
                            _bp_text = _bp_event.text if _bp_event else None
                            logger.info(
                                "Backup interrupt detected for session %s "
@@ -764,19 +764,6 @@ class SessionStore:
                self._db.create_session(**db_create_kwargs)
            except Exception as e:
                print(f"[gateway] Warning: Failed to create SQLite session: {e}")
-            # Write gateway routing metadata to state.db so it can serve
-            # as the single source of truth (replacing sessions.json reads).
-            try:
-                self._db.set_gateway_metadata(
-                    session_id=entry.session_id,
-                    session_key=entry.session_key,
-                    platform=entry.platform.value if entry.platform else None,
-                    chat_type=entry.chat_type,
-                    origin_json=json.dumps(entry.origin.to_dict()) if entry.origin else None,
-                    display_name=entry.display_name,
-                )
-            except Exception as e:
-                logger.debug("Failed to write gateway metadata to state.db: %s", e)

        return entry

@@ -882,17 +869,6 @@ class SessionStore:
                self._db.create_session(**db_create_kwargs)
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
-            try:
-                self._db.set_gateway_metadata(
-                    session_id=new_entry.session_id,
-                    session_key=new_entry.session_key,
-                    platform=new_entry.platform.value if new_entry.platform else None,
-                    chat_type=new_entry.chat_type,
-                    origin_json=json.dumps(new_entry.origin.to_dict()) if new_entry.origin else None,
-                    display_name=new_entry.display_name,
-                )
-            except Exception as e:
-                logger.debug("Failed to write gateway metadata to state.db: %s", e)

        return new_entry

@@ -942,20 +918,6 @@ class SessionStore:
            except Exception as e:
                logger.debug("Session DB end_session failed: %s", e)

-        # Update gateway metadata on the target session
-        if self._db and new_entry:
-            try:
-                self._db.set_gateway_metadata(
-                    session_id=new_entry.session_id,
-                    session_key=new_entry.session_key,
-                    platform=new_entry.platform.value if new_entry.platform else None,
-                    chat_type=new_entry.chat_type,
-                    origin_json=json.dumps(new_entry.origin.to_dict()) if new_entry.origin else None,
-                    display_name=new_entry.display_name,
-                )
-            except Exception as e:
-                logger.debug("Failed to write gateway metadata to state.db: %s", e)
-
        return new_entry

    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
@@ -1303,49 +1303,6 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]:
    }


-def _write_codex_cli_tokens(
-    access_token: str,
-    refresh_token: str,
-    *,
-    last_refresh: Optional[str] = None,
-) -> None:
-    """Write refreshed tokens back to ~/.codex/auth.json.
-
-    OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
-    When Hermes refreshes a token it consumes the old refresh_token; if we
-    don't write the new pair back, the Codex CLI (or VS Code extension) will
-    fail with ``refresh_token_reused`` on its next refresh attempt.
-
-    This mirrors the Anthropic write-back to ~/.claude/.credentials.json
-    via ``_write_claude_code_credentials()``.
-    """
-    codex_home = os.getenv("CODEX_HOME", "").strip()
-    if not codex_home:
-        codex_home = str(Path.home() / ".codex")
-    auth_path = Path(codex_home).expanduser() / "auth.json"
-    try:
-        existing: Dict[str, Any] = {}
-        if auth_path.is_file():
-            existing = json.loads(auth_path.read_text(encoding="utf-8"))
-        if not isinstance(existing, dict):
-            existing = {}
-
-        tokens_dict = existing.get("tokens")
-        if not isinstance(tokens_dict, dict):
-            tokens_dict = {}
-        tokens_dict["access_token"] = access_token
-        tokens_dict["refresh_token"] = refresh_token
-        existing["tokens"] = tokens_dict
-        if last_refresh is not None:
-            existing["last_refresh"] = last_refresh
-
-        auth_path.parent.mkdir(parents=True, exist_ok=True)
-        auth_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
-        auth_path.chmod(0o600)
-    except (OSError, IOError) as exc:
-        logger.debug("Failed to write refreshed tokens to %s: %s", auth_path, exc)
-
-
 def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None:
    """Save Codex OAuth tokens to Hermes auth store (~/.hermes/auth.json)."""
    if last_refresh is None:
@@ -1468,12 +1425,6 @@ def _refresh_codex_auth_tokens(
    updated_tokens["refresh_token"] = refreshed["refresh_token"]

    _save_codex_tokens(updated_tokens)
-    # Write back to ~/.codex/auth.json so Codex CLI / VS Code stay in sync.
-    _write_codex_cli_tokens(
-        refreshed["access_token"],
-        refreshed["refresh_token"],
-        last_refresh=refreshed.get("last_refresh"),
-    )
    return updated_tokens


@@ -201,7 +201,7 @@ def _validate_backup_zip(zf: zipfile.ZipFile) -> tuple[bool, str]:
        return False, "zip archive is empty"

    # Look for telltale files that a hermes home would have
-    markers = {"config.yaml", ".env", "state.db"}
+    markers = {"config.yaml", ".env", "hermes_state.db", "memory_store.db"}
    found = set()
    for n in names:
        # Could be at the root or one level deep (if someone zipped the directory)
@@ -11,7 +11,6 @@ Usage:

 import importlib.util
 import logging
-import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
@@ -53,99 +52,6 @@ _OPENCLAW_SCRIPT_INSTALLED = (
 # Known OpenClaw directory names (current + legacy)
 _OPENCLAW_DIR_NAMES = (".openclaw", ".clawdbot", ".moltbot")

-def _detect_openclaw_processes() -> list[str]:
-    """Detect running OpenClaw processes and services.
-
-    Returns a list of human-readable descriptions of what was found.
-    An empty list means nothing was detected.
-    """
-    found: list[str] = []
-
-    # -- systemd service (Linux) ------------------------------------------
-    if sys.platform != "win32":
-        try:
-            result = subprocess.run(
-                ["systemctl", "--user", "is-active", "openclaw-gateway.service"],
-                capture_output=True, text=True, timeout=5,
-            )
-            if result.stdout.strip() == "active":
-                found.append("systemd service: openclaw-gateway.service")
-        except (FileNotFoundError, subprocess.TimeoutExpired):
-            pass
-
-    # -- process scan ------------------------------------------------------
-    if sys.platform == "win32":
-        try:
-            for exe in ("openclaw.exe", "clawd.exe"):
-                result = subprocess.run(
-                    ["tasklist", "/FI", f"IMAGENAME eq {exe}"],
-                    capture_output=True, text=True, timeout=5,
-                )
-                if exe in result.stdout.lower():
-                    found.append(f"process: {exe}")
-
-            # Node.js-hosted OpenClaw — tasklist doesn't show command lines,
-            # so fall back to PowerShell.
-            ps_cmd = (
-                'Get-CimInstance Win32_Process -Filter "Name = \'node.exe\'" | '
-                'Where-Object { $_.CommandLine -match "openclaw|clawd" } | '
-                'Select-Object -First 1 ProcessId'
-            )
-            result = subprocess.run(
-                ["powershell", "-NoProfile", "-Command", ps_cmd],
-                capture_output=True, text=True, timeout=5,
-            )
-            if result.stdout.strip():
-                found.append(f"node.exe process with openclaw in command line (PID {result.stdout.strip()})")
-        except Exception:
-            pass
-    else:
-        try:
-            result = subprocess.run(
-                ["pgrep", "-f", "openclaw"],
-                capture_output=True, text=True, timeout=3,
-            )
-            if result.returncode == 0:
-                pids = result.stdout.strip().split()
-                found.append(f"openclaw process(es) (PIDs: {', '.join(pids)})")
-        except (FileNotFoundError, subprocess.TimeoutExpired):
-            pass
-
-    return found
-
-
-def _warn_if_openclaw_running(auto_yes: bool) -> None:
-    """Warn if OpenClaw is still running before migration.
-
-    Telegram, Discord, and Slack only allow one active connection per bot
-    token. Migrating while OpenClaw is running causes both to fight for the
-    same token.
-    """
-    running = _detect_openclaw_processes()
-    if not running:
-        return
-
-    print()
-    print_error("OpenClaw appears to be running:")
-    for detail in running:
-        print_info(f"  * {detail}")
-    print_info(
-        "Messaging platforms (Telegram, Discord, Slack) only allow one "
-        "active session per bot token. If you continue, both OpenClaw and "
-        "Hermes may try to use the same token, causing disconnects."
-    )
-    print_info("Recommendation: stop OpenClaw before migrating.")
-    print()
-    if auto_yes:
-        return
-    if not sys.stdin.isatty():
-        print_info("Non-interactive session — continuing to preview only.")
-        return
-    if not prompt_yes_no("Continue anyway?", default=False):
-        print_info("Migration cancelled. Stop OpenClaw and try again.")
-        sys.exit(0)
-
-
 def _warn_if_gateway_running(auto_yes: bool) -> None:
    """Check if a Hermes gateway is running with connected platforms.

@@ -381,11 +287,8 @@ def _cmd_migrate(args):
        print_info(f"Workspace:   {workspace_target}")
    print()

-    # Check if OpenClaw is still running — migrating tokens while both are
-    # active will cause conflicts (e.g. Telegram 409).
-    _warn_if_openclaw_running(auto_yes)
-
-    # Check if a Hermes gateway is running with connected platforms.
+    # Check if a gateway is running with connected platforms — migrating tokens
+    # while the gateway is active will cause conflicts (e.g. Telegram 409).
    _warn_if_gateway_running(auto_yes)

    # Ensure config.yaml exists before migration tries to read it
@@ -527,28 +430,6 @@ def _cmd_cleanup(args):
        print_success("No OpenClaw directories found. Nothing to clean up.")
        return

-    # Warn if OpenClaw is still running — archiving while the service is
-    # active causes it to recreate an empty skeleton directory (#8502).
-    running = _detect_openclaw_processes()
-    if running:
-        print()
-        print_error("OpenClaw appears to be still running:")
-        for detail in running:
-            print_info(f"  * {detail}")
-        print_info(
-            "Archiving .openclaw/ while the service is active may cause it to "
-            "immediately recreate an empty skeleton directory, destroying your config."
-        )
-        print_info("Stop OpenClaw first: systemctl --user stop openclaw-gateway.service")
-        print()
-        if not auto_yes:
-            if not sys.stdin.isatty():
-                print_info("Non-interactive session — aborting. Stop OpenClaw and re-run.")
-                return
-            if not prompt_yes_no("Proceed anyway?", default=False):
-                print_info("Aborted. Stop OpenClaw first, then re-run: hermes claw cleanup")
-                return
-
    total_archived = 0

    for source_dir in dirs_to_check:
@@ -129,7 +129,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
-    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
@@ -155,7 +154,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True, args_hint="<path>"),
    CommandDef("update", "Update Hermes Agent to the latest version", "Info",
               gateway_only=True),
-    CommandDef("debug", "Upload debug report (system info + logs) and get shareable links", "Info"),

    # Exit
    CommandDef("quit", "Exit the CLI", "Exit",
@@ -50,7 +50,6 @@ _EXTRA_ENV_KEYS = frozenset({
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
-    "MATRIX_RECOVERY_KEY",
 })
 import yaml

@@ -148,6 +147,25 @@ def managed_error(action: str = "modify configuration"):
 # Container-aware CLI (NixOS container mode)
 # =============================================================================

+def _is_inside_container() -> bool:
+    """Return True when this process appears to run inside a container.
+
+    Checks the Docker and Podman marker files first, then looks for a
+    docker/podman/lxc entry in PID 1's cgroup listing.
+    """
+    marker_files = ("/.dockerenv", "/run/.containerenv")
+    if any(os.path.exists(marker) for marker in marker_files):
+        return True
+    try:
+        with open("/proc/1/cgroup", "r") as cgroup_file:
+            cgroup_text = cgroup_file.read()
+    except OSError:
+        # Unreadable or missing /proc entry: treat as "not in a container".
+        return False
+    runtime_hints = ("docker", "podman", "/lxc/")
+    return any(hint in cgroup_text for hint in runtime_hints)
+
+
 def get_container_exec_info() -> Optional[dict]:
    """Read container mode metadata from HERMES_HOME/.container-mode.

@@ -162,8 +180,7 @@ def get_container_exec_info() -> Optional[dict]:
    if os.environ.get("HERMES_DEV") == "1":
        return None

-    from hermes_constants import is_container
-    if is_container():
+    if _is_inside_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"
@@ -337,10 +354,6 @@ DEFAULT_CONFIG = {
        # threshold before escalating to a full timeout.  The warning fires
        # once per run and does not interrupt the agent.  0 = disable warning.
        "gateway_timeout_warning": 900,
-        # Periodic "still working" notification interval (seconds).
-        # Sends a status message every N seconds so the user knows the
-        # agent hasn't died during long tasks.  0 = disable notifications.
-        "gateway_notify_interval": 600,
    },
    
    "terminal": {
@@ -1280,14 +1293,6 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
-    "MATRIX_RECOVERY_KEY": {
-        "description": "Matrix recovery key for cross-signing verification after device key rotation (from Element: Settings → Security → Recovery Key)",
-        "prompt": "Matrix recovery key",
-        "url": None,
-        "password": True,
-        "category": "messaging",
-        "advanced": True,
-    },
    "BLUEBUBBLES_SERVER_URL": {
        "description": "BlueBubbles server URL for iMessage integration (e.g. http://192.168.1.10:1234)",
        "prompt": "BlueBubbles server URL",
@@ -2636,28 +2641,6 @@ def save_env_value_secure(key: str, value: str) -> Dict[str, Any]:



-def reload_env() -> int:
-    """Re-read ~/.hermes/.env into os.environ. Returns count of vars updated.
-
-    Adds/updates vars that changed and removes vars that were deleted from
-    the .env file (but only vars known to Hermes — OPTIONAL_ENV_VARS and
-    _EXTRA_ENV_KEYS — to avoid clobbering unrelated environment).
-    """
-    env_vars = load_env()
-    known_keys = set(OPTIONAL_ENV_VARS.keys()) | _EXTRA_ENV_KEYS
-    count = 0
-    for key, value in env_vars.items():
-        if os.environ.get(key) != value:
-            os.environ[key] = value
-            count += 1
-    # Remove known Hermes vars that are no longer in .env
-    for key in known_keys:
-        if key not in env_vars and key in os.environ:
-            del os.environ[key]
-            count += 1
-    return count
-
-
 def get_env_value(key: str) -> Optional[str]:
    """Get a value from ~/.hermes/.env or environment."""
    # Check environment first
@@ -1,336 +0,0 @@
-"""``hermes debug`` — debug tools for Hermes Agent.
-
-Currently supports:
-    hermes debug share    Upload debug report (system info + logs) to a
-                          paste service and print a shareable URL.
-"""
-
-import io
-import sys
-import urllib.error
-import urllib.parse
-import urllib.request
-from pathlib import Path
-from typing import Optional
-
-from hermes_constants import get_hermes_home
-
-
-# ---------------------------------------------------------------------------
-# Paste services — try paste.rs first, dpaste.com as fallback.
-# ---------------------------------------------------------------------------
-
-_PASTE_RS_URL = "https://paste.rs/"
-_DPASTE_COM_URL = "https://dpaste.com/api/"
-
-# Maximum bytes to read from a single log file for upload.
-# paste.rs caps at ~1 MB; we stay under that with headroom.
-_MAX_LOG_BYTES = 512_000
-
-
-def _upload_paste_rs(content: str) -> str:
-    """Upload to paste.rs.  Returns the paste URL.
-
-    paste.rs accepts a plain POST body and returns the URL directly.
-    """
-    data = content.encode("utf-8")
-    req = urllib.request.Request(
-        _PASTE_RS_URL, data=data, method="POST",
-        headers={
-            "Content-Type": "text/plain; charset=utf-8",
-            "User-Agent": "hermes-agent/debug-share",
-        },
-    )
-    with urllib.request.urlopen(req, timeout=30) as resp:
-        url = resp.read().decode("utf-8").strip()
-    if not url.startswith("http"):
-        raise ValueError(f"Unexpected response from paste.rs: {url[:200]}")
-    return url
-
-
-def _upload_dpaste_com(content: str, expiry_days: int = 7) -> str:
-    """Upload to dpaste.com.  Returns the paste URL.
-
-    dpaste.com uses multipart form data.
-    """
-    boundary = "----HermesDebugBoundary9f3c"
-
-    def _field(name: str, value: str) -> str:
-        return (
-            f"--{boundary}\r\n"
-            f'Content-Disposition: form-data; name="{name}"\r\n'
-            f"\r\n"
-            f"{value}\r\n"
-        )
-
-    body = (
-        _field("content", content)
-        + _field("syntax", "text")
-        + _field("expiry_days", str(expiry_days))
-        + f"--{boundary}--\r\n"
-    ).encode("utf-8")
-
-    req = urllib.request.Request(
-        _DPASTE_COM_URL, data=body, method="POST",
-        headers={
-            "Content-Type": f"multipart/form-data; boundary={boundary}",
-            "User-Agent": "hermes-agent/debug-share",
-        },
-    )
-    with urllib.request.urlopen(req, timeout=30) as resp:
-        url = resp.read().decode("utf-8").strip()
-    if not url.startswith("http"):
-        raise ValueError(f"Unexpected response from dpaste.com: {url[:200]}")
-    return url
-
-
-def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
-    """Upload *content* to a paste service, trying paste.rs then dpaste.com.
-
-    Returns the paste URL on success, raises on total failure.
-    """
-    errors: list[str] = []
-
-    # Try paste.rs first (simple, fast)
-    try:
-        return _upload_paste_rs(content)
-    except Exception as exc:
-        errors.append(f"paste.rs: {exc}")
-
-    # Fallback: dpaste.com (supports expiry)
-    try:
-        return _upload_dpaste_com(content, expiry_days=expiry_days)
-    except Exception as exc:
-        errors.append(f"dpaste.com: {exc}")
-
-    raise RuntimeError(
-        "Failed to upload to any paste service:\n  " + "\n  ".join(errors)
-    )
-
-
-# ---------------------------------------------------------------------------
-# Log file reading
-# ---------------------------------------------------------------------------
-
-def _resolve_log_path(log_name: str) -> Optional[Path]:
-    """Find the log file for *log_name*, falling back to the .1 rotation.
-
-    Returns the path if found, or None.
-    """
-    from hermes_cli.logs import LOG_FILES
-
-    filename = LOG_FILES.get(log_name)
-    if not filename:
-        return None
-
-    log_dir = get_hermes_home() / "logs"
-    primary = log_dir / filename
-    if primary.exists() and primary.stat().st_size > 0:
-        return primary
-
-    # Fall back to the most recent rotated file (.1).
-    rotated = log_dir / f"{filename}.1"
-    if rotated.exists() and rotated.stat().st_size > 0:
-        return rotated
-
-    return None
-
-
-def _read_log_tail(log_name: str, num_lines: int) -> str:
-    """Read the last *num_lines* from a log file, or return a placeholder."""
-    from hermes_cli.logs import _read_last_n_lines
-
-    log_path = _resolve_log_path(log_name)
-    if log_path is None:
-        return "(file not found)"
-
-    try:
-        lines = _read_last_n_lines(log_path, num_lines)
-        return "".join(lines).rstrip("\n")
-    except Exception as exc:
-        return f"(error reading: {exc})"
-
-
-def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
-    """Read a log file for standalone upload.
-
-    Returns the file content (last *max_bytes* if truncated), or None if the
-    file doesn't exist or is empty.
-    """
-    log_path = _resolve_log_path(log_name)
-    if log_path is None:
-        return None
-
-    try:
-        size = log_path.stat().st_size
-        if size == 0:
-            return None
-
-        if size <= max_bytes:
-            return log_path.read_text(encoding="utf-8", errors="replace")
-
-        # File is larger than max_bytes — read the tail.
-        with open(log_path, "rb") as f:
-            f.seek(size - max_bytes)
-            # Skip partial line at the seek point.
-            f.readline()
-            content = f.read().decode("utf-8", errors="replace")
-        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
-    except Exception:
-        return None
-
-
-# ---------------------------------------------------------------------------
-# Debug report collection
-# ---------------------------------------------------------------------------
-
-def _capture_dump() -> str:
-    """Run ``hermes dump`` and return its stdout as a string."""
-    from hermes_cli.dump import run_dump
-
-    class _FakeArgs:
-        show_keys = False
-
-    old_stdout = sys.stdout
-    sys.stdout = capture = io.StringIO()
-    try:
-        run_dump(_FakeArgs())
-    except SystemExit:
-        pass
-    finally:
-        sys.stdout = old_stdout
-
-    return capture.getvalue()
-
-
-def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
-    """Build the summary debug report: system dump + log tails.
-
-    Parameters
-    ----------
-    log_lines
-        Number of recent lines to include per log file.
-    dump_text
-        Pre-captured dump output.  If empty, ``hermes dump`` is run
-        internally.
-
-    Returns the report as a plain-text string ready for upload.
-    """
-    buf = io.StringIO()
-
-    if not dump_text:
-        dump_text = _capture_dump()
-    buf.write(dump_text)
-
-    # ── Recent log tails (summary only) ──────────────────────────────────
-    buf.write("\n\n")
-    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(_read_log_tail("agent", log_lines))
-    buf.write("\n\n")
-
-    errors_lines = min(log_lines, 100)
-    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("errors", errors_lines))
-    buf.write("\n\n")
-
-    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("gateway", errors_lines))
-    buf.write("\n")
-
-    return buf.getvalue()
-
-
-# ---------------------------------------------------------------------------
-# CLI entry points
-# ---------------------------------------------------------------------------
-
-def run_debug_share(args):
-    """Collect debug report + full logs, upload each, print URLs."""
-    log_lines = getattr(args, "lines", 200)
-    expiry = getattr(args, "expire", 7)
-    local_only = getattr(args, "local", False)
-
-    print("Collecting debug report...")
-
-    # Capture dump once — prepended to every paste for context.
-    dump_text = _capture_dump()
-
-    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
-    agent_log = _read_full_log("agent")
-    gateway_log = _read_full_log("gateway")
-
-    # Prepend dump header to each full log so every paste is self-contained.
-    if agent_log:
-        agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
-    if gateway_log:
-        gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
-
-    if local_only:
-        print(report)
-        if agent_log:
-            print(f"\n\n{'=' * 60}")
-            print("FULL agent.log")
-            print(f"{'=' * 60}\n")
-            print(agent_log)
-        if gateway_log:
-            print(f"\n\n{'=' * 60}")
-            print("FULL gateway.log")
-            print(f"{'=' * 60}\n")
-            print(gateway_log)
-        return
-
-    print("Uploading...")
-    urls: dict[str, str] = {}
-    failures: list[str] = []
-
-    # 1. Summary report (required)
-    try:
-        urls["Report"] = upload_to_pastebin(report, expiry_days=expiry)
-    except RuntimeError as exc:
-        print(f"\nUpload failed: {exc}", file=sys.stderr)
-        print("\nFull report printed below — copy-paste it manually:\n")
-        print(report)
-        sys.exit(1)
-
-    # 2. Full agent.log (optional)
-    if agent_log:
-        try:
-            urls["agent.log"] = upload_to_pastebin(agent_log, expiry_days=expiry)
-        except Exception as exc:
-            failures.append(f"agent.log: {exc}")
-
-    # 3. Full gateway.log (optional)
-    if gateway_log:
-        try:
-            urls["gateway.log"] = upload_to_pastebin(gateway_log, expiry_days=expiry)
-        except Exception as exc:
-            failures.append(f"gateway.log: {exc}")
-
-    # Print results
-    label_width = max(len(k) for k in urls)
-    print(f"\nDebug report uploaded:")
-    for label, url in urls.items():
-        print(f"  {label:<{label_width}}  {url}")
-
-    if failures:
-        print(f"\n  (failed to upload: {', '.join(failures)})")
-
-    print(f"\nShare these links with the Hermes team for support.")
-
-
-def run_debug(args):
-    """Route debug subcommands."""
-    subcmd = getattr(args, "debug_command", None)
-    if subcmd == "share":
-        run_debug_share(args)
-    else:
-        # Default: show help
-        print("Usage: hermes debug share [--lines N] [--expire N] [--local]")
-        print()
-        print("Commands:")
-        print("  share    Upload debug report to a paste service and print URL")
-        print()
-        print("Options:")
-        print("  --lines N    Number of log lines to include (default: 200)")
-        print("  --expire N   Paste expiry in days (default: 7)")
-        print("  --local      Print report locally instead of uploading")
@@ -44,16 +44,6 @@ def _redact(value: str) -> str:
 def _gateway_status() -> str:
    """Return a short gateway status string."""
    if sys.platform.startswith("linux"):
-        from hermes_constants import is_container
-        if is_container():
-            try:
-                from hermes_cli.gateway import find_gateway_pids
-                pids = find_gateway_pids()
-                if pids:
-                    return f"running (docker, pid {pids[0]})"
-                return "stopped (docker)"
-            except Exception:
-                return "stopped (docker)"
        try:
            from hermes_cli.gateway import get_service_name
            svc = get_service_name()
@@ -331,7 +331,7 @@ def is_linux() -> bool:
    return sys.platform.startswith('linux')


-from hermes_constants import is_container, is_termux, is_wsl
+from hermes_constants import is_termux, is_wsl


 def _wsl_systemd_operational() -> bool:
@@ -353,9 +353,7 @@ def _wsl_systemd_operational() -> bool:


 def supports_systemd_services() -> bool:
-    if not is_linux() or is_termux() or is_container():
-        return False
-    if shutil.which("systemctl") is None:
+    if not is_linux() or is_termux():
        return False
    if is_wsl():
        return _wsl_systemd_operational()
@@ -485,21 +483,6 @@ def _journalctl_cmd(system: bool = False) -> list[str]:
    return ["journalctl"] if system else ["journalctl", "--user"]


-def _run_systemctl(args: list[str], *, system: bool = False, **kwargs) -> subprocess.CompletedProcess:
-    """Run a systemctl command, raising RuntimeError if systemctl is missing.
-
-    Defense-in-depth: callers are gated by ``supports_systemd_services()``,
-    but this ensures any future caller that bypasses the gate still gets a
-    clear error instead of a raw ``FileNotFoundError`` traceback.
-    """
-    try:
-        return subprocess.run(_systemctl_cmd(system) + args, **kwargs)
-    except FileNotFoundError:
-        raise RuntimeError(
-            "systemctl is not available on this system"
-        ) from None
-
-
 def _service_scope_label(system: bool = False) -> str:
    return "system" if system else "user"

@@ -768,22 +751,14 @@ def _remap_path_for_user(path: str, target_home_dir: str) -> str:

      /root/.hermes/hermes-agent  -> /home/alice/.hermes/hermes-agent
      /opt/hermes                 -> /opt/hermes  (kept as-is)
-
-    Note: this function intentionally does NOT resolve symlinks. A venv's
-    ``bin/python`` is typically a symlink to the base interpreter (e.g. a
-    uv-managed CPython at ``~/.local/share/uv/python/.../python3.11``);
-    resolving that symlink swaps the unit's ``ExecStart`` to a bare Python
-    that has none of the venv's site-packages, so the service crashes on
-    the first ``import``. Keep the symlinked path so the venv activates
-    its own environment. Lexical expansion only via ``expanduser``.
    """
-    current_home = Path.home()
-    p = Path(path).expanduser()
+    current_home = Path.home().resolve()
+    resolved = Path(path).resolve()
    try:
-        relative = p.relative_to(current_home)
+        relative = resolved.relative_to(current_home)
        return str(Path(target_home_dir) / relative)
    except ValueError:
-        return str(p)
+        return str(resolved)


 def _hermes_home_for_target_user(target_home_dir: str) -> str:
@@ -954,7 +929,7 @@ def refresh_systemd_unit_if_needed(system: bool = False) -> bool:

    expected_user = _read_systemd_user_from_unit(unit_path) if system else None
    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=expected_user), encoding="utf-8")
-    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
+    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
    print(f"↻ Updated gateway {_service_scope_label(system)} service definition to match the current Hermes install")
    return True

@@ -1050,7 +1025,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
        if not systemd_unit_is_current(system=system):
            print(f"↻ Repairing outdated {_service_scope_label(system)} systemd service at: {unit_path}")
            refresh_systemd_unit_if_needed(system=system)
-            _run_systemctl(["enable", get_service_name()], system=system, check=True, timeout=30)
+            subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True, timeout=30)
            print(f"✓ {_service_scope_label(system).capitalize()} service definition updated")
            return
        print(f"Service already installed at: {unit_path}")
@@ -1061,8 +1036,8 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
    print(f"Installing {_service_scope_label(system)} systemd service to: {unit_path}")
    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8")

-    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
-    _run_systemctl(["enable", get_service_name()], system=system, check=True, timeout=30)
+    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
+    subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True, timeout=30)

    print()
    print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!")
@@ -1088,15 +1063,15 @@ def systemd_uninstall(system: bool = False):
    if system:
        _require_root_for_system_service("uninstall")

-    _run_systemctl(["stop", get_service_name()], system=system, check=False, timeout=90)
-    _run_systemctl(["disable", get_service_name()], system=system, check=False, timeout=30)
+    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=False, timeout=90)
+    subprocess.run(_systemctl_cmd(system) + ["disable", get_service_name()], check=False, timeout=30)

    unit_path = get_systemd_unit_path(system=system)
    if unit_path.exists():
        unit_path.unlink()
        print(f"✓ Removed {unit_path}")

-    _run_systemctl(["daemon-reload"], system=system, check=True, timeout=30)
+    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled")


@@ -1105,7 +1080,7 @@ def systemd_start(system: bool = False):
    if system:
        _require_root_for_system_service("start")
    refresh_systemd_unit_if_needed(system=system)
-    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
+    subprocess.run(_systemctl_cmd(system) + ["start", get_service_name()], check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")


@@ -1114,7 +1089,7 @@ def systemd_stop(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("stop")
-    _run_systemctl(["stop", get_service_name()], system=system, check=True, timeout=90)
+    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service stopped")


@@ -1130,7 +1105,7 @@ def systemd_restart(system: bool = False):
    if pid is not None and _request_gateway_self_restart(pid):
        print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
        return
-    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
+    subprocess.run(_systemctl_cmd(system) + ["reload-or-restart", get_service_name()], check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")


@@ -1154,16 +1129,14 @@ def systemd_status(deep: bool = False, system: bool = False):
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

-    _run_systemctl(
-        ["status", get_service_name(), "--no-pager"],
-        system=system,
+    subprocess.run(
+        _systemctl_cmd(system) + ["status", get_service_name(), "--no-pager"],
        capture_output=False,
        timeout=10,
    )

-    result = _run_systemctl(
-        ["is-active", get_service_name()],
-        system=system,
+    result = subprocess.run(
+        _systemctl_cmd(system) + ["is-active", get_service_name()],
        capture_output=True,
        text=True,
        timeout=10,
@@ -2127,6 +2100,12 @@ def _setup_dingtalk():
    _setup_standard_platform(dingtalk_platform)


+def _setup_feishu():
+    """Configure Feishu / Lark via the standard platform setup."""
+    feishu_platform = next(p for p in _PLATFORMS if p["key"] == "feishu")
+    _setup_standard_platform(feishu_platform)
+
+
 def _setup_wecom():
    """Configure WeCom (Enterprise WeChat) via the standard platform setup."""
    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
@@ -2150,24 +2129,24 @@ def _is_service_running() -> bool:

        if user_unit_exists:
            try:
-                result = _run_systemctl(
-                    ["is-active", get_service_name()],
-                    system=False, capture_output=True, text=True, timeout=10,
+                result = subprocess.run(
+                    _systemctl_cmd(False) + ["is-active", get_service_name()],
+                    capture_output=True, text=True, timeout=10,
                )
                if result.stdout.strip() == "active":
                    return True
-            except (RuntimeError, subprocess.TimeoutExpired):
+            except subprocess.TimeoutExpired:
                pass

        if system_unit_exists:
            try:
-                result = _run_systemctl(
-                    ["is-active", get_service_name()],
-                    system=True, capture_output=True, text=True, timeout=10,
+                result = subprocess.run(
+                    _systemctl_cmd(True) + ["is-active", get_service_name()],
+                    capture_output=True, text=True, timeout=10,
                )
                if result.stdout.strip() == "active":
                    return True
-            except (RuntimeError, subprocess.TimeoutExpired):
+            except subprocess.TimeoutExpired:
                pass

        return False
@@ -2311,178 +2290,6 @@ def _setup_weixin():
        print_info(f"  User ID: {user_id}")


-def _setup_feishu():
-    """Interactive setup for Feishu / Lark — scan-to-create or manual credentials."""
-    print()
-    print(color("  ─── 🪽 Feishu / Lark Setup ───", Colors.CYAN))
-
-    existing_app_id = get_env_value("FEISHU_APP_ID")
-    existing_secret = get_env_value("FEISHU_APP_SECRET")
-    if existing_app_id and existing_secret:
-        print()
-        print_success("Feishu / Lark is already configured.")
-        if not prompt_yes_no("  Reconfigure Feishu / Lark?", False):
-            return
-
-    # ── Choose setup method ──
-    print()
-    method_choices = [
-        "Scan QR code to create a new bot automatically (recommended)",
-        "Enter existing App ID and App Secret manually",
-    ]
-    method_idx = prompt_choice("  How would you like to set up Feishu / Lark?", method_choices, 0)
-
-    credentials = None
-    used_qr = False
-
-    if method_idx == 0:
-        # ── QR scan-to-create ──
-        try:
-            from gateway.platforms.feishu import qr_register
-        except Exception as exc:
-            print_error(f"  Feishu / Lark onboard import failed: {exc}")
-            qr_register = None
-
-        if qr_register is not None:
-            try:
-                credentials = qr_register()
-            except KeyboardInterrupt:
-                print()
-                print_warning("  Feishu / Lark setup cancelled.")
-                return
-            except Exception as exc:
-                print_warning(f"  QR registration failed: {exc}")
-        if credentials:
-            used_qr = True
-        if not credentials:
-            print_info("  QR setup did not complete. Continuing with manual input.")
-
-    # ── Manual credential input ──
-    if not credentials:
-        print()
-        print_info("  Go to https://open.feishu.cn/ (or https://open.larksuite.com/ for Lark)")
-        print_info("  Create an app, enable the Bot capability, and copy the credentials.")
-        print()
-        app_id = prompt("  App ID", password=False)
-        if not app_id:
-            print_warning("  Skipped — Feishu / Lark won't work without an App ID.")
-            return
-        app_secret = prompt("  App Secret", password=True)
-        if not app_secret:
-            print_warning("  Skipped — Feishu / Lark won't work without an App Secret.")
-            return
-
-        domain_choices = ["feishu (China)", "lark (International)"]
-        domain_idx = prompt_choice("  Domain", domain_choices, 0)
-        domain = "lark" if domain_idx == 1 else "feishu"
-
-        # Try to probe the bot with manual credentials
-        bot_name = None
-        try:
-            from gateway.platforms.feishu import probe_bot
-            bot_info = probe_bot(app_id, app_secret, domain)
-            if bot_info:
-                bot_name = bot_info.get("bot_name")
-                print_success(f"  Credentials verified — bot: {bot_name or 'unnamed'}")
-            else:
-                print_warning("  Could not verify bot connection. Credentials saved anyway.")
-        except Exception as exc:
-            print_warning(f"  Credential verification skipped: {exc}")
-
-        credentials = {
-            "app_id": app_id,
-            "app_secret": app_secret,
-            "domain": domain,
-            "open_id": None,
-            "bot_name": bot_name,
-        }
-
-    # ── Save core credentials ──
-    app_id = credentials["app_id"]
-    app_secret = credentials["app_secret"]
-    domain = credentials.get("domain", "feishu")
-    open_id = credentials.get("open_id")
-    bot_name = credentials.get("bot_name")
-
-    save_env_value("FEISHU_APP_ID", app_id)
-    save_env_value("FEISHU_APP_SECRET", app_secret)
-    save_env_value("FEISHU_DOMAIN", domain)
-    # Bot identity is resolved at runtime via _hydrate_bot_identity().
-
-    # ── Connection mode ──
-    if used_qr:
-        connection_mode = "websocket"
-    else:
-        print()
-        mode_choices = [
-            "WebSocket (recommended — no public URL needed)",
-            "Webhook (requires a reachable HTTP endpoint)",
-        ]
-        mode_idx = prompt_choice("  Connection mode", mode_choices, 0)
-        connection_mode = "webhook" if mode_idx == 1 else "websocket"
-        if connection_mode == "webhook":
-            print_info("  Webhook defaults: 127.0.0.1:8765/feishu/webhook")
-            print_info("  Override with FEISHU_WEBHOOK_HOST / FEISHU_WEBHOOK_PORT / FEISHU_WEBHOOK_PATH")
-            print_info("  For signature verification, set FEISHU_ENCRYPT_KEY and FEISHU_VERIFICATION_TOKEN")
-    save_env_value("FEISHU_CONNECTION_MODE", connection_mode)
-
-    if bot_name:
-        print()
-        print_success(f"  Bot created: {bot_name}")
-
-    # ── DM security policy ──
-    print()
-    access_choices = [
-        "Use DM pairing approval (recommended)",
-        "Allow all direct messages",
-        "Only allow listed user IDs",
-    ]
-    access_idx = prompt_choice("  How should direct messages be authorized?", access_choices, 0)
-    if access_idx == 0:
-        save_env_value("FEISHU_ALLOW_ALL_USERS", "false")
-        save_env_value("FEISHU_ALLOWED_USERS", "")
-        print_success("  DM pairing enabled.")
-        print_info("  Unknown users can request access; approve with `hermes pairing approve`.")
-    elif access_idx == 1:
-        save_env_value("FEISHU_ALLOW_ALL_USERS", "true")
-        save_env_value("FEISHU_ALLOWED_USERS", "")
-        print_warning("  Open DM access enabled for Feishu / Lark.")
-    else:
-        save_env_value("FEISHU_ALLOW_ALL_USERS", "false")
-        default_allow = open_id or ""
-        allowlist = prompt("  Allowed user IDs (comma-separated)", default_allow, password=False).replace(" ", "")
-        save_env_value("FEISHU_ALLOWED_USERS", allowlist)
-        print_success("  Allowlist saved.")
-
-    # ── Group policy ──
-    print()
-    group_choices = [
-        "Respond only when @mentioned in groups (recommended)",
-        "Disable group chats",
-    ]
-    group_idx = prompt_choice("  How should group chats be handled?", group_choices, 0)
-    if group_idx == 0:
-        save_env_value("FEISHU_GROUP_POLICY", "open")
-        print_info("  Group chats enabled (bot must be @mentioned).")
-    else:
-        save_env_value("FEISHU_GROUP_POLICY", "disabled")
-        print_info("  Group chats disabled.")
-
-    # ── Home channel ──
-    print()
-    home_channel = prompt("  Home chat ID (optional, for cron/notifications)", password=False)
-    if home_channel:
-        save_env_value("FEISHU_HOME_CHANNEL", home_channel)
-        print_success(f"  Home channel set to {home_channel}")
-
-    print()
-    print_success("🪽 Feishu / Lark configured!")
-    print_info(f"  App ID: {app_id}")
-    print_info(f"  Domain: {domain}")
-    if bot_name:
-        print_info(f"  Bot: {bot_name}")
-
-
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -2660,8 +2467,6 @@ def gateway_setup():
            _setup_signal()
        elif platform["key"] == "weixin":
            _setup_weixin()
-        elif platform["key"] == "feishu":
-            _setup_feishu()
        else:
            _setup_standard_platform(platform)

@@ -2801,15 +2606,6 @@ def gateway_command(args):
            print("  tmux new -s hermes 'hermes gateway run'         # persistent via tmux")
            print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # background")
            sys.exit(1)
-        elif is_container():
-            print("Service installation is not needed inside a Docker container.")
-            print("The container runtime is your service manager — use Docker restart policies instead:")
-            print()
-            print("  docker run --restart unless-stopped ...   # auto-restart on crash/reboot")
-            print("  docker restart <container>                # manual restart")
-            print()
-            print("To run the gateway: hermes gateway run")
-            sys.exit(0)
        else:
            print("Service installation not supported on this platform.")
            print("Run manually: hermes gateway run")
@@ -2828,17 +2624,10 @@ def gateway_command(args):
            systemd_uninstall(system=system)
        elif is_macos():
            launchd_uninstall()
-        elif is_container():
-            print("Service uninstall is not applicable inside a Docker container.")
-            print("To stop the gateway, stop or remove the container:")
-            print()
-            print("  docker stop <container>")
-            print("  docker rm <container>")
-            sys.exit(0)
        else:
            print("Not supported on this platform.")
            sys.exit(1)
-
+    
    elif subcmd == "start":
        system = getattr(args, 'system', False)
        if is_termux():
@@ -2859,19 +2648,10 @@ def gateway_command(args):
            print()
            print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
            sys.exit(1)
-        elif is_container():
-            print("Service start is not applicable inside a Docker container.")
-            print("The gateway runs as the container's main process.")
-            print()
-            print("  docker start <container>     # start a stopped container")
-            print("  docker restart <container>   # restart a running container")
-            print()
-            print("Or run the gateway directly: hermes gateway run")
-            sys.exit(0)
        else:
            print("Not supported on this platform.")
            sys.exit(1)
-
+    
    elif subcmd == "stop":
        stop_all = getattr(args, 'all', False)
        system = getattr(args, 'system', False)
@@ -1107,7 +1107,6 @@ def select_provider_and_model(args=None):
                "base_url": base_url,
                "api_key": entry.get("api_key", ""),
                "model": entry.get("model", ""),
-                "api_mode": entry.get("api_mode", ""),
            }
        return custom_provider_map

@@ -1956,12 +1955,6 @@ def _model_flow_named_custom(config, provider_info):
    model["base_url"] = base_url
    if api_key:
        model["api_key"] = api_key
-    # Apply api_mode from custom_providers entry, or clear stale value
-    custom_api_mode = provider_info.get("api_mode", "")
-    if custom_api_mode:
-        model["api_mode"] = custom_api_mode
-    else:
-        model.pop("api_mode", None)  # let runtime auto-detect from URL
    save_config(cfg)
    deactivate_provider()

@@ -2499,11 +2492,8 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        print()
        override = ""
    if override and base_url_env:
-        if not override.startswith(("http://", "https://")):
-            print("  Invalid URL — must start with http:// or https://. Keeping current value.")
-        else:
-            save_env_value(base_url_env, override)
-            effective_base = override
+        save_env_value(base_url_env, override)
+        effective_base = override

    # Model selection — resolution order:
    #   1. models.dev registry (cached, filtered for agentic/tool-capable models)
@@ -2834,12 +2824,6 @@ def cmd_dump(args):
    run_dump(args)


-def cmd_debug(args):
-    """Debug tools (share report, etc.)."""
-    from hermes_cli.debug import run_debug
-    run_debug(args)
-
-
 def cmd_config(args):
    """Configuration management."""
    from hermes_cli.config import config_command
@@ -2976,44 +2960,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
    return default


-def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
-    """Build the web UI frontend if npm is available.
-
-    Args:
-        web_dir: Path to the ``web/`` source directory.
-        fatal: If True, print error guidance and return False on failure
-               instead of a soft warning (used by ``hermes web``).
-
-    Returns True if the build succeeded or was skipped (no package.json).
-    """
-    if not (web_dir / "package.json").exists():
-        return True
-    import shutil
-    npm = shutil.which("npm")
-    if not npm:
-        if fatal:
-            print("Web UI frontend not built and npm is not available.")
-            print("Install Node.js, then run:  cd web && npm install && npm run build")
-        return not fatal
-    print("→ Building web UI...")
-    r1 = subprocess.run([npm, "install", "--silent"], cwd=web_dir, capture_output=True)
-    if r1.returncode != 0:
-        print(f"  {'✗' if fatal else '⚠'} Web UI npm install failed"
-              + ("" if fatal else " (hermes web will not be available)"))
-        if fatal:
-            print("  Run manually:  cd web && npm install && npm run build")
-        return False
-    r2 = subprocess.run([npm, "run", "build"], cwd=web_dir, capture_output=True)
-    if r2.returncode != 0:
-        print(f"  {'✗' if fatal else '⚠'} Web UI build failed"
-              + ("" if fatal else " (hermes web will not be available)"))
-        if fatal:
-            print("  Run manually:  cd web && npm install && npm run build")
-        return False
-    print("  ✓ Web UI built")
-    return True
-
-
 def _update_via_zip(args):
    """Update Hermes Agent by downloading a ZIP archive.
    
@@ -3108,10 +3054,7 @@ def _update_via_zip(args):
                check=True,
            )
        _install_python_dependencies_with_optional_fallback(pip_cmd)
-
-    # Build web UI frontend (optional — requires npm)
-    _build_web_ui(PROJECT_ROOT / "web")
-
+    
    # Sync skills
    try:
        from tools.skills_sync import sync_skills
@@ -3858,10 +3801,7 @@ def cmd_update(args):
            if shutil.which("npm"):
                print("→ Updating Node.js dependencies...")
                subprocess.run(["npm", "install", "--silent"], cwd=PROJECT_ROOT, check=False)
-
-        # Build web UI frontend (optional — requires npm)
-        _build_web_ui(PROJECT_ROOT / "web")
-
+        
        print()
        print("✓ Code updated!")
        
@@ -3989,26 +3929,6 @@ def cmd_update(args):
        print()
        print("✓ Update complete!")
        
-        # Write exit code *before* the gateway restart attempt.
-        # When running as ``hermes update --gateway`` (spawned by the gateway's
-        # /update command), this process lives inside the gateway's systemd
-        # cgroup.  ``systemctl restart hermes-gateway`` kills everything in the
-        # cgroup (KillMode=mixed → SIGKILL to remaining processes), including
-        # us and the wrapping bash shell.  The shell never reaches its
-        # ``printf $status > .update_exit_code`` epilogue, so the exit-code
-        # marker file is never created.  The new gateway's update watcher then
-        # polls for 30 minutes and sends a spurious timeout message.
-        #
-        # Writing the marker here — after git pull + pip install succeed but
-        # before we attempt the restart — ensures the new gateway sees it
-        # regardless of how we die.
-        if gateway_mode:
-            _exit_code_path = get_hermes_home() / ".update_exit_code"
-            try:
-                _exit_code_path.write_text("0")
-            except OSError:
-                pass
-        
        # Auto-restart ALL gateways after update.
        # The code update (git pull) is shared across all profiles, so every
        # running gateway needs restarting to pick up the new code.
@@ -4143,7 +4063,7 @@ def _coalesce_session_name_args(argv: list) -> list:
        "chat", "model", "gateway", "setup", "whatsapp", "login", "logout", "auth",
        "status", "cron", "doctor", "config", "pairing", "skills", "tools",
        "mcp", "sessions", "insights", "version", "update", "uninstall",
-        "profile", "dashboard",
+        "profile",
    }
    _SESSION_FLAGS = {"-c", "--continue", "-r", "--resume"}

@@ -4293,24 +4213,18 @@ def cmd_profile(args):
                            print(f'  Add to your shell config (~/.bashrc or ~/.zshrc):')
                            print(f'    export PATH="$HOME/.local/bin:$PATH"')

-            # Profile dir for display
-            try:
-                profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
-            except ValueError:
-                profile_dir_display = str(profile_dir)
-
            # Next steps
            print(f"\nNext steps:")
            print(f"  {name} setup              Configure API keys and model")
            print(f"  {name} chat               Start chatting")
            print(f"  {name} gateway start      Start the messaging gateway")
            if clone or clone_all:
+                try:
+                    profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
+                except ValueError:
+                    profile_dir_display = str(profile_dir)
                print(f"\n  Edit {profile_dir_display}/.env for different API keys")
                print(f"  Edit {profile_dir_display}/SOUL.md for different personality")
-            else:
-                print(f"\n  ⚠ This profile has no API keys yet. Run '{name} setup' first,")
-                print(f"    or it will inherit keys from your shell environment.")
-                print(f"  Edit {profile_dir_display}/SOUL.md to customize personality")
            print()

        except (ValueError, FileExistsError, FileNotFoundError) as e:
@@ -4421,27 +4335,6 @@ def cmd_profile(args):
            sys.exit(1)


-def cmd_dashboard(args):
-    """Start the web UI server."""
-    try:
-        import fastapi  # noqa: F401
-        import uvicorn  # noqa: F401
-    except ImportError:
-        print("Web UI dependencies not installed.")
-        print("Install them with:  pip install hermes-agent[web]")
-        sys.exit(1)
-
-    if not _build_web_ui(PROJECT_ROOT / "web", fatal=True):
-        sys.exit(1)
-
-    from hermes_cli.web_server import start_server
-    start_server(
-        host=args.host,
-        port=args.port,
-        open_browser=not args.no_open,
-    )
-
-
 def cmd_completion(args):
    """Print shell completion script."""
    from hermes_cli.profiles import generate_bash_completion, generate_zsh_completion
@@ -4507,7 +4400,6 @@ Examples:
    hermes logs -f                Follow agent.log in real time
    hermes logs errors            View errors.log
    hermes logs --since 1h        Lines from the last hour
-    hermes debug share             Upload debug report for support
    hermes update                 Update to latest version

 For more help on a command:
@@ -5037,43 +4929,6 @@ For more help on a command:
    )
    dump_parser.set_defaults(func=cmd_dump)

-    # =========================================================================
-    # debug command
-    # =========================================================================
-    debug_parser = subparsers.add_parser(
-        "debug",
-        help="Debug tools — upload logs and system info for support",
-        description="Debug utilities for Hermes Agent. Use 'hermes debug share' to "
-                    "upload a debug report (system info + recent logs) to a paste "
-                    "service and get a shareable URL.",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""\
-Examples:
-    hermes debug share              Upload debug report and print URL
-    hermes debug share --lines 500  Include more log lines
-    hermes debug share --expire 30  Keep paste for 30 days
-    hermes debug share --local      Print report locally (no upload)
-""",
-    )
-    debug_sub = debug_parser.add_subparsers(dest="debug_command")
-    share_parser = debug_sub.add_parser(
-        "share",
-        help="Upload debug report to a paste service and print a shareable URL",
-    )
-    share_parser.add_argument(
-        "--lines", type=int, default=200,
-        help="Number of log lines to include per log file (default: 200)",
-    )
-    share_parser.add_argument(
-        "--expire", type=int, default=7,
-        help="Paste expiry in days (default: 7)",
-    )
-    share_parser.add_argument(
-        "--local", action="store_true",
-        help="Print the report locally instead of uploading",
-    )
-    debug_parser.set_defaults(func=cmd_debug)
-
    # =========================================================================
    # backup command
    # =========================================================================
@@ -5927,19 +5782,6 @@ Examples:
    )
    completion_parser.set_defaults(func=cmd_completion)

-    # =========================================================================
-    # dashboard command
-    # =========================================================================
-    dashboard_parser = subparsers.add_parser(
-        "dashboard",
-        help="Start the web UI dashboard",
-        description="Launch the Hermes Agent web dashboard for managing config, API keys, and sessions",
-    )
-    dashboard_parser.add_argument("--port", type=int, default=9119, help="Port (default 9119)")
-    dashboard_parser.add_argument("--host", default="127.0.0.1", help="Host (default 127.0.0.1)")
-    dashboard_parser.add_argument("--no-open", action="store_true", help="Don't open browser automatically")
-    dashboard_parser.set_defaults(func=cmd_dashboard)
-
    # =========================================================================
    # logs command
    # =========================================================================
@@ -8,9 +8,8 @@ Different LLM providers expect model identifiers in different formats:
  hyphens: ``claude-sonnet-4-6``.
 - **Copilot** expects bare names *with* dots preserved:
  ``claude-sonnet-4.6``.
- **OpenCode Zen** preserves dots for GPT/GLM/Gemini/Kimi/MiniMax-style
-  model IDs, but Claude still uses hyphenated native names like
-  ``claude-sonnet-4-6``.
+- **OpenCode Zen** follows the same dot-to-hyphen convention as
+  Anthropic: ``claude-sonnet-4-6``.
 - **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
 - **DeepSeek** only accepts two model identifiers:
  ``deepseek-chat`` and ``deepseek-reasoner``.
@@ -68,6 +67,7 @@ _AGGREGATOR_PROVIDERS: frozenset[str] = frozenset({
 # Providers that want bare names with dots replaced by hyphens.
 _DOT_TO_HYPHEN_PROVIDERS: frozenset[str] = frozenset({
    "anthropic",
+    "opencode-zen",
 })

 # Providers that want bare names with dots preserved.
@@ -329,9 +329,6 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
        >>> normalize_model_for_provider("claude-sonnet-4.6", "opencode-zen")
        'claude-sonnet-4-6'

-        >>> normalize_model_for_provider("minimax-m2.5-free", "opencode-zen")
-        'minimax-m2.5-free'
-
        >>> normalize_model_for_provider("deepseek-v3", "deepseek")
        'deepseek-chat'

@@ -354,16 +351,7 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
    if provider in _AGGREGATOR_PROVIDERS:
        return _prepend_vendor(name)

-    # --- OpenCode Zen: Claude stays hyphenated; other models keep dots ---
-    if provider == "opencode-zen":
-        bare = _strip_matching_provider_prefix(name, provider)
-        if "/" in bare:
-            return bare
-        if bare.lower().startswith("claude-"):
-            return _dots_to_hyphens(bare)
-        return bare
-
-    # --- Anthropic: strip matching provider prefix, dots -> hyphens ---
+    # --- Anthropic / OpenCode: strip matching provider prefix, dots -> hyphens ---
    if provider in _DOT_TO_HYPHEN_PROVIDERS:
        bare = _strip_matching_provider_prefix(name, provider)
        if "/" in bare:
@@ -21,7 +21,6 @@ OpenRouter variant suffixes (``:free``, ``:extended``, ``:fast``).
 from __future__ import annotations

 import logging
-import re
 from dataclasses import dataclass
 from typing import List, NamedTuple, Optional

@@ -58,36 +57,10 @@ _HERMES_MODEL_WARNING = (
    "(Claude, GPT, Gemini, DeepSeek, etc.)."
 )

-# Match only the real Nous Research Hermes 3 / Hermes 4 chat families.
-# The previous substring check (`"hermes" in name.lower()`) false-positived on
-# unrelated local Modelfiles like ``hermes-brain:qwen3-14b-ctx16k`` that just
-# happen to carry "hermes" in their tag but are fully tool-capable.
-#
-# Positive examples the regex must match:
-#   NousResearch/Hermes-3-Llama-3.1-70B, hermes-4-405b, openrouter/hermes3:70b
-# Negative examples it must NOT match:
-#   hermes-brain:qwen3-14b-ctx16k, qwen3:14b, claude-opus-4-6
-_NOUS_HERMES_NON_AGENTIC_RE = re.compile(
-    r"(?:^|[/:])hermes[-_ ]?[34](?:[-_.:]|$)",
-    re.IGNORECASE,
-)
-
-
-def is_nous_hermes_non_agentic(model_name: str) -> bool:
-    """Return True if *model_name* is a real Nous Hermes 3/4 chat model.
-
-    Used to decide whether to surface the non-agentic warning at startup.
-    Callers in :mod:`cli.py` and here should go through this single helper
-    so the two sites don't drift.
-    """
-    if not model_name:
-        return False
-    return bool(_NOUS_HERMES_NON_AGENTIC_RE.search(model_name))
-

 def _check_hermes_model_warning(model_name: str) -> str:
-    """Return a warning string if *model_name* is a Nous Hermes 3/4 chat model."""
-    if is_nous_hermes_non_agentic(model_name):
+    """Return a warning string if *model_name* looks like a Hermes LLM model."""
+    if "hermes" in model_name.lower():
        return _HERMES_MODEL_WARNING
    return ""

@@ -70,13 +70,13 @@ def _codex_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
-        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
        "openai/gpt-5.4-mini",
+        "xiaomi/mimo-v2-pro",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
@@ -130,7 +130,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gemma-4-26b-it",
    ],
    "zai": [
-        "glm-5.1",
        "glm-5",
        "glm-5-turbo",
        "glm-4.7",
@@ -547,20 +546,6 @@ _PROVIDER_ALIASES = {
 }


-def get_default_model_for_provider(provider: str) -> str:
-    """Return the default model for a provider, or empty string if unknown.
-
-    Uses the first entry in _PROVIDER_MODELS as the default.  This is the
-    model a user would be offered first in the ``hermes model`` picker.
-
-    Used as a fallback when the user has configured a provider but never
-    selected a model (e.g. ``hermes auth add openai-codex`` without
-    ``hermes model``).
-    """
-    models = _PROVIDER_MODELS.get(provider, [])
-    return models[0] if models else ""
-
-
 def _openrouter_model_is_free(pricing: Any) -> bool:
    """Return True when both prompt and completion pricing are zero."""
    if not isinstance(pricing, dict):
@@ -459,16 +459,6 @@ def create_profile(
                    dst.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(src, dst)

-    # Seed a default SOUL.md so the user has a file to customize immediately.
-    # Skipped when the profile already has one (from --clone / --clone-all).
-    soul_path = profile_dir / "SOUL.md"
-    if not soul_path.exists():
-        try:
-            from hermes_cli.default_soul import DEFAULT_SOUL_MD
-            soul_path.write_text(DEFAULT_SOUL_MD, encoding="utf-8")
-        except Exception:
-            pass  # best-effort — don't fail profile creation over this
-
    return profile_dir


@@ -104,7 +104,7 @@ _DEFAULT_PROVIDER_MODELS = {
        "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
        "gemma-4-31b-it", "gemma-4-26b-it",
    ],
-    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
+    "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
@@ -2232,7 +2232,6 @@ def setup_gateway(config: dict):
        from hermes_cli.gateway import (
            _is_service_installed,
            _is_service_running,
-            supports_systemd_services,
            has_conflicting_systemd_units,
            install_linux_gateway_from_setup,
            print_systemd_scope_conflict_warning,
@@ -2245,18 +2244,16 @@ def setup_gateway(config: dict):

        service_installed = _is_service_installed()
        service_running = _is_service_running()
-        supports_systemd = supports_systemd_services()
-        supports_service_manager = supports_systemd or _is_macos

        print()
-        if supports_systemd and has_conflicting_systemd_units():
+        if _is_linux and has_conflicting_systemd_units():
            print_systemd_scope_conflict_warning()
            print()

        if service_running:
            if prompt_yes_no("  Restart the gateway to pick up changes?", True):
                try:
-                    if supports_systemd:
+                    if _is_linux:
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
@@ -2265,14 +2262,14 @@ def setup_gateway(config: dict):
        elif service_installed:
            if prompt_yes_no("  Start the gateway service?", True):
                try:
-                    if supports_systemd:
+                    if _is_linux:
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
                except Exception as e:
                    print_error(f"  Start failed: {e}")
-        elif supports_service_manager:
-            svc_name = "systemd" if supports_systemd else "launchd"
+        elif _is_linux or _is_macos:
+            svc_name = "systemd" if _is_linux else "launchd"
            if prompt_yes_no(
                f"  Install the gateway as a {svc_name} service? (runs in background, starts on boot)",
                True,
@@ -2280,7 +2277,7 @@ def setup_gateway(config: dict):
                try:
                    installed_scope = None
                    did_install = False
-                    if supports_systemd:
+                    if _is_linux:
                        installed_scope, did_install = install_linux_gateway_from_setup(force=False)
                    else:
                        launchd_install(force=False)
@@ -2288,7 +2285,7 @@ def setup_gateway(config: dict):
                    print()
                    if did_install and prompt_yes_no("  Start the service now?", True):
                        try:
-                            if supports_systemd:
+                            if _is_linux:
                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
@@ -2299,21 +2296,12 @@ def setup_gateway(config: dict):
                    print_info("  You can try manually: hermes gateway install")
            else:
                print_info("  You can install later: hermes gateway install")
-                if supports_systemd:
+                if _is_linux:
                    print_info("  Or as a boot-time service: sudo hermes gateway install --system")
                print_info("  Or run in foreground:  hermes gateway")
        else:
-            from hermes_constants import is_container
-            if is_container():
-                print_info("Start the gateway to bring your bots online:")
-                print_info("   hermes gateway run          # Run as container main process")
-                print_info("")
-                print_info("For automatic restarts, use a Docker restart policy:")
-                print_info("   docker run --restart unless-stopped ...")
-                print_info("   docker restart <container>  # Manual restart")
-            else:
-                print_info("Start the gateway to bring your bots online:")
-                print_info("   hermes gateway              # Run in foreground")
+            print_info("Start the gateway to bring your bots online:")
+            print_info("   hermes gateway              # Run in foreground")

        print_info("━" * 50)

@@ -335,23 +335,7 @@ def do_install(identifier: str, category: str = "", force: bool = False,
    meta, bundle, _matched_source = _resolve_source_meta_and_bundle(identifier, sources)

    if not bundle:
-        # Check if any source hit GitHub API rate limit
-        rate_limited = any(
-            getattr(src, "is_rate_limited", False)
-            or getattr(getattr(src, "github", None), "is_rate_limited", False)
-            for src in sources
-        )
-        c.print(f"[bold red]Error:[/] Could not fetch '{identifier}' from any source.")
-        if rate_limited:
-            c.print(
-                "[yellow]Hint:[/] GitHub API rate limit exhausted "
-                "(unauthenticated: 60 requests/hour).\n"
-                "Set [bold]GITHUB_TOKEN[/] in your .env or install the "
-                "[bold]gh[/] CLI and run [bold]gh auth login[/] "
-                "to raise the limit to 5,000/hr.\n"
-            )
-        else:
-            c.print()
+        c.print(f"[bold red]Error:[/] Could not fetch '{identifier}' from any source.\n")
        return

    # Auto-detect category for official skills (e.g. "official/autonomous-ai-agents/blackbox")
@@ -346,35 +346,23 @@ def show_status(args):
            print("  Note:         Android may stop background jobs when Termux is suspended")

    elif sys.platform.startswith('linux'):
-        from hermes_constants import is_container
-        if is_container():
-            # Docker/Podman: no systemd — check for running gateway processes
-            try:
-                from hermes_cli.gateway import find_gateway_pids
-                gateway_pids = find_gateway_pids()
-                is_active = len(gateway_pids) > 0
-            except Exception:
-                is_active = False
-            print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
-            print("  Manager:      docker (foreground)")
-        else:
-            try:
-                from hermes_cli.gateway import get_service_name
-                _gw_svc = get_service_name()
-            except Exception:
-                _gw_svc = "hermes-gateway"
-            try:
-                result = subprocess.run(
-                    ["systemctl", "--user", "is-active", _gw_svc],
-                    capture_output=True,
-                    text=True,
-                    timeout=5
-                )
-                is_active = result.stdout.strip() == "active"
-            except (FileNotFoundError, subprocess.TimeoutExpired):
-                is_active = False
-            print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
-            print("  Manager:      systemd (user)")
+        try:
+            from hermes_cli.gateway import get_service_name
+            _gw_svc = get_service_name()
+        except Exception:
+            _gw_svc = "hermes-gateway"
+        try:
+            result = subprocess.run(
+                ["systemctl", "--user", "is-active", _gw_svc],
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+            is_active = result.stdout.strip() == "active"
+        except (FileNotFoundError, subprocess.TimeoutExpired):
+            is_active = False
+        print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
+        print("  Manager:      systemd (user)")
        
    elif sys.platform == 'darwin':
        from hermes_cli.gateway import get_launchd_label
@@ -421,21 +409,7 @@ def show_status(args):
    print(color("◆ Sessions", Colors.CYAN, Colors.BOLD))
    
    sessions_file = get_hermes_home() / "sessions" / "sessions.json"
-    # Primary: count gateway sessions from state.db
-    _session_count_shown = False
-    try:
-        from hermes_state import SessionDB
-        _sdb = SessionDB()
-        try:
-            _gw_sessions = _sdb.list_gateway_sessions()
-        finally:
-            _sdb.close()
-        print(f"  Active:       {len(_gw_sessions)} session(s)")
-        _session_count_shown = True
-    except Exception:
-        pass
-    # Fallback: sessions.json
-    if not _session_count_shown and sessions_file.exists():
+    if sessions_file.exists():
        import json
        try:
            with open(sessions_file, encoding="utf-8") as f:
@@ -1,929 +0,0 @@
-"""
-Hermes Agent — Web UI server.
-
-Provides a FastAPI backend serving the Vite/React frontend and REST API
-endpoints for managing configuration, environment variables, and sessions.
-
-Usage:
-    python -m hermes_cli.main web          # Start on http://127.0.0.1:9119
-    python -m hermes_cli.main web --port 8080
-"""
-
-import logging
-import os
-import secrets
-import sys
-import time
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-import yaml
-
-PROJECT_ROOT = Path(__file__).parent.parent.resolve()
-if str(PROJECT_ROOT) not in sys.path:
-    sys.path.insert(0, str(PROJECT_ROOT))
-
-from hermes_cli import __version__, __release_date__
-from hermes_cli.config import (
-    DEFAULT_CONFIG,
-    OPTIONAL_ENV_VARS,
-    get_config_path,
-    get_env_path,
-    get_hermes_home,
-    load_config,
-    load_env,
-    save_config,
-    save_env_value,
-    remove_env_value,
-    check_config_version,
-    redact_key,
-)
-from gateway.status import get_running_pid, read_runtime_status
-
-try:
-    from fastapi import FastAPI, HTTPException, Request
-    from fastapi.middleware.cors import CORSMiddleware
-    from fastapi.responses import FileResponse, JSONResponse
-    from fastapi.staticfiles import StaticFiles
-    from pydantic import BaseModel
-except ImportError:
-    raise SystemExit(
-        "Web UI requires fastapi and uvicorn.\n"
-        "Run 'hermes web' to auto-install, or: pip install hermes-agent[web]"
-    )
-
-WEB_DIST = Path(__file__).parent / "web_dist"
-_log = logging.getLogger(__name__)
-
-app = FastAPI(title="Hermes Agent", version=__version__)
-
-# ---------------------------------------------------------------------------
-# Session token for protecting sensitive endpoints (reveal).
-# Generated fresh on every server start — dies when the process exits.
-# Injected into the SPA HTML so only the legitimate web UI can use it.
-# ---------------------------------------------------------------------------
-_SESSION_TOKEN = secrets.token_urlsafe(32)
-
-# Simple rate limiter for the reveal endpoint
-_reveal_timestamps: List[float] = []
-_REVEAL_MAX_PER_WINDOW = 5
-_REVEAL_WINDOW_SECONDS = 30
-
-# CORS: restrict to localhost origins only.  The web UI is intended to run
-# locally; binding to 0.0.0.0 with allow_origins=["*"] would let any website
-# read/modify config and secrets.
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origin_regex=r"^https?://(localhost|127\.0\.0\.1)(:\d+)?$",
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-# ---------------------------------------------------------------------------
-# Config schema — auto-generated from DEFAULT_CONFIG
-# ---------------------------------------------------------------------------
-
-# Manual overrides for fields that need select options or custom types
-_SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
-    "model": {
-        "type": "string",
-        "description": "Default model (e.g. anthropic/claude-sonnet-4.6)",
-        "category": "general",
-    },
-    "terminal.backend": {
-        "type": "select",
-        "description": "Terminal execution backend",
-        "options": ["local", "docker", "ssh", "modal", "daytona", "singularity"],
-    },
-    "terminal.modal_mode": {
-        "type": "select",
-        "description": "Modal sandbox mode",
-        "options": ["sandbox", "function"],
-    },
-    "tts.provider": {
-        "type": "select",
-        "description": "Text-to-speech provider",
-        "options": ["edge", "elevenlabs", "openai", "neutts"],
-    },
-    "stt.provider": {
-        "type": "select",
-        "description": "Speech-to-text provider",
-        "options": ["local", "openai", "mistral"],
-    },
-    "display.skin": {
-        "type": "select",
-        "description": "CLI visual theme",
-        "options": ["default", "ares", "mono", "slate"],
-    },
-    "display.resume_display": {
-        "type": "select",
-        "description": "How resumed sessions display history",
-        "options": ["minimal", "full", "off"],
-    },
-    "display.busy_input_mode": {
-        "type": "select",
-        "description": "Input behavior while agent is running",
-        "options": ["queue", "interrupt", "block"],
-    },
-    "memory.provider": {
-        "type": "select",
-        "description": "Memory provider plugin",
-        "options": ["builtin", "honcho"],
-    },
-    "approvals.mode": {
-        "type": "select",
-        "description": "Dangerous command approval mode",
-        "options": ["ask", "yolo", "deny"],
-    },
-    "context.engine": {
-        "type": "select",
-        "description": "Context management engine",
-        "options": ["default", "custom"],
-    },
-    "human_delay.mode": {
-        "type": "select",
-        "description": "Simulated typing delay mode",
-        "options": ["off", "typing", "fixed"],
-    },
-    "logging.level": {
-        "type": "select",
-        "description": "Log level for agent.log",
-        "options": ["DEBUG", "INFO", "WARNING", "ERROR"],
-    },
-    "agent.service_tier": {
-        "type": "select",
-        "description": "API service tier (OpenAI/Anthropic)",
-        "options": ["", "auto", "default", "flex"],
-    },
-    "delegation.reasoning_effort": {
-        "type": "select",
-        "description": "Reasoning effort for delegated subagents",
-        "options": ["", "low", "medium", "high"],
-    },
-}
-
-# Categories with fewer fields get merged into "general" to avoid tab sprawl.
-_CATEGORY_MERGE: Dict[str, str] = {
-    "privacy": "security",
-    "context": "agent",
-    "skills": "agent",
-    "cron": "agent",
-    "network": "agent",
-    "checkpoints": "agent",
-    "approvals": "security",
-    "human_delay": "display",
-    "smart_model_routing": "agent",
-}
-
-# Display order for tabs — unlisted categories sort alphabetically after these.
-_CATEGORY_ORDER = [
-    "general", "agent", "terminal", "display", "delegation",
-    "memory", "compression", "security", "browser", "voice",
-    "tts", "stt", "logging", "discord", "auxiliary",
-]
-
-
-def _infer_type(value: Any) -> str:
-    """Infer a UI field type from a Python value."""
-    if isinstance(value, bool):
-        return "boolean"
-    if isinstance(value, int):
-        return "number"
-    if isinstance(value, float):
-        return "number"
-    if isinstance(value, list):
-        return "list"
-    if isinstance(value, dict):
-        return "object"
-    return "string"
-
-
-def _build_schema_from_config(
-    config: Dict[str, Any],
-    prefix: str = "",
-) -> Dict[str, Dict[str, Any]]:
-    """Walk DEFAULT_CONFIG and produce a flat dot-path → field schema dict."""
-    schema: Dict[str, Dict[str, Any]] = {}
-    for key, value in config.items():
-        full_key = f"{prefix}.{key}" if prefix else key
-
-        # Skip internal / version keys
-        if full_key in ("_config_version",):
-            continue
-
-        # Category is the first path component for nested keys, or "general"
-        # for top-level scalar fields (model, toolsets, timezone, etc.).
-        if prefix:
-            category = prefix.split(".")[0]
-        elif isinstance(value, dict):
-            category = key
-        else:
-            category = "general"
-
-        if isinstance(value, dict):
-            # Recurse into nested dicts
-            schema.update(_build_schema_from_config(value, full_key))
-        else:
-            entry: Dict[str, Any] = {
-                "type": _infer_type(value),
-                "description": full_key.replace(".", " → ").replace("_", " ").title(),
-                "category": category,
-            }
-            # Apply manual overrides
-            if full_key in _SCHEMA_OVERRIDES:
-                entry.update(_SCHEMA_OVERRIDES[full_key])
-            # Merge small categories
-            entry["category"] = _CATEGORY_MERGE.get(entry["category"], entry["category"])
-            schema[full_key] = entry
-    return schema
-
-
-CONFIG_SCHEMA = _build_schema_from_config(DEFAULT_CONFIG)
-
-
-class ConfigUpdate(BaseModel):
-    config: dict
-
-
-class EnvVarUpdate(BaseModel):
-    key: str
-    value: str
-
-
-class EnvVarDelete(BaseModel):
-    key: str
-
-
-class EnvVarReveal(BaseModel):
-    key: str
-
-
-@app.get("/api/status")
-async def get_status():
-    current_ver, latest_ver = check_config_version()
-
-    gateway_pid = get_running_pid()
-    gateway_running = gateway_pid is not None
-
-    gateway_state = None
-    gateway_platforms: dict = {}
-    gateway_exit_reason = None
-    gateway_updated_at = None
-    configured_gateway_platforms: set[str] | None = None
-    try:
-        from gateway.config import load_gateway_config
-
-        gateway_config = load_gateway_config()
-        configured_gateway_platforms = {
-            platform.value for platform in gateway_config.get_connected_platforms()
-        }
-    except Exception:
-        configured_gateway_platforms = None
-
-    runtime = read_runtime_status()
-    if runtime:
-        gateway_state = runtime.get("gateway_state")
-        gateway_platforms = runtime.get("platforms") or {}
-        if configured_gateway_platforms is not None:
-            gateway_platforms = {
-                key: value
-                for key, value in gateway_platforms.items()
-                if key in configured_gateway_platforms
-            }
-        gateway_exit_reason = runtime.get("exit_reason")
-        gateway_updated_at = runtime.get("updated_at")
-        if not gateway_running:
-            gateway_state = gateway_state if gateway_state in ("stopped", "startup_failed") else "stopped"
-            gateway_platforms = {}
-
-    active_sessions = 0
-    try:
-        from hermes_state import SessionDB
-        db = SessionDB()
-        try:
-            sessions = db.list_sessions_rich(limit=50)
-            now = time.time()
-            active_sessions = sum(
-                1 for s in sessions
-                if s.get("ended_at") is None
-                and (now - s.get("last_active", s.get("started_at", 0))) < 300
-            )
-        finally:
-            db.close()
-    except Exception:
-        pass
-
-    return {
-        "version": __version__,
-        "release_date": __release_date__,
-        "hermes_home": str(get_hermes_home()),
-        "config_path": str(get_config_path()),
-        "env_path": str(get_env_path()),
-        "config_version": current_ver,
-        "latest_config_version": latest_ver,
-        "gateway_running": gateway_running,
-        "gateway_pid": gateway_pid,
-        "gateway_state": gateway_state,
-        "gateway_platforms": gateway_platforms,
-        "gateway_exit_reason": gateway_exit_reason,
-        "gateway_updated_at": gateway_updated_at,
-        "active_sessions": active_sessions,
-    }
-
-
-@app.get("/api/sessions")
-async def get_sessions():
-    try:
-        from hermes_state import SessionDB
-        db = SessionDB()
-        try:
-            sessions = db.list_sessions_rich(limit=20)
-            now = time.time()
-            for s in sessions:
-                s["is_active"] = (
-                    s.get("ended_at") is None
-                    and (now - s.get("last_active", s.get("started_at", 0))) < 300
-                )
-            return sessions
-        finally:
-            db.close()
-    except Exception as e:
-        _log.exception("GET /api/sessions failed")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-
-@app.get("/api/sessions/search")
-async def search_sessions(q: str = "", limit: int = 20):
-    """Full-text search across session message content using FTS5."""
-    if not q or not q.strip():
-        return {"results": []}
-    try:
-        from hermes_state import SessionDB
-        db = SessionDB()
-        try:
-            # Auto-add prefix wildcards so partial words match
-            # e.g. "nimb" → "nimb*" matches "nimby"
-            # Preserve quoted phrases and existing wildcards as-is
-            import re
-            terms = []
-            for token in re.findall(r'"[^"]*"|\S+', q.strip()):
-                if token.startswith('"') or token.endswith("*"):
-                    terms.append(token)
-                else:
-                    terms.append(token + "*")
-            prefix_query = " ".join(terms)
-            matches = db.search_messages(query=prefix_query, limit=limit)
-            # Group by session_id — return unique sessions with their best snippet
-            seen: dict = {}
-            for m in matches:
-                sid = m["session_id"]
-                if sid not in seen:
-                    seen[sid] = {
-                        "session_id": sid,
-                        "snippet": m.get("snippet", ""),
-                        "role": m.get("role"),
-                        "source": m.get("source"),
-                        "model": m.get("model"),
-                        "session_started": m.get("session_started"),
-                    }
-            return {"results": list(seen.values())}
-        finally:
-            db.close()
-    except Exception:
-        _log.exception("GET /api/sessions/search failed")
-        raise HTTPException(status_code=500, detail="Search failed")
-
-
-def _normalize_config_for_web(config: Dict[str, Any]) -> Dict[str, Any]:
-    """Normalize config for the web UI.
-
-    Hermes supports ``model`` as either a bare string (``"anthropic/claude-sonnet-4"``)
-    or a dict (``{default: ..., provider: ..., base_url: ...}``).  The schema is built
-    from DEFAULT_CONFIG where ``model`` is a string, but user configs often have the
-    dict form.  Normalize to the string form so the frontend schema matches.
-    """
-    config = dict(config)  # shallow copy
-    model_val = config.get("model")
-    if isinstance(model_val, dict):
-        config["model"] = model_val.get("default", model_val.get("name", ""))
-    return config
-
-
-@app.get("/api/config")
-async def get_config():
-    config = _normalize_config_for_web(load_config())
-    # Strip internal keys that the frontend shouldn't see or send back
-    return {k: v for k, v in config.items() if not k.startswith("_")}
-
-
-@app.get("/api/config/defaults")
-async def get_defaults():
-    return DEFAULT_CONFIG
-
-
-@app.get("/api/config/schema")
-async def get_schema():
-    return {"fields": CONFIG_SCHEMA, "category_order": _CATEGORY_ORDER}
-
-
-def _denormalize_config_from_web(config: Dict[str, Any]) -> Dict[str, Any]:
-    """Reverse _normalize_config_for_web before saving.
-
-    Reconstructs ``model`` as a dict by reading the current on-disk config
-    to recover model subkeys (provider, base_url, api_mode, etc.) that were
-    stripped from the GET response.  The frontend only sees model as a flat
-    string; the rest is preserved transparently.
-    """
-    config = dict(config)
-    # Remove any _model_meta that might have leaked in (shouldn't happen
-    # with the stripped GET response, but be defensive)
-    config.pop("_model_meta", None)
-
-    model_val = config.get("model")
-    if isinstance(model_val, str) and model_val:
-        # Read the current disk config to recover model subkeys
-        try:
-            disk_config = load_config()
-            disk_model = disk_config.get("model")
-            if isinstance(disk_model, dict):
-                # Preserve all subkeys, update default with the new value
-                disk_model["default"] = model_val
-                config["model"] = disk_model
-        except Exception:
-            pass  # can't read disk config — just use the string form
-    return config
-
-
-@app.put("/api/config")
-async def update_config(body: ConfigUpdate):
-    try:
-        save_config(_denormalize_config_from_web(body.config))
-        return {"ok": True}
-    except Exception as e:
-        _log.exception("PUT /api/config failed")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-
-@app.get("/api/auth/session-token")
-async def get_session_token():
-    """Return the ephemeral session token for this server instance.
-
-    The token protects sensitive endpoints (reveal).  It's served to the SPA
-    which stores it in memory — it's never persisted and dies when the server
-    process exits.  CORS already restricts this to localhost origins.
-    """
-    return {"token": _SESSION_TOKEN}
-
-
-@app.get("/api/env")
-async def get_env_vars():
-    env_on_disk = load_env()
-    result = {}
-    for var_name, info in OPTIONAL_ENV_VARS.items():
-        value = env_on_disk.get(var_name)
-        result[var_name] = {
-            "is_set": bool(value),
-            "redacted_value": redact_key(value) if value else None,
-            "description": info.get("description", ""),
-            "url": info.get("url"),
-            "category": info.get("category", ""),
-            "is_password": info.get("password", False),
-            "tools": info.get("tools", []),
-            "advanced": info.get("advanced", False),
-        }
-    return result
-
-
-@app.put("/api/env")
-async def set_env_var(body: EnvVarUpdate):
-    try:
-        save_env_value(body.key, body.value)
-        return {"ok": True, "key": body.key}
-    except Exception as e:
-        _log.exception("PUT /api/env failed")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-
-@app.delete("/api/env")
-async def remove_env_var(body: EnvVarDelete):
-    try:
-        removed = remove_env_value(body.key)
-        if not removed:
-            raise HTTPException(status_code=404, detail=f"{body.key} not found in .env")
-        return {"ok": True, "key": body.key}
-    except HTTPException:
-        raise
-    except Exception as e:
-        _log.exception("DELETE /api/env failed")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-
-@app.post("/api/env/reveal")
-async def reveal_env_var(body: EnvVarReveal, request: Request):
-    """Return the real (unredacted) value of a single env var.
-
-    Protected by:
-    - Ephemeral session token (generated per server start, injected into SPA)
-    - Rate limiting (max 5 reveals per 30s window)
-    - Audit logging
-    """
-    # --- Token check ---
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
-
-    # --- Rate limit ---
-    now = time.time()
-    cutoff = now - _REVEAL_WINDOW_SECONDS
-    _reveal_timestamps[:] = [t for t in _reveal_timestamps if t > cutoff]
-    if len(_reveal_timestamps) >= _REVEAL_MAX_PER_WINDOW:
-        raise HTTPException(status_code=429, detail="Too many reveal requests. Try again shortly.")
-    _reveal_timestamps.append(now)
-
-    # --- Reveal ---
-    env_on_disk = load_env()
-    value = env_on_disk.get(body.key)
-    if value is None:
-        raise HTTPException(status_code=404, detail=f"{body.key} not found in .env")
-
-    _log.info("env/reveal: %s", body.key)
-    return {"key": body.key, "value": value}
-
-
-# ---------------------------------------------------------------------------
-# Session detail endpoints
-# ---------------------------------------------------------------------------
-
-
-@app.get("/api/sessions/{session_id}")
-async def get_session_detail(session_id: str):
-    from hermes_state import SessionDB
-    db = SessionDB()
-    try:
-        sid = db.resolve_session_id(session_id)
-        session = db.get_session(sid) if sid else None
-        if not session:
-            raise HTTPException(status_code=404, detail="Session not found")
-        return session
-    finally:
-        db.close()
-
-
-@app.get("/api/sessions/{session_id}/messages")
-async def get_session_messages(session_id: str):
-    from hermes_state import SessionDB
-    db = SessionDB()
-    try:
-        sid = db.resolve_session_id(session_id)
-        if not sid:
-            raise HTTPException(status_code=404, detail="Session not found")
-        messages = db.get_messages(sid)
-        return {"session_id": sid, "messages": messages}
-    finally:
-        db.close()
-
-
-@app.delete("/api/sessions/{session_id}")
-async def delete_session_endpoint(session_id: str):
-    from hermes_state import SessionDB
-    db = SessionDB()
-    try:
-        if not db.delete_session(session_id):
-            raise HTTPException(status_code=404, detail="Session not found")
-        return {"ok": True}
-    finally:
-        db.close()
-
-
-# ---------------------------------------------------------------------------
-# Log viewer endpoint
-# ---------------------------------------------------------------------------
-
-
-@app.get("/api/logs")
-async def get_logs(
-    file: str = "agent",
-    lines: int = 100,
-    level: Optional[str] = None,
-    component: Optional[str] = None,
-):
-    from hermes_cli.logs import _read_tail, LOG_FILES
-
-    log_name = LOG_FILES.get(file)
-    if not log_name:
-        raise HTTPException(status_code=400, detail=f"Unknown log file: {file}")
-    log_path = get_hermes_home() / "logs" / log_name
-    if not log_path.exists():
-        return {"file": file, "lines": []}
-
-    try:
-        from hermes_logging import COMPONENT_PREFIXES
-    except ImportError:
-        COMPONENT_PREFIXES = {}
-
-    has_filters = bool(level or component)
-    comp_prefixes = COMPONENT_PREFIXES.get(component, ()) if component else ()
-    result = _read_tail(
-        log_path, min(lines, 500),
-        has_filters=has_filters,
-        min_level=level,
-        component_prefixes=comp_prefixes,
-    )
-    return {"file": file, "lines": result}
-
-
-# ---------------------------------------------------------------------------
-# Cron job management endpoints
-# ---------------------------------------------------------------------------
-
-
-class CronJobCreate(BaseModel):
-    prompt: str
-    schedule: str
-    name: str = ""
-    deliver: str = "local"
-
-
-class CronJobUpdate(BaseModel):
-    updates: dict
-
-
-@app.get("/api/cron/jobs")
-async def list_cron_jobs():
-    from cron.jobs import list_jobs
-    return list_jobs(include_disabled=True)
-
-
-@app.get("/api/cron/jobs/{job_id}")
-async def get_cron_job(job_id: str):
-    from cron.jobs import get_job
-    job = get_job(job_id)
-    if not job:
-        raise HTTPException(status_code=404, detail="Job not found")
-    return job
-
-
-@app.post("/api/cron/jobs")
-async def create_cron_job(body: CronJobCreate):
-    from cron.jobs import create_job
-    try:
-        job = create_job(prompt=body.prompt, schedule=body.schedule,
-                         name=body.name, deliver=body.deliver)
-        return job
-    except Exception as e:
-        _log.exception("POST /api/cron/jobs failed")
-        raise HTTPException(status_code=400, detail=str(e))
-
-
-@app.put("/api/cron/jobs/{job_id}")
-async def update_cron_job(job_id: str, body: CronJobUpdate):
-    from cron.jobs import update_job
-    job = update_job(job_id, body.updates)
-    if not job:
-        raise HTTPException(status_code=404, detail="Job not found")
-    return job
-
-
-@app.post("/api/cron/jobs/{job_id}/pause")
-async def pause_cron_job(job_id: str):
-    from cron.jobs import pause_job
-    job = pause_job(job_id)
-    if not job:
-        raise HTTPException(status_code=404, detail="Job not found")
-    return job
-
-
-@app.post("/api/cron/jobs/{job_id}/resume")
-async def resume_cron_job(job_id: str):
-    from cron.jobs import resume_job
-    job = resume_job(job_id)
-    if not job:
-        raise HTTPException(status_code=404, detail="Job not found")
-    return job
-
-
-@app.post("/api/cron/jobs/{job_id}/trigger")
-async def trigger_cron_job(job_id: str):
-    from cron.jobs import trigger_job
-    job = trigger_job(job_id)
-    if not job:
-        raise HTTPException(status_code=404, detail="Job not found")
-    return job
-
-
-@app.delete("/api/cron/jobs/{job_id}")
-async def delete_cron_job(job_id: str):
-    from cron.jobs import remove_job
-    if not remove_job(job_id):
-        raise HTTPException(status_code=404, detail="Job not found")
-    return {"ok": True}
-
-
-# ---------------------------------------------------------------------------
-# Skills & Tools endpoints
-# ---------------------------------------------------------------------------
-
-
-class SkillToggle(BaseModel):
-    name: str
-    enabled: bool
-
-
-@app.get("/api/skills")
-async def get_skills():
-    from tools.skills_tool import _find_all_skills
-    from hermes_cli.skills_config import get_disabled_skills
-    config = load_config()
-    disabled = get_disabled_skills(config)
-    skills = _find_all_skills(skip_disabled=True)
-    for s in skills:
-        s["enabled"] = s["name"] not in disabled
-    return skills
-
-
-@app.put("/api/skills/toggle")
-async def toggle_skill(body: SkillToggle):
-    from hermes_cli.skills_config import get_disabled_skills, save_disabled_skills
-    config = load_config()
-    disabled = get_disabled_skills(config)
-    if body.enabled:
-        disabled.discard(body.name)
-    else:
-        disabled.add(body.name)
-    save_disabled_skills(config, disabled)
-    return {"ok": True, "name": body.name, "enabled": body.enabled}
-
-
-@app.get("/api/tools/toolsets")
-async def get_toolsets():
-    from hermes_cli.tools_config import (
-        _get_effective_configurable_toolsets,
-        _get_platform_tools,
-        _toolset_has_keys,
-    )
-    from toolsets import resolve_toolset
-
-    config = load_config()
-    enabled_toolsets = _get_platform_tools(
-        config,
-        "cli",
-        include_default_mcp_servers=False,
-    )
-    result = []
-    for name, label, desc in _get_effective_configurable_toolsets():
-        try:
-            tools = sorted(set(resolve_toolset(name)))
-        except Exception:
-            tools = []
-        is_enabled = name in enabled_toolsets
-        result.append({
-            "name": name, "label": label, "description": desc,
-            "enabled": is_enabled,
-            "available": is_enabled,
-            "configured": _toolset_has_keys(name, config),
-            "tools": tools,
-        })
-    return result
-
-
-# ---------------------------------------------------------------------------
-# Raw YAML config endpoint
-# ---------------------------------------------------------------------------
-
-
-class RawConfigUpdate(BaseModel):
-    yaml_text: str
-
-
-@app.get("/api/config/raw")
-async def get_config_raw():
-    path = get_config_path()
-    if not path.exists():
-        return {"yaml": ""}
-    return {"yaml": path.read_text(encoding="utf-8")}
-
-
-@app.put("/api/config/raw")
-async def update_config_raw(body: RawConfigUpdate):
-    try:
-        parsed = yaml.safe_load(body.yaml_text)
-        if not isinstance(parsed, dict):
-            raise HTTPException(status_code=400, detail="YAML must be a mapping")
-        save_config(parsed)
-        return {"ok": True}
-    except yaml.YAMLError as e:
-        raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}")
-
-
-# ---------------------------------------------------------------------------
-# Token / cost analytics endpoint
-# ---------------------------------------------------------------------------
-
-
-@app.get("/api/analytics/usage")
-async def get_usage_analytics(days: int = 30):
-    from hermes_state import SessionDB
-    db = SessionDB()
-    try:
-        cutoff = time.time() - (days * 86400)
-        cur = db._conn.execute("""
-            SELECT date(started_at, 'unixepoch') as day,
-                   SUM(input_tokens) as input_tokens,
-                   SUM(output_tokens) as output_tokens,
-                   SUM(cache_read_tokens) as cache_read_tokens,
-                   SUM(reasoning_tokens) as reasoning_tokens,
-                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
-                   COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
-                   COUNT(*) as sessions
-            FROM sessions WHERE started_at > ?
-            GROUP BY day ORDER BY day
-        """, (cutoff,))
-        daily = [dict(r) for r in cur.fetchall()]
-
-        cur2 = db._conn.execute("""
-            SELECT model,
-                   SUM(input_tokens) as input_tokens,
-                   SUM(output_tokens) as output_tokens,
-                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
-                   COUNT(*) as sessions
-            FROM sessions WHERE started_at > ? AND model IS NOT NULL
-            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
-        """, (cutoff,))
-        by_model = [dict(r) for r in cur2.fetchall()]
-
-        cur3 = db._conn.execute("""
-            SELECT SUM(input_tokens) as total_input,
-                   SUM(output_tokens) as total_output,
-                   SUM(cache_read_tokens) as total_cache_read,
-                   SUM(reasoning_tokens) as total_reasoning,
-                   COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
-                   COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
-                   COUNT(*) as total_sessions
-            FROM sessions WHERE started_at > ?
-        """, (cutoff,))
-        totals = dict(cur3.fetchone())
-
-        return {"daily": daily, "by_model": by_model, "totals": totals, "period_days": days}
-    finally:
-        db.close()
-
-
-def mount_spa(application: FastAPI):
-    """Mount the built SPA. Falls back to index.html for client-side routing."""
-    if not WEB_DIST.exists():
-        @application.get("/{full_path:path}")
-        async def no_frontend(full_path: str):
-            return JSONResponse(
-                {"error": "Frontend not built. Run: cd web && npm run build"},
-                status_code=404,
-            )
-        return
-
-    application.mount("/assets", StaticFiles(directory=WEB_DIST / "assets"), name="assets")
-
-    @application.get("/{full_path:path}")
-    async def serve_spa(full_path: str):
-        file_path = WEB_DIST / full_path
-        # Prevent path traversal via url-encoded sequences (%2e%2e/)
-        if (
-            full_path
-            and file_path.resolve().is_relative_to(WEB_DIST.resolve())
-            and file_path.exists()
-            and file_path.is_file()
-        ):
-            return FileResponse(file_path)
-        return FileResponse(
-            WEB_DIST / "index.html",
-            headers={"Cache-Control": "no-store, no-cache, must-revalidate"},
-        )
-
-
-mount_spa(app)
-
-
-def start_server(host: str = "127.0.0.1", port: int = 9119, open_browser: bool = True):
-    """Start the web UI server."""
-    import uvicorn
-
-    if host not in ("127.0.0.1", "localhost", "::1"):
-        import logging
-        logging.warning(
-            "Binding to %s — the web UI exposes config and API keys. "
-            "Only bind to non-localhost if you trust all users on the network.", host,
-        )
-
-    if open_browser:
-        import threading
-        import webbrowser
-
-        def _open():
-            import time as _t
-            _t.sleep(1.0)
-            webbrowser.open(f"http://{host}:{port}")
-
-        threading.Thread(target=_open, daemon=True).start()
-
-    print(f"  Hermes Web UI → http://{host}:{port}")
-    uvicorn.run(app, host=host, port=port, log_level="warning")
@@ -189,37 +189,6 @@ def is_wsl() -> bool:
    return _wsl_detected


-_container_detected: bool | None = None
-
-
-def is_container() -> bool:
-    """Return True when running inside a Docker/Podman container.
-
-    Checks ``/.dockerenv`` (Docker), ``/run/.containerenv`` (Podman),
-    and ``/proc/1/cgroup`` for container runtime markers.  Result is
-    cached for the process lifetime.  Import-safe — no heavy deps.
-    """
-    global _container_detected
-    if _container_detected is not None:
-        return _container_detected
-    if os.path.exists("/.dockerenv"):
-        _container_detected = True
-        return True
-    if os.path.exists("/run/.containerenv"):
-        _container_detected = True
-        return True
-    try:
-        with open("/proc/1/cgroup", "r") as f:
-            cgroup = f.read()
-            if "docker" in cgroup or "podman" in cgroup or "/lxc/" in cgroup:
-                _container_detected = True
-                return True
-    except OSError:
-        pass
-    _container_detected = False
-    return False
-
-
 # ─── Well-Known Paths ─────────────────────────────────────────────────────────


@@ -31,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 7
+SCHEMA_VERSION = 6

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -65,12 +65,6 @@ CREATE TABLE IF NOT EXISTS sessions (
    cost_source TEXT,
    pricing_version TEXT,
    title TEXT,
-    session_key TEXT,
-    platform TEXT,
-    chat_type TEXT,
-    origin_json TEXT,
-    display_name TEXT,
-    memory_flushed INTEGER DEFAULT 0,
    FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
 );

@@ -335,36 +329,6 @@ class SessionDB:
                    except sqlite3.OperationalError:
                        pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 6")
-            if current_version < 7:
-                # v7: add gateway routing metadata columns to sessions table.
-                # These columns allow state.db to serve as the single source of
-                # truth for session discovery, replacing sessions.json reads.
-                for col_name, col_type in [
-                    ("session_key", "TEXT"),
-                    ("platform", "TEXT"),
-                    ("chat_type", "TEXT"),
-                    ("origin_json", "TEXT"),
-                    ("display_name", "TEXT"),
-                    ("memory_flushed", "INTEGER DEFAULT 0"),
-                ]:
-                    try:
-                        safe = col_name.replace('"', '""')
-                        cursor.execute(
-                            f'ALTER TABLE sessions ADD COLUMN "{safe}" {col_type}'
-                        )
-                    except sqlite3.OperationalError:
-                        pass  # Column already exists
-                # Create index on session_key for fast lookups
-                try:
-                    cursor.execute(
-                        "CREATE INDEX IF NOT EXISTS idx_sessions_session_key "
-                        "ON sessions(session_key)"
-                    )
-                except sqlite3.OperationalError:
-                    pass
-                # Backfill from sessions.json if it exists
-                self._backfill_gateway_metadata_v7(cursor)
-                cursor.execute("UPDATE schema_version SET version = 7")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -376,15 +340,6 @@ class SessionDB:
        except sqlite3.OperationalError:
            pass  # Index already exists

-        # session_key index for gateway metadata lookups
-        try:
-            cursor.execute(
-                "CREATE INDEX IF NOT EXISTS idx_sessions_session_key "
-                "ON sessions(session_key)"
-            )
-        except sqlite3.OperationalError:
-            pass
-
        # FTS5 setup (separate because CREATE VIRTUAL TABLE can't be in executescript with IF NOT EXISTS reliably)
        try:
            cursor.execute("SELECT * FROM messages_fts LIMIT 0")
@@ -393,37 +348,6 @@ class SessionDB:

        self._conn.commit()

-    def _backfill_gateway_metadata_v7(self, cursor):
-        """Backfill gateway routing metadata from sessions.json during v7 migration."""
-        try:
-            sessions_dir = get_hermes_home() / "sessions"
-            sessions_file = sessions_dir / "sessions.json"
-            if not sessions_file.exists():
-                return
-            with open(sessions_file, "r", encoding="utf-8") as f:
-                data = json.load(f)
-            for _key, entry in data.items():
-                session_id = entry.get("session_id", "")
-                if not session_id:
-                    continue
-                session_key = entry.get("session_key", _key)
-                platform = entry.get("platform", "")
-                chat_type = entry.get("chat_type", "dm")
-                display_name = entry.get("display_name")
-                origin = entry.get("origin")
-                origin_json = json.dumps(origin) if origin else None
-                memory_flushed = 1 if entry.get("memory_flushed") else 0
-                cursor.execute(
-                    """UPDATE sessions SET
-                        session_key = ?, platform = ?, chat_type = ?,
-                        origin_json = ?, display_name = ?, memory_flushed = ?
-                    WHERE id = ? AND session_key IS NULL""",
-                    (session_key, platform, chat_type, origin_json,
-                     display_name, memory_flushed, session_id),
-                )
-        except Exception as e:
-            logger.debug("v7 backfill from sessions.json failed (non-fatal): %s", e)
-
    # =========================================================================
    # Session lifecycle
    # =========================================================================
@@ -458,112 +382,6 @@ class SessionDB:
        self._execute_write(_do)
        return session_id

-    def set_gateway_metadata(
-        self,
-        session_id: str,
-        session_key: str = None,
-        platform: str = None,
-        chat_type: str = None,
-        origin_json: str = None,
-        display_name: str = None,
-    ) -> None:
-        """Write gateway routing metadata for a session.
-
-        Called by the gateway after creating or resuming a session so that
-        state.db becomes the single source of truth for session discovery.
-        Uses UPDATE (not UPSERT) — the session row must already exist.
-        """
-        sets = []
-        params = []
-        if session_key is not None:
-            sets.append("session_key = ?")
-            params.append(session_key)
-        if platform is not None:
-            sets.append("platform = ?")
-            params.append(platform)
-        if chat_type is not None:
-            sets.append("chat_type = ?")
-            params.append(chat_type)
-        if origin_json is not None:
-            sets.append("origin_json = ?")
-            params.append(origin_json)
-        if display_name is not None:
-            sets.append("display_name = ?")
-            params.append(display_name)
-        if not sets:
-            return
-        params.append(session_id)
-        sql = f"UPDATE sessions SET {', '.join(sets)} WHERE id = ?"
-
-        def _do(conn):
-            conn.execute(sql, params)
-        self._execute_write(_do)
-
-    def set_memory_flushed(self, session_id: str, flushed: bool = True) -> None:
-        """Mark a session as having its memory flushed."""
-        def _do(conn):
-            conn.execute(
-                "UPDATE sessions SET memory_flushed = ? WHERE id = ?",
-                (1 if flushed else 0, session_id),
-            )
-        self._execute_write(_do)
-
-    def list_gateway_sessions(self, platform: str = None) -> List[Dict[str, Any]]:
-        """List sessions that have gateway routing metadata.
-
-        Returns dicts with: id, session_key, platform, chat_type,
-        origin_json, display_name, source, started_at, ended_at, title,
-        message_count, memory_flushed.
-
-        When ``platform`` is given, only sessions for that platform are returned.
-        Only sessions with a non-NULL session_key are included (i.e. sessions
-        that were created through the gateway, not bare CLI sessions).
-        """
-        where = "WHERE session_key IS NOT NULL"
-        params = []
-        if platform:
-            where += " AND platform = ?"
-            params.append(platform)
-        with self._lock:
-            rows = self._conn.execute(
-                f"""SELECT id, session_key, platform, chat_type, origin_json,
-                       display_name, source, started_at, ended_at, title,
-                       message_count, memory_flushed
-                FROM sessions {where}
-                ORDER BY started_at DESC""",
-                params,
-            ).fetchall()
-        return [dict(r) for r in rows]
-
-    def find_session_by_origin(
-        self, platform: str, chat_id: str, thread_id: str = None,
-    ) -> Optional[Dict[str, Any]]:
-        """Find the most recent session for a platform + chat_id pair.
-
-        Searches the origin_json column for matching chat_id.  When
-        ``thread_id`` is given, also matches on thread_id.  Returns the
-        session dict or None.
-        """
-        # Use JSON extraction for matching.  SQLite json_extract is
-        # available in all modern builds (3.9+).
-        sql = """
-            SELECT id, session_key, platform, chat_type, origin_json,
-                   display_name, source, started_at, ended_at, title,
-                   memory_flushed
-            FROM sessions
-            WHERE platform = ?
-              AND json_extract(origin_json, '$.chat_id') = ?
-              AND session_key IS NOT NULL
-        """
-        params: list = [platform, str(chat_id)]
-        if thread_id is not None:
-            sql += " AND json_extract(origin_json, '$.thread_id') = ?"
-            params.append(str(thread_id))
-        sql += " ORDER BY started_at DESC LIMIT 1"
-        with self._lock:
-            row = self._conn.execute(sql, params).fetchone()
-        return dict(row) if row else None
-
    def end_session(self, session_id: str, end_reason: str) -> None:
        """Mark a session as ended."""
        def _do(conn):
@@ -79,45 +79,11 @@ def _get_session_db():


 def _load_sessions_index() -> dict:
-    """Load gateway session metadata from state.db.
+    """Load the gateway sessions.json index directly.

    Returns a dict of session_key -> entry_dict with platform routing info.
-    Falls back to reading sessions.json when state.db has no gateway metadata
-    (pre-migration databases).
+    This avoids importing the full SessionStore which needs GatewayConfig.
    """
-    try:
-        from hermes_state import SessionDB
-        db = SessionDB()
-        try:
-            rows = db.list_gateway_sessions()
-        finally:
-            db.close()
-        if rows:
-            result = {}
-            for row in rows:
-                sk = row.get("session_key")
-                if not sk:
-                    continue
-                entry = {
-                    "session_key": sk,
-                    "session_id": row.get("id", ""),
-                    "platform": row.get("platform", ""),
-                    "chat_type": row.get("chat_type", "dm"),
-                    "display_name": row.get("display_name"),
-                    "memory_flushed": bool(row.get("memory_flushed", 0)),
-                }
-                origin_json = row.get("origin_json")
-                if origin_json:
-                    try:
-                        entry["origin"] = json.loads(origin_json)
-                    except (json.JSONDecodeError, TypeError):
-                        pass
-                result[sk] = entry
-            return result
-    except Exception as e:
-        logger.debug("Failed to load sessions from state.db: %s", e)
-
-    # Fallback: read sessions.json for pre-migration databases
    sessions_file = _get_sessions_dir() / "sessions.json"
    if not sessions_file.exists():
        return {}
@@ -234,7 +200,8 @@ class EventBridge:
        self._last_poll_timestamps: Dict[str, float] = {}  # session_key -> unix timestamp
        # In-memory approval tracking (populated from events)
        self._pending_approvals: Dict[str, dict] = {}
-        # mtime cache — skip expensive work when state.db hasn't changed
+        # mtime cache — skip expensive work when files haven't changed
+        self._sessions_json_mtime: float = 0.0
        self._state_db_mtime: float = 0.0
        self._cached_sessions_index: dict = {}

@@ -360,10 +327,21 @@ class EventBridge:
    def _poll_once(self, db):
        """Check for new messages across all sessions.

-        Uses mtime check on state.db to skip work when nothing has changed
-        — makes 200ms polling essentially free.
+        Uses mtime checks on sessions.json and state.db to skip work
+        when nothing has changed — makes 200ms polling essentially free.
        """
-        # Check if state.db has changed (mtime check is ~1μs)
+        # Check if sessions.json has changed (mtime check is ~1μs)
+        sessions_file = _get_sessions_dir() / "sessions.json"
+        try:
+            sj_mtime = sessions_file.stat().st_mtime if sessions_file.exists() else 0.0
+        except OSError:
+            sj_mtime = 0.0
+
+        if sj_mtime != self._sessions_json_mtime:
+            self._sessions_json_mtime = sj_mtime
+            self._cached_sessions_index = _load_sessions_index()
+
+        # Check if state.db has changed
        try:
            from hermes_constants import get_hermes_home
            db_file = get_hermes_home() / "state.db"
@@ -375,13 +353,10 @@ class EventBridge:
        except OSError:
            db_mtime = 0.0

-        if db_mtime == self._state_db_mtime:
+        if db_mtime == self._state_db_mtime and sj_mtime == self._sessions_json_mtime:
            return  # Nothing changed since last poll — skip entirely

        self._state_db_mtime = db_mtime
-        # Reload the session index from state.db on every change since
-        # new sessions may have been created.
-        self._cached_sessions_index = _load_sessions_index()
        entries = self._cached_sessions_index

        for session_key, entry in entries.items():
@@ -19,9 +19,6 @@
    "agent-browser": "^0.13.0",
    "@askjo/camoufox-browser": "^1.0.0"
  },
-  "overrides": {
-    "lodash": "4.18.1"
-  },
  "engines": {
    "node": ">=18.0.0"
  }
@@ -76,7 +76,6 @@ termux = [
 ]
 dingtalk = ["dingtalk-stream>=0.1.0,<1"]
 feishu = ["lark-oapi>=1.5.3,<2"]
-web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git",
@@ -108,7 +107,6 @@ all = [
  "hermes-agent[dingtalk]",
  "hermes-agent[feishu]",
  "hermes-agent[mistral]",
-  "hermes-agent[web]",
 ]

 [project.scripts]
@@ -119,9 +117,6 @@ hermes-acp = "acp_adapter.entry:main"
 [tool.setuptools]
 py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]

-[tool.setuptools.package-data]
-hermes_cli = ["web_dist/**/*"]
-
 [tool.setuptools.packages.find]
 include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

@@ -94,7 +94,7 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
@@ -1307,7 +1307,6 @@ class AIAgent:
                api_key=getattr(self, "api_key", ""),
                config_context_length=_config_context_length,
                provider=self.provider,
-                api_mode=self.api_mode,
            )
        self.compression_enabled = compression_enabled

@@ -1564,7 +1563,6 @@ class AIAgent:
                base_url=self.base_url,
                api_key=getattr(self, "api_key", ""),
                provider=self.provider,
-                api_mode=self.api_mode,
            )

        # ── Invalidate cached system prompt so it rebuilds next turn ──
@@ -1698,16 +1696,6 @@ class AIAgent:
            except Exception:
                logger.debug("status_callback error in _emit_status", exc_info=True)

-    def _current_main_runtime(self) -> Dict[str, str]:
-        """Return the live main runtime for session-scoped auxiliary routing."""
-        return {
-            "model": getattr(self, "model", "") or "",
-            "provider": getattr(self, "provider", "") or "",
-            "base_url": getattr(self, "base_url", "") or "",
-            "api_key": getattr(self, "api_key", "") or "",
-            "api_mode": getattr(self, "api_mode", "") or "",
-        }
-
    def _check_compression_model_feasibility(self) -> None:
        """Warn at session start if the auxiliary compression model's context
        window is smaller than the main model's compression threshold.
@@ -1728,10 +1716,7 @@ class AIAgent:
            from agent.auxiliary_client import get_text_auxiliary_client
            from agent.model_metadata import get_model_context_length

-            client, aux_model = get_text_auxiliary_client(
-                "compression",
-                main_runtime=self._current_main_runtime(),
-            )
+            client, aux_model = get_text_auxiliary_client("compression")
            if client is None or not aux_model:
                msg = (
                    "⚠ No auxiliary LLM provider configured — context "
@@ -1748,25 +1733,10 @@ class AIAgent:

            aux_base_url = str(getattr(client, "base_url", ""))
            aux_api_key = str(getattr(client, "api_key", ""))
-
-            # Read user-configured context_length for the compression model.
-            # Custom endpoints often don't support /models API queries so
-            # get_model_context_length() falls through to the 128K default,
-            # ignoring the explicit config value.  Pass it as the highest-
-            # priority hint so the configured value is always respected.
-            _aux_cfg = (self.config or {}).get("auxiliary", {}).get("compression", {})
-            _aux_context_config = _aux_cfg.get("context_length") if isinstance(_aux_cfg, dict) else None
-            if _aux_context_config is not None:
-                try:
-                    _aux_context_config = int(_aux_context_config)
-                except (TypeError, ValueError):
-                    _aux_context_config = None
-
            aux_context = get_model_context_length(
                aux_model,
                base_url=aux_base_url,
                api_key=aux_api_key,
-                config_context_length=_aux_context_config,
            )

            threshold = self.context_compressor.threshold_tokens
@@ -1887,13 +1857,12 @@ class AIAgent:
        if not content:
            return ""
        # Strip all reasoning tag variants: <think>, <thinking>, <THINKING>,
-        # <reasoning>, <REASONING_SCRATCHPAD>, <thought> (Gemma 4)
+        # <reasoning>, <REASONING_SCRATCHPAD>
        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        content = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL)
        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL)
-        content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
+        content = re.sub(r'</?(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
        return content

    def _looks_like_codex_intermediate_ack(
@@ -3209,12 +3178,6 @@ class AIAgent:
                f"not on any model name returned by the API."
            )

-        # Environment hints (WSL, Termux, etc.) — tell the agent about the
-        # execution environment so it can translate paths and adapt behavior.
-        _env_hints = build_environment_hints()
-        if _env_hints:
-            prompt_parts.append(_env_hints)
-
        platform_key = (self.platform or "").lower().strip()
        if platform_key in PLATFORM_HINTS:
            prompt_parts.append(PLATFORM_HINTS[platform_key])
@@ -5391,22 +5354,13 @@ class AIAgent:
                # a new API call, creating a duplicate message.  Return a
                # partial "stop" response instead so the outer loop treats this
                # turn as complete (no retry, no fallback).
-                # Recover whatever content was already streamed to the user.
-                # _current_streamed_assistant_text accumulates text fired
-                # through _fire_stream_delta, so it has exactly what the
-                # user saw before the connection died.
-                _partial_text = (
-                    getattr(self, "_current_streamed_assistant_text", "") or ""
-                ).strip() or None
                logger.warning(
                    "Partial stream delivered before error; returning stub "
-                    "response with %s chars of recovered content to prevent "
-                    "duplicate messages: %s",
-                    len(_partial_text or ""),
+                    "response to prevent duplicate messages: %s",
                    result["error"],
                )
                _stub_msg = SimpleNamespace(
-                    role="assistant", content=_partial_text, tool_calls=None,
+                    role="assistant", content=None, tool_calls=None,
                    reasoning_content=None,
                )
                return SimpleNamespace(
@@ -5865,12 +5819,11 @@ class AIAgent:
        """True when using an anthropic-compatible endpoint that preserves dots in model names.
        Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
        MiniMax keeps dots (e.g. MiniMax-M2.7).
-        OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free).
-        ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1)."""
-        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai"}:
+        OpenCode Go keeps dots (e.g. minimax-m2.7)."""
+        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go"}:
            return True
        base = (getattr(self, "base_url", "") or "").lower()
-        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base
+        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/go" in base

    def _is_qwen_portal(self) -> bool:
        """Return True when the base URL targets Qwen Portal."""
@@ -8250,8 +8203,7 @@ class AIAgent:
                        if self.thinking_callback:
                            self.thinking_callback("")
                        
-                        # Invalid response — could be rate limiting, provider timeout,
-                        # upstream server error, or malformed response.
+                        # This is often rate limiting or provider returning malformed response
                        retry_count += 1
                        
                        # Eager fallback: empty/malformed responses are a common
@@ -8287,44 +8239,11 @@ class AIAgent:
                            if self.verbose_logging:
                                logging.debug(f"Response attributes for invalid response: {resp_attrs}")
                        
-                        # Extract error code from response for contextual diagnostics
-                        _resp_error_code = None
-                        if response and hasattr(response, 'error') and response.error:
-                            _code_raw = getattr(response.error, 'code', None)
-                            if _code_raw is None and isinstance(response.error, dict):
-                                _code_raw = response.error.get('code')
-                            if _code_raw is not None:
-                                try:
-                                    _resp_error_code = int(_code_raw)
-                                except (TypeError, ValueError):
-                                    pass
-
-                        # Build a human-readable failure hint from the error code
-                        # and response time, instead of always assuming rate limiting.
-                        if _resp_error_code == 524:
-                            _failure_hint = f"upstream provider timed out (Cloudflare 524, {api_duration:.0f}s)"
-                        elif _resp_error_code == 504:
-                            _failure_hint = f"upstream gateway timeout (504, {api_duration:.0f}s)"
-                        elif _resp_error_code == 429:
-                            _failure_hint = f"rate limited by upstream provider (429)"
-                        elif _resp_error_code in (500, 502):
-                            _failure_hint = f"upstream server error ({_resp_error_code}, {api_duration:.0f}s)"
-                        elif _resp_error_code in (503, 529):
-                            _failure_hint = f"upstream provider overloaded ({_resp_error_code})"
-                        elif _resp_error_code is not None:
-                            _failure_hint = f"upstream error (code {_resp_error_code}, {api_duration:.0f}s)"
-                        elif api_duration < 10:
-                            _failure_hint = f"fast response ({api_duration:.1f}s) — likely rate limited"
-                        elif api_duration > 60:
-                            _failure_hint = f"slow response ({api_duration:.0f}s) — likely upstream timeout"
-                        else:
-                            _failure_hint = f"response time {api_duration:.1f}s"
-
                        self._vprint(f"{self.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True)
                        self._vprint(f"{self.log_prefix}   🏢 Provider: {provider_name}", force=True)
                        cleaned_provider_error = self._clean_error_message(error_msg)
                        self._vprint(f"{self.log_prefix}   📝 Provider message: {cleaned_provider_error}", force=True)
-                        self._vprint(f"{self.log_prefix}   ⏱️  {_failure_hint}", force=True)
+                        self._vprint(f"{self.log_prefix}   ⏱️  Response time: {api_duration:.2f}s (fast response often indicates rate limiting)", force=True)
                        
                        if retry_count >= max_retries:
                            # Try fallback before giving up
@@ -8341,13 +8260,14 @@ class AIAgent:
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
-                                "error": f"Invalid API response after {max_retries} retries: {_failure_hint}",
+                                "error": "Invalid API response shape. Likely rate limited or malformed provider response.",
                                "failed": True  # Mark as failure for filtering
                            }
                        
-                        # Backoff before retry — jittered exponential: 5s base, 120s cap
+                        # Longer backoff for rate limiting (likely cause of None choices)
+                        # Jittered exponential: 5s base, 120s cap + random jitter
                        wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
-                        self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
+                        self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...", force=True)
                        logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
                        
                        # Sleep in small increments to stay responsive to interrupts
@@ -8358,7 +8278,7 @@ class AIAgent:
                                self._persist_session(messages, conversation_history)
                                self.clear_interrupt()
                                return {
-                                    "final_response": f"Operation interrupted during retry ({_failure_hint}, attempt {retry_count}/{max_retries}).",
+                                    "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).",
                                    "messages": messages,
                                    "api_calls": api_call_count,
                                    "completed": False,
@@ -9761,25 +9681,12 @@ class AIAgent:
                    
                    # Pop thinking-only prefill message(s) before appending
                    # (tool-call path — same rationale as the final-response path).
-                    _had_prefill = False
                    while (
                        messages
                        and isinstance(messages[-1], dict)
                        and messages[-1].get("_thinking_prefill")
                    ):
                        messages.pop()
-                        _had_prefill = True
-
-                    # Reset prefill counter when tool calls follow a prefill
-                    # recovery.  Without this, the counter accumulates across
-                    # the whole conversation — a model that intermittently
-                    # empties (empty → prefill → tools → empty → prefill →
-                    # tools) burns both prefill attempts and the third empty
-                    # gets zero recovery.  Resetting here treats each tool-
-                    # call success as a fresh start.
-                    if _had_prefill:
-                        self._thinking_prefill_retries = 0
-                        self._empty_content_retries = 0

                    messages.append(assistant_msg)
                    self._emit_interim_assistant_message(assistant_msg)
@@ -9898,30 +9805,6 @@ class AIAgent:
                    
                    # Check if response only has think block with no actual content after it
                    if not self._has_content_after_think_block(final_response):
-                        # ── Partial stream recovery ─────────────────────
-                        # If content was already streamed to the user before
-                        # the connection died, use it as the final response
-                        # instead of falling through to prior-turn fallback
-                        # or wasting API calls on retries.
-                        _partial_streamed = (
-                            getattr(self, "_current_streamed_assistant_text", "") or ""
-                        )
-                        if self._has_content_after_think_block(_partial_streamed):
-                            _turn_exit_reason = "partial_stream_recovery"
-                            _recovered = self._strip_think_blocks(_partial_streamed).strip()
-                            logger.info(
-                                "Partial stream content delivered (%d chars) "
-                                "— using as final response",
-                                len(_recovered),
-                            )
-                            self._emit_status(
-                                "↻ Stream interrupted — using delivered content "
-                                "as final response"
-                            )
-                            final_response = _recovered
-                            self._response_was_previewed = True
-                            break
-
                        # If the previous turn already delivered real content alongside
                        # tool calls (e.g. "You're welcome!" + memory save), the model
                        # has nothing more to say. Use the earlier content immediately
@@ -9979,23 +9862,16 @@ class AIAgent:
                            self._save_session_log(messages)
                            continue

-                        # ── Empty response retry ──────────────────────
-                        # Model returned nothing usable.  Retry up to 3
-                        # times before attempting fallback.  This covers
-                        # both truly empty responses (no content, no
-                        # reasoning) AND reasoning-only responses after
-                        # prefill exhaustion — models like mimo-v2-pro
-                        # always populate reasoning fields via OpenRouter,
-                        # so the old `not _has_structured` guard blocked
-                        # retries for every reasoning model after prefill.
-                        _truly_empty = not self._strip_think_blocks(
-                            final_response
-                        ).strip()
-                        _prefill_exhausted = (
-                            _has_structured
-                            and self._thinking_prefill_retries >= 2
-                        )
-                        if _truly_empty and (not _has_structured or _prefill_exhausted) and self._empty_content_retries < 3:
+                        # ── Empty response retry (no reasoning) ──────
+                        # Model returned nothing — no content, no
+                        # structured reasoning, no tool calls.  Common
+                        # with open models (transient provider issues,
+                        # rate limits, sampling flukes).  Retry up to 3
+                        # times before attempting fallback.  Skip when
+                        # content has inline <think> tags (model chose
+                        # to reason, just no visible text).
+                        _truly_empty = not final_response.strip()
+                        if _truly_empty and not _has_structured and self._empty_content_retries < 3:
                            self._empty_content_retries += 1
                            logger.warning(
                                "Empty response (no content or reasoning) — "
@@ -10189,11 +10065,17 @@ class AIAgent:
        if final_response is None and (
            api_call_count >= self.max_iterations
            or self.iteration_budget.remaining <= 0
-        ):
-            # Budget exhausted — ask the model for a summary via one extra
-            # API call with tools stripped.  _handle_max_iterations injects a
-            # user message and makes a single toolless request.
-            _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
+        ) and not self._budget_exhausted_injected:
+            # Budget exhausted but we haven't tried asking the model to
+            # summarise yet.  Inject a user message and give it one grace
+            # API call to produce a text response.
+            self._budget_exhausted_injected = True
+            self._budget_grace_call = True
+            _grace_msg = (
+                "Your tool budget ran out. Please give me the information "
+                "or actions you've completed so far."
+            )
+            messages.append({"role": "user", "content": _grace_msg})
            self._emit_status(
                f"⚠️ Iteration budget exhausted ({api_call_count}/{self.max_iterations}) "
                "— asking model to summarise"
@@ -10203,6 +10085,14 @@ class AIAgent:
                    f"\n⚠️  Iteration budget exhausted ({api_call_count}/{self.max_iterations}) "
                    "— requesting summary..."
                )
+
+        if final_response is None and (
+            api_call_count >= self.max_iterations
+            or self.iteration_budget.remaining <= 0
+        ) and not self._budget_grace_call:
+            _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
+            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
+                print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
            final_response = self._handle_max_iterations(messages, api_call_count)
        
        # Determine if conversation completed successfully
@@ -1,325 +0,0 @@
-#!/usr/bin/env python3
-"""Build the Hermes Skills Index — a centralized JSON catalog of all skills.
-
-This script crawls every skill source (skills.sh, GitHub taps, official,
-clawhub, lobehub, claude-marketplace) and writes a JSON index with resolved
-GitHub paths. The index is served as a static file on the docs site so that
-`hermes skills search/install` can use it without hitting the GitHub API.
-
-Usage:
-    # Local (uses gh CLI or GITHUB_TOKEN for auth)
-    python scripts/build_skills_index.py
-
-    # CI (set GITHUB_TOKEN as secret)
-    GITHUB_TOKEN=ghp_... python scripts/build_skills_index.py
-
-Output: website/static/api/skills-index.json
-"""
-
-import json
-import os
-import sys
-import time
-from collections import defaultdict
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from datetime import datetime, timezone
-
-# Allow importing from repo root
-REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.insert(0, REPO_ROOT)
-
-# Ensure HERMES_HOME is set (needed by tools/skills_hub.py imports)
-os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
-
-from tools.skills_hub import (
-    GitHubAuth,
-    GitHubSource,
-    SkillsShSource,
-    OptionalSkillSource,
-    WellKnownSkillSource,
-    ClawHubSource,
-    ClaudeMarketplaceSource,
-    LobeHubSource,
-    SkillMeta,
-)
-import httpx
-
-OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "skills-index.json")
-INDEX_VERSION = 1
-
-
-def _meta_to_dict(meta: SkillMeta) -> dict:
-    """Convert a SkillMeta to a serializable dict."""
-    return {
-        "name": meta.name,
-        "description": meta.description,
-        "source": meta.source,
-        "identifier": meta.identifier,
-        "trust_level": meta.trust_level,
-        "repo": meta.repo or "",
-        "path": meta.path or "",
-        "tags": meta.tags or [],
-        "extra": meta.extra or {},
-    }
-
-
-def crawl_source(source, source_name: str, limit: int) -> list:
-    """Crawl a single source and return skill dicts."""
-    print(f"  Crawling {source_name}...", flush=True)
-    start = time.time()
-    try:
-        results = source.search("", limit=limit)
-    except Exception as e:
-        print(f"  Error crawling {source_name}: {e}", file=sys.stderr)
-        return []
-    skills = [_meta_to_dict(m) for m in results]
-    elapsed = time.time() - start
-    print(f"  {source_name}: {len(skills)} skills ({elapsed:.1f}s)", flush=True)
-    return skills
-
-
-def crawl_skills_sh(source: SkillsShSource) -> list:
-    """Crawl skills.sh using popular queries for broad coverage."""
-    print("  Crawling skills.sh (popular queries)...", flush=True)
-    start = time.time()
-
-    queries = [
-        "",  # featured
-        "react", "python", "web", "api", "database", "docker",
-        "testing", "scraping", "design", "typescript", "git",
-        "aws", "security", "data", "ml", "ai", "devops",
-        "frontend", "backend", "mobile", "cli", "documentation",
-        "kubernetes", "terraform", "rust", "go", "java",
-    ]
-
-    all_skills: dict[str, dict] = {}
-    for query in queries:
-        try:
-            results = source.search(query, limit=50)
-            for meta in results:
-                entry = _meta_to_dict(meta)
-                if entry["identifier"] not in all_skills:
-                    all_skills[entry["identifier"]] = entry
-        except Exception as e:
-            print(f"    Warning: skills.sh search '{query}' failed: {e}",
-                  file=sys.stderr)
-
-    elapsed = time.time() - start
-    print(f"  skills.sh: {len(all_skills)} unique skills ({elapsed:.1f}s)",
-          flush=True)
-    return list(all_skills.values())
-
-
-def _fetch_repo_tree(repo: str, auth: GitHubAuth) -> list:
-    """Fetch the recursive tree for a repo. Returns list of tree entries."""
-    headers = auth.get_headers()
-    try:
-        resp = httpx.get(
-            f"https://api.github.com/repos/{repo}",
-            headers=headers, timeout=15, follow_redirects=True,
-        )
-        if resp.status_code != 200:
-            return []
-        branch = resp.json().get("default_branch", "main")
-
-        resp = httpx.get(
-            f"https://api.github.com/repos/{repo}/git/trees/{branch}",
-            params={"recursive": "1"},
-            headers=headers, timeout=30, follow_redirects=True,
-        )
-        if resp.status_code != 200:
-            return []
-        data = resp.json()
-        if data.get("truncated"):
-            return []
-        return data.get("tree", [])
-    except Exception:
-        return []
-
-
-def batch_resolve_paths(skills: list, auth: GitHubAuth) -> list:
-    """Resolve GitHub paths for skills.sh entries using batch tree lookups.
-
-    Instead of resolving each skill individually (N×M API calls), we:
-    1. Group skills by repo
-    2. Fetch one tree per repo (2 API calls per repo)
-    3. Find all SKILL.md files in the tree
-    4. Match skills to their resolved paths
-    """
-    # Filter to skills.sh entries that need resolution
-    skills_sh = [s for s in skills if s["source"] in ("skills.sh", "skills-sh")]
-    if not skills_sh:
-        return skills
-
-    print(f"  Resolving paths for {len(skills_sh)} skills.sh entries...",
-          flush=True)
-    start = time.time()
-
-    # Group by repo
-    by_repo: dict[str, list] = defaultdict(list)
-    for s in skills_sh:
-        repo = s.get("repo", "")
-        if repo:
-            by_repo[repo].append(s)
-
-    print(f"    {len(by_repo)} unique repos to scan", flush=True)
-
-    resolved_count = 0
-
-    # Fetch trees in parallel (up to 6 concurrent)
-    def _resolve_repo(repo: str, entries: list):
-        tree = _fetch_repo_tree(repo, auth)
-        if not tree:
-            return 0
-
-        # Find all SKILL.md paths in this repo
-        skill_paths = {}  # skill_dir_name -> full_path
-        for item in tree:
-            if item.get("type") != "blob":
-                continue
-            path = item.get("path", "")
-            if path.endswith("/SKILL.md"):
-                skill_dir = path[: -len("/SKILL.md")]
-                dir_name = skill_dir.split("/")[-1]
-                skill_paths[dir_name.lower()] = f"{repo}/{skill_dir}"
-
-                # Also check SKILL.md frontmatter name if we can match by path
-                # For now, just index by directory name
-            elif path == "SKILL.md":
-                # Root-level SKILL.md
-                skill_paths["_root_"] = f"{repo}"
-
-        count = 0
-        for entry in entries:
-            # Try to match the skill's name/path to a tree entry
-            skill_name = entry.get("name", "").lower()
-            skill_path = entry.get("path", "").lower()
-            identifier = entry.get("identifier", "")
-
-            # Extract the skill token from the identifier
-            # e.g. "skills-sh/d4vinci/scrapling/scrapling-official" -> "scrapling-official"
-            parts = identifier.replace("skills-sh/", "").replace("skills.sh/", "")
-            skill_token = parts.split("/")[-1].lower() if "/" in parts else ""
-
-            # Try matching in order of likelihood
-            for candidate in [skill_token, skill_name, skill_path]:
-                if not candidate:
-                    continue
-                matched = skill_paths.get(candidate)
-                if matched:
-                    entry["resolved_github_id"] = matched
-                    count += 1
-                    break
-            else:
-                # Try fuzzy: skill_token with common transformations
-                for tree_name, tree_path in skill_paths.items():
-                    if (skill_token and (
-                        tree_name.replace("-", "") == skill_token.replace("-", "")
-                        or skill_token in tree_name
-                        or tree_name in skill_token
-                    )):
-                        entry["resolved_github_id"] = tree_path
-                        count += 1
-                        break
-
-        return count
-
-    with ThreadPoolExecutor(max_workers=6) as pool:
-        futures = {
-            pool.submit(_resolve_repo, repo, entries): repo
-            for repo, entries in by_repo.items()
-        }
-        for future in as_completed(futures):
-            try:
-                resolved_count += future.result()
-            except Exception as e:
-                repo = futures[future]
-                print(f"    Warning: {repo}: {e}", file=sys.stderr)
-
-    elapsed = time.time() - start
-    print(f"  Resolved {resolved_count}/{len(skills_sh)} paths ({elapsed:.1f}s)",
-          flush=True)
-    return skills
-
-
-def main():
-    print("Building Hermes Skills Index...", flush=True)
-    overall_start = time.time()
-
-    auth = GitHubAuth()
-    print(f"GitHub auth: {auth.auth_method()}")
-    if auth.auth_method() == "anonymous":
-        print("WARNING: No GitHub authentication — rate limit is 60/hr. "
-              "Set GITHUB_TOKEN for better results.", file=sys.stderr)
-
-    skills_sh_source = SkillsShSource(auth=auth)
-    sources = {
-        "official": OptionalSkillSource(),
-        "well-known": WellKnownSkillSource(),
-        "github": GitHubSource(auth=auth),
-        "clawhub": ClawHubSource(),
-        "claude-marketplace": ClaudeMarketplaceSource(auth=auth),
-        "lobehub": LobeHubSource(),
-    }
-
-    all_skills: list[dict] = []
-
-    # Crawl skills.sh
-    all_skills.extend(crawl_skills_sh(skills_sh_source))
-
-    # Crawl other sources in parallel
-    with ThreadPoolExecutor(max_workers=4) as pool:
-        futures = {}
-        for name, source in sources.items():
-            futures[pool.submit(crawl_source, source, name, 500)] = name
-        for future in as_completed(futures):
-            try:
-                all_skills.extend(future.result())
-            except Exception as e:
-                print(f"  Error: {e}", file=sys.stderr)
-
-    # Batch resolve GitHub paths for skills.sh entries
-    all_skills = batch_resolve_paths(all_skills, auth)
-
-    # Deduplicate by identifier
-    seen: dict[str, dict] = {}
-    for skill in all_skills:
-        key = skill["identifier"]
-        if key not in seen:
-            seen[key] = skill
-    deduped = list(seen.values())
-
-    # Sort
-    source_order = {"official": 0, "skills-sh": 1, "skills.sh": 1,
-                    "github": 2, "well-known": 3, "clawhub": 4,
-                    "claude-marketplace": 5, "lobehub": 6}
-    deduped.sort(key=lambda s: (source_order.get(s["source"], 99), s["name"]))
-
-    # Build index
-    index = {
-        "version": INDEX_VERSION,
-        "generated_at": datetime.now(timezone.utc).isoformat(),
-        "skill_count": len(deduped),
-        "skills": deduped,
-    }
-
-    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
-    with open(OUTPUT_PATH, "w") as f:
-        json.dump(index, f, separators=(",", ":"), ensure_ascii=False)
-
-    elapsed = time.time() - overall_start
-    file_size = os.path.getsize(OUTPUT_PATH)
-    print(f"\nDone! {len(deduped)} skills indexed in {elapsed:.0f}s")
-    print(f"Output: {OUTPUT_PATH} ({file_size / 1024:.0f} KB)")
-
-    from collections import Counter
-    by_source = Counter(s["source"] for s in deduped)
-    for src, count in sorted(by_source.items(), key=lambda x: -x[1]):
-        resolved = sum(1 for s in deduped
-                       if s["source"] == src and s.get("resolved_github_id"))
-        extra = f" ({resolved} resolved)" if resolved else ""
-        print(f"  {src}: {count}{extra}")
-
-
-if __name__ == "__main__":
-    main()
@@ -15,9 +15,9 @@
      }
    },
    "node_modules/@borewit/text-codec": {
-      "version": "0.2.2",
-      "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.2.tgz",
-      "integrity": "sha512-DDaRehssg1aNrH4+2hnj1B7vnUGEjU6OIlyRdkMd0aUdIUvKXrJfXsy8LVtXAy7DRvYVluWbMspsRhz2lcW0mQ==",
+      "version": "0.2.1",
+      "resolved": "https://registry.npmjs.org/@borewit/text-codec/-/text-codec-0.2.1.tgz",
+      "integrity": "sha512-k7vvKPbf7J2fZ5klGRD9AeKfUvojuZIQ3BT5u7Jfv+puwXkUBUT5PVyMDfJZpy30CBDXGMgw7fguK/lpOMBvgw==",
      "license": "MIT",
      "funding": {
        "type": "github",
@@ -1088,9 +1088,9 @@
      }
    },
    "node_modules/file-type": {
-      "version": "21.3.4",
-      "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.4.tgz",
-      "integrity": "sha512-Ievi/yy8DS3ygGvT47PjSfdFoX+2isQueoYP1cntFW1JLYAuS4GD7NUPGg4zv2iZfV52uDyk5w5Z0TdpRS6Q1g==",
+      "version": "21.3.0",
+      "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.3.0.tgz",
+      "integrity": "sha512-8kPJMIGz1Yt/aPEwOsrR97ZyZaD1Iqm8PClb1nYFclUCkBi0Ma5IsYNQzvSFS9ib51lWyIw5mIT9rWzI/xjpzA==",
      "license": "MIT",
      "dependencies": {
        "@tokenizer/inflate": "^0.4.1",
@@ -1456,9 +1456,9 @@
      "license": "MIT"
    },
    "node_modules/music-metadata": {
-      "version": "11.12.3",
-      "resolved": "https://registry.npmjs.org/music-metadata/-/music-metadata-11.12.3.tgz",
-      "integrity": "sha512-n6hSTZkuD59qWgHh6IP5dtDlDZQXoxk/bcA85Jywg8Z1iFrlNgl2+GTFgjZyn52W5UgQpV42V4XqrQZZAMbZTQ==",
+      "version": "11.12.1",
+      "resolved": "https://registry.npmjs.org/music-metadata/-/music-metadata-11.12.1.tgz",
+      "integrity": "sha512-j++ltLxHDb5VCXET9FzQ8bnueiLHwQKgCO7vcbkRH/3F7fRjPkv6qncGEJ47yFhmemcYtgvsOAlcQ1dRBTkDjg==",
      "funding": [
        {
          "type": "github",
@@ -1471,11 +1471,11 @@
      ],
      "license": "MIT",
      "dependencies": {
-        "@borewit/text-codec": "^0.2.2",
+        "@borewit/text-codec": "^0.2.1",
        "@tokenizer/token": "^0.3.0",
        "content-type": "^1.0.5",
        "debug": "^4.4.3",
-        "file-type": "^21.3.1",
+        "file-type": "^21.3.0",
        "media-typer": "^1.1.0",
        "strtok3": "^10.3.4",
        "token-types": "^6.1.2",
@@ -1589,9 +1589,9 @@
      }
    },
    "node_modules/path-to-regexp": {
-      "version": "0.1.13",
-      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.13.tgz",
-      "integrity": "sha512-A/AGNMFN3c8bOlvV9RreMdrv7jsmF9XIfDeCd87+I8RNg6s78BhJxMu69NEMHBSJFxKidViTEdruRwEk/WIKqA==",
+      "version": "0.1.12",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.12.tgz",
+      "integrity": "sha512-RA1GjUVMnvYFxuqovrEqZoxxW5NUZqbwKtYz/Tt7nXerk0LbLblQmrsgdeOxV5SFHf0UDggjS/bSeOZwt1pmEQ==",
      "license": "MIT"
    },
    "node_modules/pino": {
@@ -2002,9 +2002,9 @@
      }
    },
    "node_modules/strtok3": {
-      "version": "10.3.5",
-      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.5.tgz",
-      "integrity": "sha512-ki4hZQfh5rX0QDLLkOCj+h+CVNkqmp/CMf8v8kZpkNVK6jGQooMytqzLZYUVYIZcFZ6yDB70EfD8POcFXiF5oA==",
+      "version": "10.3.4",
+      "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.4.tgz",
+      "integrity": "sha512-KIy5nylvC5le1OdaaoCJ07L+8iQzJHGH6pWDuzS+d07Cu7n1MZ2x26P8ZKIWfbK02+XIL8Mp4RkWeqdUCrDMfg==",
      "license": "MIT",
      "dependencies": {
        "@tokenizer/token": "^0.3.0"
@@ -19,7 +19,7 @@ What makes Hermes different:

 - **Self-improving through skills** — Hermes learns from experience by saving reusable procedures as skills. When it solves a complex problem, discovers a workflow, or gets corrected, it can persist that knowledge as a skill document that loads into future sessions. Skills accumulate over time, making the agent better at your specific tasks and environment.
 - **Persistent memory across sessions** — remembers who you are, your preferences, environment details, and lessons learned. Pluggable memory backends (built-in, Honcho, Mem0, and more) let you choose how memory works.
- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 10+ other platforms with full tool access, not just chat.
+- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 8+ other platforms with full tool access, not just chat.
 - **Provider-agnostic** — swap models and providers mid-workflow without changing anything else. Credential pools rotate across multiple API keys automatically.
 - **Profiles** — run multiple independent Hermes instances with isolated configs, sessions, skills, and memory.
 - **Extensible** — plugins, MCP servers, custom tools, webhook triggers, cron scheduling, and the full Python ecosystem.
@@ -148,7 +148,7 @@ hermes gateway status       Check status
 hermes gateway setup        Configure platforms
 ```

-Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, BlueBubbles (iMessage), Weixin (WeChat), API Server, Webhooks. Open WebUI connects via the API Server adapter.
+Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, API Server, Webhooks, Open WebUI.

 Platform docs: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/

@@ -215,7 +215,7 @@ hermes insights [--days N]  Usage analytics
 hermes update               Update to latest version
 hermes pairing list/approve/revoke  DM authorization
 hermes plugins list/install/remove  Plugin management
-hermes honcho setup/status  Honcho memory integration (requires honcho plugin)
+hermes honcho setup/status  Honcho memory integration
 hermes memory setup/status/off  Memory provider config
 hermes completion bash|zsh  Shell completions
 hermes acp                  ACP server (IDE integration)
@@ -269,28 +269,6 @@ Type these during an interactive chat session.
 /plugins             List plugins (CLI)
 ```

-### Gateway
-```
-/approve             Approve a pending command (gateway)
-/deny                Deny a pending command (gateway)
-/restart             Restart gateway (gateway)
-/sethome             Set current chat as home channel (gateway)
-/update              Update Hermes to latest (gateway)
-/platforms (/gateway) Show platform connection status (gateway)
-```
-
-### Utility
-```
-/branch (/fork)      Branch the current session
-/btw                 Ephemeral side question (doesn't interrupt main task)
-/fast                Toggle priority/fast processing
-/browser             Open CDP browser connection
-/history             Show conversation history (CLI)
-/save                Save conversation to file (CLI)
-/paste               Attach clipboard image (CLI)
-/image               Attach local image file (CLI)
-```
-
 ### Info
 ```
 /help                Show commands
@@ -333,11 +311,11 @@ Edit with `hermes config edit` or `hermes config set section.key value`.
 | `terminal` | `backend` (local/docker/ssh/modal), `cwd`, `timeout` (180) |
 | `compression` | `enabled`, `threshold` (0.50), `target_ratio` (0.20) |
 | `display` | `skin`, `tool_progress`, `show_reasoning`, `show_cost` |
-| `stt` | `enabled`, `provider` (local/groq/openai/mistral) |
-| `tts` | `provider` (edge/elevenlabs/openai/minimax/mistral/neutts) |
+| `stt` | `enabled`, `provider` (local/groq/openai) |
+| `tts` | `provider` (edge/elevenlabs/openai/kokoro/fish) |
 | `memory` | `memory_enabled`, `user_profile_enabled`, `provider` |
 | `security` | `tirith_enabled`, `website_blocklist` |
-| `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` |
+| `delegation` | `model`, `provider`, `max_iterations` (50) |
 | `smart_model_routing` | `enabled`, `cheap_model` |
 | `checkpoints` | `enabled`, `max_snapshots` (50) |

@@ -345,7 +323,7 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con

 ### Providers

-20+ providers supported. Set via `hermes model` or `hermes setup`.
+18 providers supported. Set via `hermes model` or `hermes setup`.

 | Provider | Auth | Key env var |
 |----------|------|-------------|
@@ -354,23 +332,16 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con
 | Nous Portal | OAuth | `hermes login --provider nous` |
 | OpenAI Codex | OAuth | `hermes login --provider openai-codex` |
 | GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` |
-| Google Gemini | API key | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
 | DeepSeek | API key | `DEEPSEEK_API_KEY` |
-| xAI / Grok | API key | `XAI_API_KEY` |
 | Hugging Face | Token | `HF_TOKEN` |
 | Z.AI / GLM | API key | `GLM_API_KEY` |
 | MiniMax | API key | `MINIMAX_API_KEY` |
-| MiniMax CN | API key | `MINIMAX_CN_API_KEY` |
 | Kimi / Moonshot | API key | `KIMI_API_KEY` |
 | Alibaba / DashScope | API key | `DASHSCOPE_API_KEY` |
-| Xiaomi MiMo | API key | `XIAOMI_API_KEY` |
 | Kilo Code | API key | `KILOCODE_API_KEY` |
-| AI Gateway (Vercel) | API key | `AI_GATEWAY_API_KEY` |
-| OpenCode Zen | API key | `OPENCODE_ZEN_API_KEY` |
-| OpenCode Go | API key | `OPENCODE_GO_API_KEY` |
-| Qwen OAuth | OAuth | `hermes login --provider qwen-oauth` |
 | Custom endpoint | Config | `model.base_url` + `model.api_key` in config.yaml |
-| GitHub Copilot ACP | External | `COPILOT_CLI_PATH` or Copilot CLI |
+
+Plus: AI Gateway, OpenCode Zen, OpenCode Go, MiniMax CN, GitHub Copilot ACP.

 Full provider docs: https://hermes-agent.nousresearch.com/docs/integrations/providers

@@ -394,10 +365,6 @@ Enable/disable via `hermes tools` (interactive) or `hermes tools enable/disable
 | `delegation` | Subagent task delegation |
 | `cronjob` | Scheduled task management |
 | `clarify` | Ask user clarifying questions |
-| `messaging` | Cross-platform message sending |
-| `search` | Web search only (subset of `web`) |
-| `todo` | In-session task planning and tracking |
-| `rl` | Reinforcement learning tools (off by default) |
 | `moa` | Mixture of Agents (off by default) |
 | `homeassistant` | Smart home control (off by default) |

@@ -415,13 +382,12 @@ Provider priority (auto-detected):
 1. **Local faster-whisper** — free, no API key: `pip install faster-whisper`
 2. **Groq Whisper** — free tier: set `GROQ_API_KEY`
 3. **OpenAI Whisper** — paid: set `VOICE_TOOLS_OPENAI_KEY`
-4. **Mistral Voxtral** — set `MISTRAL_API_KEY`

 Config:
 ```yaml
 stt:
  enabled: true
-  provider: local        # local, groq, openai, mistral
+  provider: local        # local, groq, openai
  local:
    model: base          # tiny, base, small, medium, large-v3
 ```
@@ -433,9 +399,8 @@ stt:
 | Edge TTS | None | Yes (default) |
 | ElevenLabs | `ELEVENLABS_API_KEY` | Free tier |
 | OpenAI | `VOICE_TOOLS_OPENAI_KEY` | Paid |
-| MiniMax | `MINIMAX_API_KEY` | Paid |
-| Mistral (Voxtral) | `MISTRAL_API_KEY` | Paid |
-| NeuTTS (local) | None (`pip install neutts[all]` + `espeak-ng`) | Free |
+| Kokoro (local) | None | Free |
+| Fish Audio | `FISH_AUDIO_API_KEY` | Free tier |

 Voice commands: `/voice on` (voice-to-voice), `/voice tts` (always voice), `/voice off`.

@@ -527,7 +492,7 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 ### Voice not working
 1. Check `stt.enabled: true` in config.yaml
 2. Verify provider: `pip install faster-whisper` or set API key
-3. In gateway: `/restart`. In CLI: exit and relaunch.
+3. Restart gateway: `/restart`

 ### Tool not available
 1. `hermes tools` — check if toolset is enabled for your platform
@@ -538,11 +503,10 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 1. `hermes doctor` — check config and dependencies
 2. `hermes login` — re-authenticate OAuth providers
 3. Check `.env` has the right API key
-4. **Copilot 403**: `gh auth login` tokens do NOT work for Copilot API. You must use the Copilot-specific OAuth device code flow via `hermes model` → GitHub Copilot.

 ### Changes not taking effect
 - **Tools/skills:** `/reset` starts a new session with updated toolset
- **Config changes:** In gateway: `/restart`. In CLI: exit and relaunch.
+- **Config changes:** `/restart` reloads gateway config
 - **Code changes:** Restart the CLI or gateway process

 ### Skills not showing
@@ -556,23 +520,6 @@ Check logs first:
 grep -i "failed to send\|error" ~/.hermes/logs/gateway.log | tail -20
 ```

-Common gateway problems:
- **Gateway dies on SSH logout**: Enable linger: `sudo loginctl enable-linger $USER`
- **Gateway dies on WSL2 close**: WSL2 requires `systemd=true` in `/etc/wsl.conf` for systemd services to work. Without it, gateway falls back to `nohup` (dies when session closes).
- **Gateway crash loop**: Reset the failed state: `systemctl --user reset-failed hermes-gateway`
-
-### Platform-specific issues
- **Discord bot silent**: Must enable **Message Content Intent** in Bot → Privileged Gateway Intents.
- **Slack bot only works in DMs**: Must subscribe to `message.channels` event. Without it, the bot ignores public channels.
- **Windows HTTP 400 "No models provided"**: Config file encoding issue (BOM). Ensure `config.yaml` is saved as UTF-8 without BOM.
-
-### Auxiliary models not working
-If `auxiliary` tasks (vision, compression, session_search) fail silently, the `auto` provider can't find a backend. Either set `OPENROUTER_API_KEY` or `GOOGLE_API_KEY`, or explicitly configure each auxiliary task's provider:
-```bash
-hermes config set auxiliary.vision.provider <your_provider>
-hermes config set auxiliary.vision.model <model_name>
-```
-
 ---

 ## Where to Find Things
@@ -610,7 +557,7 @@ hermes-agent/
 ├── toolsets.py           # Toolset definitions
 ├── cli.py                # Interactive CLI (HermesCLI)
 ├── hermes_state.py       # SQLite session store
-├── agent/                # Prompt builder, context compression, memory, model routing, credential pooling, skill dispatch
+├── agent/                # Prompt builder, compression, display, adapters
 ├── hermes_cli/           # CLI subcommands, config, setup, commands
 │   ├── commands.py       # Slash command registry (CommandDef)
 │   ├── config.py         # DEFAULT_CONFIG, env var definitions
@@ -679,6 +626,7 @@ run_conversation():
 ### Testing

 ```bash
+source venv/bin/activate  # or .venv/bin/activate
 python -m pytest tests/ -o 'addopts=' -q   # Full suite
 python -m pytest tests/tools/ -q            # Specific area
 ```
@@ -1,51 +1,29 @@
 ---
 name: github-code-review
-description: Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review. Works with gh CLI or falls back to git + GitHub REST API via curl.
-version: 1.1.0
+description: Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review. Uses GitHub MCP tools (mcp_github_*) as the primary interface, with git CLI for local diff operations.
+version: 2.0.0
 author: Hermes Agent
 license: MIT
 metadata:
  hermes:
-    tags: [GitHub, Code-Review, Pull-Requests, Git, Quality]
+    tags: [GitHub, Code-Review, Pull-Requests, Git, Quality, MCP]
    related_skills: [github-auth, github-pr-workflow]
 ---

 # GitHub Code Review

-Perform code reviews on local changes before pushing, or review open PRs on GitHub. Most of this skill uses plain `git` — the `gh`/`curl` split only matters for PR-level interactions.
+Perform code reviews on local changes before pushing, or review open PRs on GitHub. This skill uses **GitHub MCP tools** (`mcp_github_*`) as the primary interface for all GitHub API interactions, with plain `git` for local diff operations.

 ## Prerequisites

- Authenticated with GitHub (see `github-auth` skill)
- Inside a git repository
-
-### Setup (for PR interactions)
-
-```bash
-if command -v gh &>/dev/null && gh auth status &>/dev/null; then
-  AUTH="gh"
-else
-  AUTH="git"
-  if [ -z "$GITHUB_TOKEN" ]; then
-    if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then
-      GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r')
-    elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then
-      GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|')
-    fi
-  fi
-fi
-
-REMOTE_URL=$(git remote get-url origin)
-OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||')
-OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1)
-REPO=$(echo "$OWNER_REPO" | cut -d/ -f2)
-```
+- GitHub MCP server configured (provides `mcp_github_*` tools)
+- Inside a git repository (for local diff operations)

 ---

 ## 1. Reviewing Local Changes (Pre-Push)

-This is pure `git` — works everywhere, no API needed.
+Local diffs use plain `git` — no API needed.

 ### Get the Diff

@@ -122,158 +100,206 @@ When reviewing local changes, present findings in this structure:

 ---

-## 2. Reviewing a Pull Request on GitHub
+## 2. Reviewing a Pull Request on GitHub (MCP Tools)

-### View PR Details
+### Step 1: Gather PR Context

-**With gh:**
+Use MCP tools to get PR metadata, description, and changed files:

-```bash
-gh pr view 123
-gh pr diff 123
-gh pr diff 123 --name-only
+```
+# Get PR details (title, author, description, branch, status)
+mcp_github_pull_request_read(method="get", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+
+# Get the diff
+mcp_github_pull_request_read(method="get_diff", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+
+# Get list of changed files with additions/deletions
+mcp_github_pull_request_read(method="get_files", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+
+# Get CI/CD status
+mcp_github_pull_request_read(method="get_status", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+
+# Get check runs (individual CI jobs)
+mcp_github_pull_request_read(method="get_check_runs", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
 ```

-**With git + curl:**
+### Step 2: Read File Contents for Context

-```bash
-PR_NUMBER=123
+For each changed file, read the full file to understand the surrounding context:

-# Get PR details
-curl -s \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \
-  | python3 -c "
-import sys, json
-pr = json.load(sys.stdin)
-print(f\"Title: {pr['title']}\")
-print(f\"Author: {pr['user']['login']}\")
-print(f\"Branch: {pr['head']['ref']} -> {pr['base']['ref']}\")
-print(f\"State: {pr['state']}\")
-print(f\"Body:\n{pr['body']}\")"
-
-# List changed files
-curl -s \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/files \
-  | python3 -c "
-import sys, json
-for f in json.load(sys.stdin):
-    print(f\"{f['status']:10} +{f['additions']:-4} -{f['deletions']:-4}  {f['filename']}\")"
+```
+# Read specific files from the PR branch (replace PR_NUMBER inside the ref string with the actual PR number, e.g. "refs/pull/123/head")
+mcp_github_get_file_contents(owner=OWNER, repo=REPO, path="src/auth/login.py", ref="refs/pull/PR_NUMBER/head")
 ```

-### Check Out PR Locally for Full Review
+### Step 3: Check Out Locally (Optional — for running tests)

-This works with plain `git` — no `gh` needed:
+If you need to run tests or linters locally (replace `PR_NUMBER` in the commands below with the actual PR number):

 ```bash
-# Fetch the PR branch and check it out
-git fetch origin pull/123/head:pr-123
-git checkout pr-123
+git fetch origin pull/PR_NUMBER/head:pr-PR_NUMBER
+git checkout pr-PR_NUMBER

-# Now you can use read_file, search_files, run tests, etc.
+# Run tests
+python -m pytest 2>&1 | tail -20

-# View diff against the base branch
-git diff main...pr-123
+# Run linter
+ruff check . 2>&1 | head -30
 ```

-**With gh (shortcut):**
+### Step 4: Get Existing Review Comments
+
+Check what's already been discussed:
+
+```
+# Get review threads (grouped comments on code locations)
+mcp_github_pull_request_read(method="get_review_comments", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+
+# Get general PR comments
+mcp_github_pull_request_read(method="get_comments", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+
+# Get formal reviews (approvals, change requests)
+mcp_github_pull_request_read(method="get_reviews", owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+```
+
+### Step 5: Apply the Review Checklist (Section 3)
+
+Go through each category systematically.
+
+### Step 6: Submit a Formal Review with Inline Comments
+
+Use the MCP review tools to submit findings:
+
+**Create a pending review, add inline comments, then submit:**
+
+```
+# Step A: Create a pending review (omit "event" to keep it pending)
+mcp_github_pull_request_review_write(
+    method="create",
+    owner=OWNER,
+    repo=REPO,
+    pullNumber=PR_NUMBER
+)
+
+# Step B: Add inline comments to the pending review
+mcp_github_add_comment_to_pending_review(
+    owner=OWNER,
+    repo=REPO,
+    pullNumber=PR_NUMBER,
+    path="src/auth.py",
+    line=45,
+    body="🔴 **Critical:** User input passed directly to SQL query — use parameterized queries.",
+    subjectType="LINE",
+    side="RIGHT"
+)
+
+mcp_github_add_comment_to_pending_review(
+    owner=OWNER,
+    repo=REPO,
+    pullNumber=PR_NUMBER,
+    path="src/models/user.py",
+    line=23,
+    body="⚠️ **Warning:** Password stored without hashing. Use bcrypt or argon2.",
+    subjectType="LINE",
+    side="RIGHT"
+)
+
+# Step C: Submit the pending review
+mcp_github_pull_request_review_write(
+    method="submit_pending",
+    owner=OWNER,
+    repo=REPO,
+    pullNumber=PR_NUMBER,
+    event="REQUEST_CHANGES",  # or "APPROVE" or "COMMENT"
+    body="## Hermes Agent Review\n\nFound 2 issues. See inline comments."
+)
+```
+
+**Or submit a review directly (no pending step):**
+
+```
+# Approve
+mcp_github_pull_request_review_write(
+    method="create",
+    owner=OWNER,
+    repo=REPO,
+    pullNumber=PR_NUMBER,
+    event="APPROVE",
+    body="LGTM! Code looks clean — good test coverage, no security concerns."
+)
+
+# Request changes
+mcp_github_pull_request_review_write(
+    method="create",
+    owner=OWNER,
+    repo=REPO,
+    pullNumber=PR_NUMBER,
+    event="REQUEST_CHANGES",
+    body="Found a few issues — see the summary comment for details."
+)
+```
+
+### Step 7: Post a Summary Comment
+
+Leave a top-level summary so the PR author gets the full picture:
+
+```
+mcp_github_add_issue_comment(
+    owner=OWNER,
+    repo=REPO,
+    issue_number=PR_NUMBER,
+    body="""## Code Review Summary
+
+**Verdict: Changes Requested** (2 issues, 1 suggestion)
+
+### 🔴 Critical
+- **src/auth.py:45** — SQL injection vulnerability
+
+### ⚠️ Warnings
+- **src/models/user.py:23** — Plaintext password storage
+
+### 💡 Suggestions
+- **src/utils.py:8** — Duplicated logic, consider consolidating
+
+### ✅ Looks Good
+- Clean API design
+- Good error handling in the middleware layer
+
+---
+*Reviewed by Hermes Agent*"""
+)
+```
+
+### Step 8: Reply to Existing Comments
+
+If the PR author responds to your review:
+
+```
+# Reply to a specific review comment
+mcp_github_add_reply_to_pull_request_comment(
+    owner=OWNER,
+    repo=REPO,
+    pullNumber=PR_NUMBER,
+    commentId=COMMENT_ID,
+    body="Good point! That approach works too."
+)
+```
+
+### Step 9: Request Copilot Review (Optional)
+
+For automated AI feedback, request a Copilot review — ideally before you start your own review (Step 5), so its findings can inform yours:
+
+```
+mcp_github_request_copilot_review(owner=OWNER, repo=REPO, pullNumber=PR_NUMBER)
+```
+
+### Step 10: Clean Up (if checked out locally)

 ```bash
-gh pr checkout 123
+git checkout main
+git branch -D pr-PR_NUMBER
 ```

-### Leave Comments on a PR
-
-**General PR comment — with gh:**
-
-```bash
-gh pr comment 123 --body "Overall looks good, a few suggestions below."
-```
-
-**General PR comment — with curl:**
-
-```bash
-curl -s -X POST \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$OWNER/$REPO/issues/$PR_NUMBER/comments \
-  -d '{"body": "Overall looks good, a few suggestions below."}'
-```
-
-### Leave Inline Review Comments
-
-**Single inline comment — with gh (via API):**
-
-```bash
-HEAD_SHA=$(gh pr view 123 --json headRefOid --jq '.headRefOid')
-
-gh api repos/$OWNER/$REPO/pulls/123/comments \
-  --method POST \
-  -f body="This could be simplified with a list comprehension." \
-  -f path="src/auth/login.py" \
-  -f commit_id="$HEAD_SHA" \
-  -f line=45 \
-  -f side="RIGHT"
-```
-
-**Single inline comment — with curl:**
-
-```bash
-# Get the head commit SHA
-HEAD_SHA=$(curl -s \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \
-  | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])")
-
-curl -s -X POST \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/comments \
-  -d "{
-    \"body\": \"This could be simplified with a list comprehension.\",
-    \"path\": \"src/auth/login.py\",
-    \"commit_id\": \"$HEAD_SHA\",
-    \"line\": 45,
-    \"side\": \"RIGHT\"
-  }"
-```
-
-### Submit a Formal Review (Approve / Request Changes)
-
-**With gh:**
-
-```bash
-gh pr review 123 --approve --body "LGTM!"
-gh pr review 123 --request-changes --body "See inline comments."
-gh pr review 123 --comment --body "Some suggestions, nothing blocking."
-```
-
-**With curl — multi-comment review submitted atomically:**
-
-```bash
-HEAD_SHA=$(curl -s \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \
-  | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])")
-
-curl -s -X POST \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/reviews \
-  -d "{
-    \"commit_id\": \"$HEAD_SHA\",
-    \"event\": \"COMMENT\",
-    \"body\": \"Code review from Hermes Agent\",
-    \"comments\": [
-      {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"Use parameterized queries to prevent SQL injection.\"},
-      {\"path\": \"src/models/user.py\", \"line\": 23, \"body\": \"Hash passwords with bcrypt before storing.\"},
-      {\"path\": \"tests/test_auth.py\", \"line\": 1, \"body\": \"Add test for expired token edge case.\"}
-    ]
-  }"
-```
-
-Event values: `"APPROVE"`, `"REQUEST_CHANGES"`, `"COMMENT"`
-
-The `line` field refers to the line number in the *new* version of the file. For deleted lines, use `"side": "LEFT"`.
-
 ---

 ## 3. Review Checklist
@@ -290,6 +316,7 @@ When performing a code review (local or PR), systematically check:
 - Input validation on user-facing inputs
 - No SQL injection, XSS, or path traversal
 - Auth/authz checks where needed
+- Use `mcp_github_run_secret_scanning` on changed files for automated secret detection

 ### Code Quality
 - Clear naming (variables, functions, classes)
@@ -327,151 +354,30 @@ When the user asks you to "review the code" or "check before pushing":

 ---

-## 5. PR Review Workflow (End-to-End)
+## 5. PR Review Workflow (End-to-End with MCP Tools)

-When the user asks you to "review PR #N", "look at this PR", or gives you a PR URL, follow this recipe:
+When the user asks you to "review PR #N", "look at this PR", or gives you a PR URL:

-### Step 1: Set up environment
+### Quick Reference

-```bash
-source ~/.hermes/skills/github/github-auth/scripts/gh-env.sh
-# Or run the inline setup block from the top of this skill
-```
-
-### Step 2: Gather PR context
-
-Get the PR metadata, description, and list of changed files to understand scope before diving into code.
-
-**With gh:**
-```bash
-gh pr view 123
-gh pr diff 123 --name-only
-gh pr checks 123
-```
-
-**With curl:**
-```bash
-PR_NUMBER=123
-
-# PR details (title, author, description, branch)
-curl -s -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER
-
-# Changed files with line counts
-curl -s -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/files
-```
-
-### Step 3: Check out the PR locally
-
-This gives you full access to `read_file`, `search_files`, and the ability to run tests.
-
-```bash
-git fetch origin pull/$PR_NUMBER/head:pr-$PR_NUMBER
-git checkout pr-$PR_NUMBER
-```
-
-### Step 4: Read the diff and understand changes
-
-```bash
-# Full diff against the base branch
-git diff main...HEAD
-
-# Or file-by-file for large PRs
-git diff main...HEAD --name-only
-# Then for each file:
-git diff main...HEAD -- path/to/file.py
-```
-
-For each changed file, use `read_file` to see full context around the changes — diffs alone can miss issues visible only with surrounding code.
-
-### Step 5: Run automated checks locally (if applicable)
-
-```bash
-# Run tests if there's a test suite
-python -m pytest 2>&1 | tail -20
-# or: npm test, cargo test, go test ./..., etc.
-
-# Run linter if configured
-ruff check . 2>&1 | head -30
-# or: eslint, clippy, etc.
-```
-
-### Step 6: Apply the review checklist (Section 3)
-
-Go through each category: Correctness, Security, Code Quality, Testing, Performance, Documentation.
-
-### Step 7: Post the review to GitHub
-
-Collect your findings and submit them as a formal review with inline comments.
-
-**With gh:**
-```bash
-# If no issues — approve
-gh pr review $PR_NUMBER --approve --body "Reviewed by Hermes Agent. Code looks clean — good test coverage, no security concerns."
-
-# If issues found — request changes with inline comments
-gh pr review $PR_NUMBER --request-changes --body "Found a few issues — see inline comments."
-```
-
-**With curl — atomic review with multiple inline comments:**
-```bash
-HEAD_SHA=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER \
-  | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])")
-
-# Build the review JSON — event is APPROVE, REQUEST_CHANGES, or COMMENT
-curl -s -X POST \
-  -H "Authorization: token $GITHUB_TOKEN" \
-  https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/reviews \
-  -d "{
-    \"commit_id\": \"$HEAD_SHA\",
-    \"event\": \"REQUEST_CHANGES\",
-    \"body\": \"## Hermes Agent Review\n\nFound 2 issues, 1 suggestion. See inline comments.\",
-    \"comments\": [
-      {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"🔴 **Critical:** User input passed directly to SQL query — use parameterized queries.\"},
-      {\"path\": \"src/models.py\", \"line\": 23, \"body\": \"⚠️ **Warning:** Password stored without hashing.\"},
-      {\"path\": \"src/utils.py\", \"line\": 8, \"body\": \"💡 **Suggestion:** This duplicates logic in core/utils.py:34.\"}
-    ]
-  }"
-```
-
-### Step 8: Also post a summary comment
-
-In addition to inline comments, leave a top-level summary so the PR author gets the full picture at a glance. Use the review output format from `references/review-output-template.md`.
-
-**With gh:**
-```bash
-gh pr comment $PR_NUMBER --body "$(cat <<'EOF'
-## Code Review Summary
-
-**Verdict: Changes Requested** (2 issues, 1 suggestion)
-
-### 🔴 Critical
- **src/auth.py:45** — SQL injection vulnerability
-
-### ⚠️ Warnings
- **src/models.py:23** — Plaintext password storage
-
-### 💡 Suggestions
- **src/utils.py:8** — Duplicated logic, consider consolidating
-
-### ✅ Looks Good
- Clean API design
- Good error handling in the middleware layer
-
---
-*Reviewed by Hermes Agent*
-EOF
-)"
-```
-
-### Step 9: Clean up
-
-```bash
-git checkout main
-git branch -D pr-$PR_NUMBER
-```
+| Task | MCP Tool |
+|------|----------|
+| Get PR details | `mcp_github_pull_request_read(method="get")` |
+| Get PR diff | `mcp_github_pull_request_read(method="get_diff")` |
+| Get changed files | `mcp_github_pull_request_read(method="get_files")` |
+| Get CI status | `mcp_github_pull_request_read(method="get_status")` |
+| Get check runs | `mcp_github_pull_request_read(method="get_check_runs")` |
+| Read file contents | `mcp_github_get_file_contents(ref="refs/pull/N/head")` |
+| Get review threads | `mcp_github_pull_request_read(method="get_review_comments")` |
+| Get PR comments | `mcp_github_pull_request_read(method="get_comments")` |
+| Get reviews | `mcp_github_pull_request_read(method="get_reviews")` |
+| Create pending review | `mcp_github_pull_request_review_write(method="create")` |
+| Add inline comment | `mcp_github_add_comment_to_pending_review()` |
+| Submit review | `mcp_github_pull_request_review_write(method="submit_pending")` |
+| Add PR comment | `mcp_github_add_issue_comment()` |
+| Reply to comment | `mcp_github_add_reply_to_pull_request_comment()` |
+| Scan for secrets | `mcp_github_run_secret_scanning()` |
+| Request Copilot review | `mcp_github_request_copilot_review()` |

 ### Decision: Approve vs Request Changes vs Comment

@@ -820,24 +820,6 @@ Every successful ML paper centers on what Neel Nanda calls "the narrative": a sh

 **If you cannot state your contribution in one sentence, you don't yet have a paper.**

-### The Sources Behind This Guidance
-
-This skill synthesizes writing philosophy from researchers who have published extensively at top venues. The writing philosophy layer was originally compiled by [Orchestra Research](https://github.com/orchestra-research) as the `ml-paper-writing` skill.
-
-| Source | Key Contribution | Link |
-|--------|-----------------|------|
-| **Neel Nanda** (Google DeepMind) | The Narrative Principle, What/Why/So What framework | [How to Write ML Papers](https://www.alignmentforum.org/posts/eJGptPbbFPZGLpjsp/highly-opinionated-advice-on-how-to-write-ml-papers) |
-| **Sebastian Farquhar** (DeepMind) | 5-sentence abstract formula | [How to Write ML Papers](https://sebastianfarquhar.com/on-research/2024/11/04/how_to_write_ml_papers/) |
-| **Gopen & Swan** | 7 principles of reader expectations | [Science of Scientific Writing](https://cseweb.ucsd.edu/~swanson/papers/science-of-writing.pdf) |
-| **Zachary Lipton** | Word choice, eliminating hedging | [Heuristics for Scientific Writing](https://www.approximatelycorrect.com/2018/01/29/heuristics-technical-scientific-writing-machine-learning-perspective/) |
-| **Jacob Steinhardt** (UC Berkeley) | Precision, consistent terminology | [Writing Tips](https://bounded-regret.ghost.io/) |
-| **Ethan Perez** (Anthropic) | Micro-level clarity tips | [Easy Paper Writing Tips](https://ethanperez.net/easy-paper-writing-tips/) |
-| **Andrej Karpathy** | Single contribution focus | Various lectures |
-
-**For deeper dives into any of these, see:**
- [references/writing-guide.md](references/writing-guide.md) — Full explanations with examples
- [references/sources.md](references/sources.md) — Complete bibliography
-
 ### Time Allocation

 Spend approximately **equal time** on each of:
@@ -4,12 +4,6 @@ This document lists all authoritative sources used to build this skill, organize

 ---

-## Origin & Attribution
-
-The writing philosophy, citation verification workflow, and conference reference materials in this skill were originally compiled by **[Orchestra Research](https://github.com/orchestra-research)** as the `ml-paper-writing` skill (January 2026), drawing on Neel Nanda's blog post and other researcher guides listed below. The skill was integrated into hermes-agent by teknium (January 2026), then expanded into the current `research-paper-writing` pipeline by SHL0MS (April 2026, PR #4654), which added experiment design, execution monitoring, iterative refinement, and submission phases while preserving the original writing philosophy and reference files.
-
---
-
 ## Writing Philosophy & Guides

 ### Primary Sources (Must-Read)
@@ -971,74 +971,6 @@ class TestTaskSpecificOverrides:
            client, model = get_text_auxiliary_client("compression")
        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter

-    def test_resolve_auto_prefers_live_main_runtime_over_persisted_config(self, monkeypatch, tmp_path):
-        """Session-only live model switches should override persisted config for auto routing."""
-        hermes_home = tmp_path / "hermes"
-        hermes_home.mkdir(parents=True, exist_ok=True)
-        (hermes_home / "config.yaml").write_text(
-            """model:
-  default: glm-5.1
-  provider: opencode-go
-compression:
-  summary_provider: auto
-"""
-        )
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-        calls = []
-
-        def _fake_resolve(provider, model=None, *args, **kwargs):
-            calls.append((provider, model, kwargs))
-            return MagicMock(), model or "resolved-model"
-
-        with patch("agent.auxiliary_client.resolve_provider_client", side_effect=_fake_resolve):
-            client, model = _resolve_auto(
-                main_runtime={
-                    "provider": "openai-codex",
-                    "model": "gpt-5.4",
-                    "api_mode": "codex_responses",
-                }
-            )
-
-        assert client is not None
-        assert model == "gpt-5.4"
-        assert calls[0][0] == "openai-codex"
-        assert calls[0][1] == "gpt-5.4"
-        assert calls[0][2]["api_mode"] == "codex_responses"
-
-    def test_explicit_compression_pin_still_wins_over_live_main_runtime(self, monkeypatch, tmp_path):
-        """Task-level compression config should beat a live session override."""
-        hermes_home = tmp_path / "hermes"
-        hermes_home.mkdir(parents=True, exist_ok=True)
-        (hermes_home / "config.yaml").write_text(
-            """auxiliary:
-  compression:
-    provider: openrouter
-    model: google/gemini-3-flash-preview
-model:
-  default: glm-5.1
-  provider: opencode-go
-"""
-        )
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-        with patch("agent.auxiliary_client.resolve_provider_client", return_value=(MagicMock(), "google/gemini-3-flash-preview")) as mock_resolve:
-            client, model = get_text_auxiliary_client(
-                "compression",
-                main_runtime={
-                    "provider": "openai-codex",
-                    "model": "gpt-5.4",
-                },
-            )
-
-        assert client is not None
-        assert model == "google/gemini-3-flash-preview"
-        assert mock_resolve.call_args.args[0] == "openrouter"
-        assert mock_resolve.call_args.kwargs["main_runtime"] == {
-            "provider": "openai-codex",
-            "model": "gpt-5.4",
-        }
-
    def test_compression_summary_base_url_from_config(self, monkeypatch, tmp_path):
        """compression.summary_base_url should produce a custom-endpoint client."""
        hermes_home = tmp_path / "hermes"
@@ -1628,74 +1560,3 @@ class TestStaleBaseUrlWarning:

        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Warning should not fire a second time"
-
-
-# ---------------------------------------------------------------------------
-# Anthropic-compatible image block conversion
-# ---------------------------------------------------------------------------
-
-class TestAnthropicCompatImageConversion:
-    """Tests for _is_anthropic_compat_endpoint and _convert_openai_images_to_anthropic."""
-
-    def test_known_providers_detected(self):
-        from agent.auxiliary_client import _is_anthropic_compat_endpoint
-        assert _is_anthropic_compat_endpoint("minimax", "")
-        assert _is_anthropic_compat_endpoint("minimax-cn", "")
-
-    def test_openrouter_not_detected(self):
-        from agent.auxiliary_client import _is_anthropic_compat_endpoint
-        assert not _is_anthropic_compat_endpoint("openrouter", "")
-        assert not _is_anthropic_compat_endpoint("anthropic", "")
-
-    def test_url_based_detection(self):
-        from agent.auxiliary_client import _is_anthropic_compat_endpoint
-        assert _is_anthropic_compat_endpoint("custom", "https://api.minimax.io/anthropic")
-        assert _is_anthropic_compat_endpoint("custom", "https://example.com/anthropic/v1")
-        assert not _is_anthropic_compat_endpoint("custom", "https://api.openai.com/v1")
-
-    def test_base64_image_converted(self):
-        from agent.auxiliary_client import _convert_openai_images_to_anthropic
-        messages = [{
-            "role": "user",
-            "content": [
-                {"type": "text", "text": "describe"},
-                {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR="}}
-            ]
-        }]
-        result = _convert_openai_images_to_anthropic(messages)
-        img_block = result[0]["content"][1]
-        assert img_block["type"] == "image"
-        assert img_block["source"]["type"] == "base64"
-        assert img_block["source"]["media_type"] == "image/png"
-        assert img_block["source"]["data"] == "iVBOR="
-
-    def test_url_image_converted(self):
-        from agent.auxiliary_client import _convert_openai_images_to_anthropic
-        messages = [{
-            "role": "user",
-            "content": [
-                {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}}
-            ]
-        }]
-        result = _convert_openai_images_to_anthropic(messages)
-        img_block = result[0]["content"][0]
-        assert img_block["type"] == "image"
-        assert img_block["source"]["type"] == "url"
-        assert img_block["source"]["url"] == "https://example.com/img.jpg"
-
-    def test_text_only_messages_unchanged(self):
-        from agent.auxiliary_client import _convert_openai_images_to_anthropic
-        messages = [{"role": "user", "content": "Hello"}]
-        result = _convert_openai_images_to_anthropic(messages)
-        assert result[0] is messages[0]  # same object, not copied
-
-    def test_jpeg_media_type_parsed(self):
-        from agent.auxiliary_client import _convert_openai_images_to_anthropic
-        messages = [{
-            "role": "user",
-            "content": [
-                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/="}}
-            ]
-        }]
-        result = _convert_openai_images_to_anthropic(messages)
-        assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg"
@@ -191,37 +191,6 @@ class TestNonStringContent:
        kwargs = mock_call.call_args.kwargs
        assert "temperature" not in kwargs

-    def test_summary_call_passes_live_main_runtime(self):
-        mock_response = MagicMock()
-        mock_response.choices = [MagicMock()]
-        mock_response.choices[0].message.content = "ok"
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(
-                model="gpt-5.4",
-                provider="openai-codex",
-                base_url="https://chatgpt.com/backend-api/codex",
-                api_key="codex-token",
-                api_mode="codex_responses",
-                quiet_mode=True,
-            )
-
-        messages = [
-            {"role": "user", "content": "do something"},
-            {"role": "assistant", "content": "ok"},
-        ]
-
-        with patch("agent.context_compressor.call_llm", return_value=mock_response) as mock_call:
-            c._generate_summary(messages)
-
-        assert mock_call.call_args.kwargs["main_runtime"] == {
-            "model": "gpt-5.4",
-            "provider": "openai-codex",
-            "base_url": "https://chatgpt.com/backend-api/codex",
-            "api_key": "codex-token",
-            "api_mode": "codex_responses",
-        }
-

 class TestSummaryFailureCooldown:
    def test_summary_failure_enters_cooldown_and_skips_retry(self):
@@ -308,34 +308,6 @@ class TestMinimaxPreserveDots:
        from run_agent import AIAgent
        assert AIAgent._anthropic_preserve_dots(agent) is False

-    def test_opencode_zen_provider_preserves_dots(self):
-        from types import SimpleNamespace
-        agent = SimpleNamespace(provider="opencode-zen", base_url="")
-        from run_agent import AIAgent
-        assert AIAgent._anthropic_preserve_dots(agent) is True
-
-    def test_opencode_zen_url_preserves_dots(self):
-        from types import SimpleNamespace
-        agent = SimpleNamespace(provider="custom", base_url="https://opencode.ai/zen/v1")
-        from run_agent import AIAgent
-        assert AIAgent._anthropic_preserve_dots(agent) is True
-
-    def test_zai_provider_preserves_dots(self):
-        from types import SimpleNamespace
-        agent = SimpleNamespace(provider="zai", base_url="")
-        from run_agent import AIAgent
-        assert AIAgent._anthropic_preserve_dots(agent) is True
-
-    def test_bigmodel_cn_url_preserves_dots(self):
-        from types import SimpleNamespace
-        agent = SimpleNamespace(provider="custom", base_url="https://open.bigmodel.cn/api/paas/v4")
-        from run_agent import AIAgent
-        assert AIAgent._anthropic_preserve_dots(agent) is True
-
-    def test_normalize_preserves_m25_free_dot(self):
-        from agent.anthropic_adapter import normalize_model_name
-        assert normalize_model_name("minimax-m2.5-free", preserve_dots=True) == "minimax-m2.5-free"
-
    def test_normalize_preserves_m27_dot(self):
        from agent.anthropic_adapter import normalize_model_name
        assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7"
@@ -70,44 +70,6 @@ class TestQueryLocalContextLengthOllama:

        assert result == 32768

-    def test_ollama_num_ctx_wins_over_model_info(self):
-        """When both num_ctx (Modelfile) and model_info (GGUF) are present,
-        num_ctx wins because it's the *runtime* context Ollama actually
-        allocates KV cache for. The GGUF model_info.context_length is the
-        training max — using it would let Hermes grow conversations past
-        the runtime limit and Ollama would silently truncate.
-
-        Concrete example: hermes-brain:qwen3-14b-ctx32k is a Modelfile
-        derived from qwen3:14b with `num_ctx 32768`, but the underlying
-        GGUF reports `qwen3.context_length: 40960` (training max). If
-        Hermes used 40960 it would let the conversation grow past 32768
-        before compressing, and Ollama would truncate the prefix.
-        """
-        from agent.model_metadata import _query_local_context_length
-
-        show_resp = self._make_resp(200, {
-            "model_info": {"qwen3.context_length": 40960},
-            "parameters": "num_ctx                        32768\ntemperature                    0.6\n",
-        })
-        models_resp = self._make_resp(404, {})
-
-        client_mock = MagicMock()
-        client_mock.__enter__ = lambda s: client_mock
-        client_mock.__exit__ = MagicMock(return_value=False)
-        client_mock.post.return_value = show_resp
-        client_mock.get.return_value = models_resp
-
-        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"), \
-             patch("httpx.Client", return_value=client_mock):
-            result = _query_local_context_length(
-                "hermes-brain:qwen3-14b-ctx32k", "http://100.77.243.5:11434/v1"
-            )
-
-        assert result == 32768, (
-            f"Expected num_ctx (32768) to win over model_info (40960), got {result}. "
-            "If Hermes uses the GGUF training max, conversations will silently truncate."
-        )
-
    def test_ollama_show_404_falls_through(self):
        """When /api/show returns 404, falls through to /v1/models/{model}."""
        from agent.model_metadata import _query_local_context_length
@@ -87,10 +87,7 @@ class TestProviderMapping:

    def test_unmapped_provider_not_in_dict(self):
        assert "nous" not in PROVIDER_TO_MODELS_DEV
-
-    def test_openai_codex_mapped_to_openai(self):
-        assert PROVIDER_TO_MODELS_DEV["openai"] == "openai"
-        assert PROVIDER_TO_MODELS_DEV["openai-codex"] == "openai"
+        assert "openai-codex" not in PROVIDER_TO_MODELS_DEV


 class TestExtractContext:
@@ -18,7 +18,6 @@ from agent.prompt_builder import (
    build_skills_system_prompt,
    build_nous_subscription_prompt,
    build_context_files_prompt,
-    build_environment_hints,
    CONTEXT_FILE_MAX_CHARS,
    DEFAULT_AGENT_IDENTITY,
    TOOL_USE_ENFORCEMENT_GUIDANCE,
@@ -27,7 +26,6 @@ from agent.prompt_builder import (
    MEMORY_GUIDANCE,
    SESSION_SEARCH_GUIDANCE,
    PLATFORM_HINTS,
-    WSL_ENVIRONMENT_HINT,
 )
 from hermes_cli.nous_subscription import NousFeatureState, NousSubscriptionFeatures

@@ -772,29 +770,6 @@ class TestPromptBuilderConstants:
        assert "cli" in PLATFORM_HINTS


-# =========================================================================
-# Environment hints
-# =========================================================================
-
-class TestEnvironmentHints:
-    def test_wsl_hint_constant_mentions_mnt(self):
-        assert "/mnt/c/" in WSL_ENVIRONMENT_HINT
-        assert "WSL" in WSL_ENVIRONMENT_HINT
-
-    def test_build_environment_hints_on_wsl(self, monkeypatch):
-        import agent.prompt_builder as _pb
-        monkeypatch.setattr(_pb, "is_wsl", lambda: True)
-        result = _pb.build_environment_hints()
-        assert "/mnt/" in result
-        assert "WSL" in result
-
-    def test_build_environment_hints_not_wsl(self, monkeypatch):
-        import agent.prompt_builder as _pb
-        monkeypatch.setattr(_pb, "is_wsl", lambda: False)
-        result = _pb.build_environment_hints()
-        assert result == ""
-
-
 # =========================================================================
 # Conditional skill activation
 # =========================================================================
@@ -180,71 +180,33 @@ class TestDisplayResumedHistory:
        assert 200 <= a_count <= 310  # roughly 300 chars (±panel padding)

    def test_long_assistant_message_truncated(self):
-        """Non-last assistant messages are still truncated."""
        cli = _make_cli()
        long_text = "B" * 400
        cli.conversation_history = [
            {"role": "user", "content": "Tell me a lot."},
            {"role": "assistant", "content": long_text},
-            {"role": "user", "content": "And more?"},
-            {"role": "assistant", "content": "Short final reply."},
        ]
        output = self._capture_display(cli)

-        # The non-last assistant message should be truncated
+        assert "..." in output
        assert "B" * 400 not in output
-        # The last assistant message shown in full
-        assert "Short final reply." in output

    def test_multiline_assistant_truncated(self):
-        """Non-last multiline assistant messages are truncated to 3 lines."""
        cli = _make_cli()
        multi = "\n".join([f"Line {i}" for i in range(20)])
        cli.conversation_history = [
            {"role": "user", "content": "Show me lines."},
            {"role": "assistant", "content": multi},
-            {"role": "user", "content": "What else?"},
-            {"role": "assistant", "content": "Done."},
        ]
        output = self._capture_display(cli)

-        # First 3 lines of non-last assistant should be there
+        # First 3 lines should be there
        assert "Line 0" in output
        assert "Line 1" in output
        assert "Line 2" in output
-        # Line 19 should NOT be in the truncated message
+        # Line 19 should NOT be there (truncated after 3 lines)
        assert "Line 19" not in output

-    def test_last_assistant_response_shown_in_full(self):
-        """The last assistant response is shown un-truncated so the user
-        knows where they left off without wasting tokens re-asking."""
-        cli = _make_cli()
-        long_text = "X" * 500
-        cli.conversation_history = [
-            {"role": "user", "content": "Tell me everything."},
-            {"role": "assistant", "content": long_text},
-        ]
-        output = self._capture_display(cli)
-
-        # Full 500-char text should be present (may be line-wrapped by Rich)
-        x_count = output.count("X")
-        assert x_count >= 490  # allow small Rich formatting variance
-
-    def test_last_assistant_multiline_shown_in_full(self):
-        """The last assistant response shows all lines, not just 3."""
-        cli = _make_cli()
-        multi = "\n".join([f"Line {i}" for i in range(20)])
-        cli.conversation_history = [
-            {"role": "user", "content": "Show me everything."},
-            {"role": "assistant", "content": multi},
-        ]
-        output = self._capture_display(cli)
-
-        # All 20 lines should be present since it's the last response
-        assert "Line 0" in output
-        assert "Line 10" in output
-        assert "Line 19" in output
-
    def test_large_history_shows_truncation_indicator(self):
        cli = _make_cli()
        cli.conversation_history = _large_history(n_exchanges=15)
@@ -1,87 +0,0 @@
-"""Tests for _normalize_chat_content in the API server adapter."""
-
-from gateway.platforms.api_server import _normalize_chat_content
-
-
-class TestNormalizeChatContent:
-    """Content normalization converts array-based content parts to plain text."""
-
-    def test_none_returns_empty_string(self):
-        assert _normalize_chat_content(None) == ""
-
-    def test_plain_string_returned_as_is(self):
-        assert _normalize_chat_content("hello world") == "hello world"
-
-    def test_empty_string_returned_as_is(self):
-        assert _normalize_chat_content("") == ""
-
-    def test_text_content_part(self):
-        content = [{"type": "text", "text": "hello"}]
-        assert _normalize_chat_content(content) == "hello"
-
-    def test_input_text_content_part(self):
-        content = [{"type": "input_text", "text": "user input"}]
-        assert _normalize_chat_content(content) == "user input"
-
-    def test_output_text_content_part(self):
-        content = [{"type": "output_text", "text": "assistant output"}]
-        assert _normalize_chat_content(content) == "assistant output"
-
-    def test_multiple_text_parts_joined_with_newline(self):
-        content = [
-            {"type": "text", "text": "first"},
-            {"type": "text", "text": "second"},
-        ]
-        assert _normalize_chat_content(content) == "first\nsecond"
-
-    def test_mixed_string_and_dict_parts(self):
-        content = ["plain string", {"type": "text", "text": "dict part"}]
-        assert _normalize_chat_content(content) == "plain string\ndict part"
-
-    def test_image_url_parts_silently_skipped(self):
-        content = [
-            {"type": "text", "text": "check this:"},
-            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
-        ]
-        assert _normalize_chat_content(content) == "check this:"
-
-    def test_integer_content_converted(self):
-        assert _normalize_chat_content(42) == "42"
-
-    def test_boolean_content_converted(self):
-        assert _normalize_chat_content(True) == "True"
-
-    def test_deeply_nested_list_respects_depth_limit(self):
-        """Nesting beyond max_depth returns empty string."""
-        content = [[[[[[[[[[[["deep"]]]]]]]]]]]]
-        result = _normalize_chat_content(content)
-        # The deep nesting should be truncated, not crash
-        assert isinstance(result, str)
-
-    def test_large_list_capped(self):
-        """Lists beyond MAX_CONTENT_LIST_SIZE are truncated."""
-        content = [{"type": "text", "text": f"item{i}"} for i in range(2000)]
-        result = _normalize_chat_content(content)
-        # Should not contain all 2000 items
-        assert result.count("item") <= 1000
-
-    def test_oversized_string_truncated(self):
-        """Strings beyond 64KB are truncated."""
-        huge = "x" * 100_000
-        result = _normalize_chat_content(huge)
-        assert len(result) == 65_536
-
-    def test_empty_text_parts_filtered(self):
-        content = [
-            {"type": "text", "text": ""},
-            {"type": "text", "text": "actual"},
-            {"type": "text", "text": ""},
-        ]
-        assert _normalize_chat_content(content) == "actual"
-
-    def test_dict_without_type_skipped(self):
-        content = [{"foo": "bar"}, {"type": "text", "text": "real"}]
-        assert _normalize_chat_content(content) == "real"
-
-    def test_empty_list_returns_empty(self):
-        assert _normalize_chat_content([]) == ""
@@ -1,226 +0,0 @@
-"""Tests for the clean shutdown marker that prevents unwanted session auto-resets.
-
-When the gateway shuts down gracefully (hermes update, gateway restart, /restart),
-it writes a .clean_shutdown marker.  On the next startup, if the marker exists,
-suspend_recently_active() is skipped so users don't lose their sessions.
-
-After a crash (no marker), suspension still fires as a safety net for stuck sessions.
-"""
-
-import os
-from datetime import datetime, timedelta
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from gateway.config import GatewayConfig, Platform, PlatformConfig, SessionResetPolicy
-from gateway.session import SessionEntry, SessionSource, SessionStore
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _make_source(platform=Platform.TELEGRAM, chat_id="123", user_id="u1"):
-    return SessionSource(platform=platform, chat_id=chat_id, user_id=user_id)
-
-
-def _make_store(tmp_path, policy=None):
-    config = GatewayConfig()
-    if policy:
-        config.default_reset_policy = policy
-    return SessionStore(sessions_dir=tmp_path, config=config)
-
-
-# ---------------------------------------------------------------------------
-# SessionStore.suspend_recently_active
-# ---------------------------------------------------------------------------
-
-class TestSuspendRecentlyActive:
-    """Verify suspend_recently_active only marks recent sessions."""
-
-    def test_suspends_recently_active_sessions(self, tmp_path):
-        store = _make_store(tmp_path)
-        source = _make_source()
-        entry = store.get_or_create_session(source)
-        assert not entry.suspended
-
-        count = store.suspend_recently_active()
-        assert count == 1
-
-        # Re-fetch — should be suspended now
-        refreshed = store.get_or_create_session(source)
-        assert refreshed.was_auto_reset
-
-    def test_does_not_suspend_old_sessions(self, tmp_path):
-        store = _make_store(tmp_path)
-        source = _make_source()
-        entry = store.get_or_create_session(source)
-
-        # Backdate the session's updated_at beyond the cutoff
-        with store._lock:
-            entry.updated_at = datetime.now() - timedelta(seconds=300)
-            store._save()
-
-        count = store.suspend_recently_active(max_age_seconds=120)
-        assert count == 0
-
-    def test_already_suspended_not_double_counted(self, tmp_path):
-        store = _make_store(tmp_path)
-        source = _make_source()
-        entry = store.get_or_create_session(source)
-
-        # Suspend once
-        count1 = store.suspend_recently_active()
-        assert count1 == 1
-
-        # Create a new session (the old one got reset on next access)
-        entry2 = store.get_or_create_session(source)
-
-        # Suspend again — the new session is recent but not yet suspended
-        count2 = store.suspend_recently_active()
-        assert count2 == 1
-
-
-# ---------------------------------------------------------------------------
-# Clean shutdown marker integration
-# ---------------------------------------------------------------------------
-
-class TestCleanShutdownMarker:
-    """Test that the marker file controls session suspension on startup."""
-
-    def test_marker_written_on_graceful_stop(self, tmp_path, monkeypatch):
-        """stop() should write .clean_shutdown marker."""
-        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
-        marker = tmp_path / ".clean_shutdown"
-        assert not marker.exists()
-
-        # Create a minimal runner and call the shutdown logic directly
-        from gateway.run import GatewayRunner
-        runner = object.__new__(GatewayRunner)
-        runner._restart_requested = False
-        runner._restart_detached = False
-        runner._restart_via_service = False
-        runner._restart_task_started = False
-        runner._running = True
-        runner._draining = False
-        runner._stop_task = None
-        runner._running_agents = {}
-        runner._pending_messages = {}
-        runner._pending_approvals = {}
-        runner._background_tasks = set()
-        runner._shutdown_event = MagicMock()
-        runner._restart_drain_timeout = 5
-        runner._exit_code = None
-        runner._exit_reason = None
-        runner.adapters = {}
-        runner.config = GatewayConfig()
-
-        # Mock heavy dependencies
-        with patch("gateway.run.GatewayRunner._drain_active_agents", new_callable=AsyncMock, return_value=([], False)), \
-             patch("gateway.run.GatewayRunner._finalize_shutdown_agents"), \
-             patch("gateway.run.GatewayRunner._update_runtime_status"), \
-             patch("gateway.status.remove_pid_file"), \
-             patch("tools.process_registry.process_registry") as mock_proc_reg, \
-             patch("tools.terminal_tool.cleanup_all_environments"), \
-             patch("tools.browser_tool.cleanup_all_browsers"):
-            mock_proc_reg.kill_all = MagicMock()
-
-            import asyncio
-            asyncio.get_event_loop().run_until_complete(runner.stop())
-
-        assert marker.exists(), ".clean_shutdown marker should exist after graceful stop"
-
-    def test_marker_skips_suspension_on_startup(self, tmp_path, monkeypatch):
-        """If .clean_shutdown exists, suspend_recently_active should NOT be called."""
-        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
-
-        # Create the marker
-        marker = tmp_path / ".clean_shutdown"
-        marker.touch()
-
-        # Create a store with a recently active session
-        store = _make_store(tmp_path)
-        source = _make_source()
-        entry = store.get_or_create_session(source)
-        assert not entry.suspended
-
-        # Simulate what start() does:
-        if marker.exists():
-            marker.unlink()
-            # Should NOT call suspend_recently_active
-        else:
-            store.suspend_recently_active()
-
-        # Session should NOT be suspended
-        with store._lock:
-            store._ensure_loaded_locked()
-            for e in store._entries.values():
-                assert not e.suspended, "Session should NOT be suspended after clean shutdown"
-
-        assert not marker.exists(), "Marker should be cleaned up"
-
-    def test_no_marker_triggers_suspension(self, tmp_path, monkeypatch):
-        """Without .clean_shutdown marker (crash), suspension should fire."""
-        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
-
-        marker = tmp_path / ".clean_shutdown"
-        assert not marker.exists()
-
-        # Create a store with a recently active session
-        store = _make_store(tmp_path)
-        source = _make_source()
-        entry = store.get_or_create_session(source)
-        assert not entry.suspended
-
-        # Simulate what start() does:
-        if marker.exists():
-            marker.unlink()
-        else:
-            store.suspend_recently_active()
-
-        # Session SHOULD be suspended (crash recovery)
-        with store._lock:
-            store._ensure_loaded_locked()
-            suspended_count = sum(1 for e in store._entries.values() if e.suspended)
-        assert suspended_count == 1, "Session should be suspended after crash (no marker)"
-
-    def test_marker_written_on_restart_stop(self, tmp_path, monkeypatch):
-        """stop(restart=True) should also write the marker."""
-        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)
-        marker = tmp_path / ".clean_shutdown"
-
-        from gateway.run import GatewayRunner
-        runner = object.__new__(GatewayRunner)
-        runner._restart_requested = False
-        runner._restart_detached = False
-        runner._restart_via_service = False
-        runner._restart_task_started = False
-        runner._running = True
-        runner._draining = False
-        runner._stop_task = None
-        runner._running_agents = {}
-        runner._pending_messages = {}
-        runner._pending_approvals = {}
-        runner._background_tasks = set()
-        runner._shutdown_event = MagicMock()
-        runner._restart_drain_timeout = 5
-        runner._exit_code = None
-        runner._exit_reason = None
-        runner.adapters = {}
-        runner.config = GatewayConfig()
-
-        with patch("gateway.run.GatewayRunner._drain_active_agents", new_callable=AsyncMock, return_value=([], False)), \
-             patch("gateway.run.GatewayRunner._finalize_shutdown_agents"), \
-             patch("gateway.run.GatewayRunner._update_runtime_status"), \
-             patch("gateway.status.remove_pid_file"), \
-             patch("tools.process_registry.process_registry") as mock_proc_reg, \
-             patch("tools.terminal_tool.cleanup_all_environments"), \
-             patch("tools.browser_tool.cleanup_all_browsers"):
-            mock_proc_reg.kill_all = MagicMock()
-
-            import asyncio
-            asyncio.get_event_loop().run_until_complete(runner.stop(restart=True))
-
-        assert marker.exists(), ".clean_shutdown marker should exist after restart-stop too"
@@ -124,7 +124,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_off_mode_no_reply_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("off")
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -137,7 +137,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_first_mode_only_first_chunk_references(self):
        adapter, channel, ref_msg = _make_discord_adapter("first")
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -152,7 +152,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_all_mode_all_chunks_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("all")
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -165,7 +165,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_no_reply_to_param_no_reference(self):
        adapter, channel, ref_msg = _make_discord_adapter("all")
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]

        await adapter.send("12345", "test content", reply_to=None)

@@ -176,7 +176,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_single_chunk_respects_first_mode(self):
        adapter, channel, ref_msg = _make_discord_adapter("first")
-        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -187,7 +187,7 @@ class TestSendWithReplyToMode:
    @pytest.mark.asyncio
    async def test_single_chunk_off_mode(self):
        adapter, channel, ref_msg = _make_discord_adapter("off")
-        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -200,7 +200,7 @@ class TestSendWithReplyToMode:
    async def test_invalid_mode_falls_back_to_first_behavior(self):
        """Invalid mode behaves like 'first' — only first chunk gets reference."""
        adapter, channel, ref_msg = _make_discord_adapter("banana")
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]

        await adapter.send("12345", "test", reply_to="999")

@@ -189,14 +189,14 @@ class TestPlatformDefaults:
        """Slack, Mattermost, Matrix default to 'new' tool progress."""
        from gateway.display_config import resolve_display_setting

-        for plat in ("slack", "mattermost", "matrix", "feishu", "whatsapp"):
+        for plat in ("slack", "mattermost", "matrix", "feishu"):
            assert resolve_display_setting({}, plat, "tool_progress") == "new", plat

    def test_low_tier_platforms(self):
-        """Signal, BlueBubbles, etc. default to 'off' tool progress."""
+        """Signal, WhatsApp, etc. default to 'off' tool progress."""
        from gateway.display_config import resolve_display_setting

-        for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk"):
+        for plat in ("signal", "whatsapp", "bluebubbles", "weixin", "wecom", "dingtalk"):
            assert resolve_display_setting({}, plat, "tool_progress") == "off", plat

    def test_minimal_tier_platforms(self):
@@ -1,438 +0,0 @@
-"""Tests for gateway.platforms.feishu — Feishu scan-to-create registration."""
-
-import json
-from unittest.mock import patch, MagicMock
-import pytest
-
-
-def _mock_urlopen(response_data, status=200):
-    """Create a mock for urllib.request.urlopen that returns JSON response_data."""
-    mock_response = MagicMock()
-    mock_response.read.return_value = json.dumps(response_data).encode("utf-8")
-    mock_response.status = status
-    mock_response.__enter__ = lambda s: s
-    mock_response.__exit__ = MagicMock(return_value=False)
-    return mock_response
-
-
-class TestPostRegistration:
-    """Tests for the low-level HTTP helper."""
-
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_post_registration_returns_parsed_json(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import _post_registration
-
-        mock_urlopen_fn.return_value = _mock_urlopen({"nonce": "abc", "supported_auth_methods": ["client_secret"]})
-        result = _post_registration("https://accounts.feishu.cn", {"action": "init"})
-        assert result["nonce"] == "abc"
-        assert "client_secret" in result["supported_auth_methods"]
-
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_post_registration_sends_form_encoded_body(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import _post_registration
-
-        mock_urlopen_fn.return_value = _mock_urlopen({})
-        _post_registration("https://accounts.feishu.cn", {"action": "init", "key": "val"})
-        call_args = mock_urlopen_fn.call_args
-        request = call_args[0][0]
-        body = request.data.decode("utf-8")
-        assert "action=init" in body
-        assert "key=val" in body
-        assert request.get_header("Content-type") == "application/x-www-form-urlencoded"
-
-
-class TestInitRegistration:
-    """Tests for the init step."""
-
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_init_succeeds_when_client_secret_supported(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import _init_registration
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "nonce": "abc",
-            "supported_auth_methods": ["client_secret"],
-        })
-        _init_registration("feishu")
-
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_init_raises_when_client_secret_not_supported(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import _init_registration
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "nonce": "abc",
-            "supported_auth_methods": ["other_method"],
-        })
-        with pytest.raises(RuntimeError, match="client_secret"):
-            _init_registration("feishu")
-
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_init_uses_lark_url_for_lark_domain(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import _init_registration
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "nonce": "abc",
-            "supported_auth_methods": ["client_secret"],
-        })
-        _init_registration("lark")
-        call_args = mock_urlopen_fn.call_args
-        request = call_args[0][0]
-        assert "larksuite.com" in request.full_url
-
-
-class TestBeginRegistration:
-    """Tests for the begin step."""
-
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_begin_returns_device_code_and_qr_url(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import _begin_registration
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "device_code": "dc_123",
-            "verification_uri_complete": "https://accounts.feishu.cn/qr/abc",
-            "user_code": "ABCD-1234",
-            "interval": 5,
-            "expire_in": 600,
-        })
-        result = _begin_registration("feishu")
-        assert result["device_code"] == "dc_123"
-        assert "qr_url" in result
-        assert "accounts.feishu.cn" in result["qr_url"]
-        assert result["user_code"] == "ABCD-1234"
-        assert result["interval"] == 5
-        assert result["expire_in"] == 600
-
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_begin_sends_correct_archetype(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import _begin_registration
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "device_code": "dc_123",
-            "verification_uri_complete": "https://example.com/qr",
-            "user_code": "X",
-            "interval": 5,
-            "expire_in": 600,
-        })
-        _begin_registration("feishu")
-        request = mock_urlopen_fn.call_args[0][0]
-        body = request.data.decode("utf-8")
-        assert "archetype=PersonalAgent" in body
-        assert "auth_method=client_secret" in body
-
-
-class TestPollRegistration:
-    """Tests for the poll step."""
-
-    @patch("gateway.platforms.feishu.time")
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_poll_returns_credentials_on_success(self, mock_urlopen_fn, mock_time):
-        from gateway.platforms.feishu import _poll_registration
-
-        mock_time.time.side_effect = [0, 1]
-        mock_time.sleep = MagicMock()
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "client_id": "cli_app123",
-            "client_secret": "secret456",
-            "user_info": {"open_id": "ou_owner", "tenant_brand": "feishu"},
-        })
-        result = _poll_registration(
-            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
-        )
-        assert result is not None
-        assert result["app_id"] == "cli_app123"
-        assert result["app_secret"] == "secret456"
-        assert result["domain"] == "feishu"
-        assert result["open_id"] == "ou_owner"
-
-    @patch("gateway.platforms.feishu.time")
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_poll_switches_domain_on_lark_tenant_brand(self, mock_urlopen_fn, mock_time):
-        from gateway.platforms.feishu import _poll_registration
-
-        mock_time.time.side_effect = [0, 1, 2]
-        mock_time.sleep = MagicMock()
-
-        pending_resp = _mock_urlopen({
-            "error": "authorization_pending",
-            "user_info": {"tenant_brand": "lark"},
-        })
-        success_resp = _mock_urlopen({
-            "client_id": "cli_lark",
-            "client_secret": "secret_lark",
-            "user_info": {"open_id": "ou_lark", "tenant_brand": "lark"},
-        })
-        mock_urlopen_fn.side_effect = [pending_resp, success_resp]
-
-        result = _poll_registration(
-            device_code="dc_123", interval=0, expire_in=60, domain="feishu"
-        )
-        assert result is not None
-        assert result["domain"] == "lark"
-
-    @patch("gateway.platforms.feishu.time")
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_poll_success_with_lark_brand_in_same_response(self, mock_urlopen_fn, mock_time):
-        """Credentials and lark tenant_brand in one response must not be discarded."""
-        from gateway.platforms.feishu import _poll_registration
-
-        mock_time.time.side_effect = [0, 1]
-        mock_time.sleep = MagicMock()
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "client_id": "cli_lark_direct",
-            "client_secret": "secret_lark_direct",
-            "user_info": {"open_id": "ou_lark_direct", "tenant_brand": "lark"},
-        })
-        result = _poll_registration(
-            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
-        )
-        assert result is not None
-        assert result["app_id"] == "cli_lark_direct"
-        assert result["domain"] == "lark"
-        assert result["open_id"] == "ou_lark_direct"
-
-    @patch("gateway.platforms.feishu.time")
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_poll_returns_none_on_access_denied(self, mock_urlopen_fn, mock_time):
-        from gateway.platforms.feishu import _poll_registration
-
-        mock_time.time.side_effect = [0, 1]
-        mock_time.sleep = MagicMock()
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "error": "access_denied",
-        })
-        result = _poll_registration(
-            device_code="dc_123", interval=1, expire_in=60, domain="feishu"
-        )
-        assert result is None
-
-    @patch("gateway.platforms.feishu.time")
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_poll_returns_none_on_timeout(self, mock_urlopen_fn, mock_time):
-        from gateway.platforms.feishu import _poll_registration
-
-        mock_time.time.side_effect = [0, 999]
-        mock_time.sleep = MagicMock()
-
-        mock_urlopen_fn.return_value = _mock_urlopen({
-            "error": "authorization_pending",
-        })
-        result = _poll_registration(
-            device_code="dc_123", interval=1, expire_in=1, domain="feishu"
-        )
-        assert result is None
-
-
-class TestRenderQr:
-    """Tests for QR code terminal rendering."""
-
-    @patch("gateway.platforms.feishu._qrcode_mod", create=True)
-    def test_render_qr_returns_true_on_success(self, mock_qrcode_mod):
-        from gateway.platforms.feishu import _render_qr
-
-        mock_qr = MagicMock()
-        mock_qrcode_mod.QRCode.return_value = mock_qr
-        assert _render_qr("https://example.com/qr") is True
-        mock_qr.add_data.assert_called_once_with("https://example.com/qr")
-        mock_qr.make.assert_called_once_with(fit=True)
-        mock_qr.print_ascii.assert_called_once()
-
-    def test_render_qr_returns_false_when_qrcode_missing(self):
-        from gateway.platforms.feishu import _render_qr
-
-        with patch("gateway.platforms.feishu._qrcode_mod", None):
-            assert _render_qr("https://example.com/qr") is False
-
-
-class TestProbeBot:
-    """Tests for bot connectivity verification."""
-
-    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True)
-    def test_probe_returns_bot_info_on_success(self):
-        from gateway.platforms.feishu import probe_bot
-
-        with patch("gateway.platforms.feishu._probe_bot_sdk") as mock_sdk:
-            mock_sdk.return_value = {"bot_name": "TestBot", "bot_open_id": "ou_bot123"}
-            result = probe_bot("cli_app", "secret", "feishu")
-
-        assert result is not None
-        assert result["bot_name"] == "TestBot"
-        assert result["bot_open_id"] == "ou_bot123"
-
-    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True)
-    def test_probe_returns_none_on_failure(self):
-        from gateway.platforms.feishu import probe_bot
-
-        with patch("gateway.platforms.feishu._probe_bot_sdk") as mock_sdk:
-            mock_sdk.return_value = None
-            result = probe_bot("bad_id", "bad_secret", "feishu")
-
-        assert result is None
-
-    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", False)
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_http_fallback_when_sdk_unavailable(self, mock_urlopen_fn):
-        """Without lark_oapi, probe falls back to raw HTTP."""
-        from gateway.platforms.feishu import probe_bot
-
-        token_resp = _mock_urlopen({"code": 0, "tenant_access_token": "t-123"})
-        bot_resp = _mock_urlopen({"code": 0, "bot": {"bot_name": "HttpBot", "open_id": "ou_http"}})
-        mock_urlopen_fn.side_effect = [token_resp, bot_resp]
-
-        result = probe_bot("cli_app", "secret", "feishu")
-        assert result is not None
-        assert result["bot_name"] == "HttpBot"
-
-    @patch("gateway.platforms.feishu.FEISHU_AVAILABLE", False)
-    @patch("gateway.platforms.feishu.urlopen")
-    def test_http_fallback_returns_none_on_network_error(self, mock_urlopen_fn):
-        from gateway.platforms.feishu import probe_bot
-        from urllib.error import URLError
-
-        mock_urlopen_fn.side_effect = URLError("connection refused")
-        result = probe_bot("cli_app", "secret", "feishu")
-        assert result is None
-
-
-class TestQrRegister:
-    """Tests for the public qr_register entry point."""
-
-    @patch("gateway.platforms.feishu.probe_bot")
-    @patch("gateway.platforms.feishu._render_qr")
-    @patch("gateway.platforms.feishu._poll_registration")
-    @patch("gateway.platforms.feishu._begin_registration")
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_success_flow(
-        self, mock_init, mock_begin, mock_poll, mock_render, mock_probe
-    ):
-        from gateway.platforms.feishu import qr_register
-
-        mock_begin.return_value = {
-            "device_code": "dc_123",
-            "qr_url": "https://example.com/qr",
-            "user_code": "ABCD",
-            "interval": 1,
-            "expire_in": 60,
-        }
-        mock_poll.return_value = {
-            "app_id": "cli_app",
-            "app_secret": "secret",
-            "domain": "feishu",
-            "open_id": "ou_owner",
-        }
-        mock_probe.return_value = {"bot_name": "MyBot", "bot_open_id": "ou_bot"}
-
-        result = qr_register()
-        assert result is not None
-        assert result["app_id"] == "cli_app"
-        assert result["app_secret"] == "secret"
-        assert result["bot_name"] == "MyBot"
-        mock_init.assert_called_once()
-        mock_render.assert_called_once()
-
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_returns_none_on_init_failure(self, mock_init):
-        from gateway.platforms.feishu import qr_register
-
-        mock_init.side_effect = RuntimeError("not supported")
-        result = qr_register()
-        assert result is None
-
-    @patch("gateway.platforms.feishu._render_qr")
-    @patch("gateway.platforms.feishu._poll_registration")
-    @patch("gateway.platforms.feishu._begin_registration")
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_returns_none_on_poll_failure(
-        self, mock_init, mock_begin, mock_poll, mock_render
-    ):
-        from gateway.platforms.feishu import qr_register
-
-        mock_begin.return_value = {
-            "device_code": "dc_123",
-            "qr_url": "https://example.com/qr",
-            "user_code": "ABCD",
-            "interval": 1,
-            "expire_in": 60,
-        }
-        mock_poll.return_value = None
-
-        result = qr_register()
-        assert result is None
-
-    # -- Contract: expected errors → None, unexpected errors → propagate --
-
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_returns_none_on_network_error(self, mock_init):
-        """URLError (network down) is an expected failure → None."""
-        from gateway.platforms.feishu import qr_register
-        from urllib.error import URLError
-
-        mock_init.side_effect = URLError("DNS resolution failed")
-        result = qr_register()
-        assert result is None
-
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_returns_none_on_json_error(self, mock_init):
-        """Malformed server response is an expected failure → None."""
-        from gateway.platforms.feishu import qr_register
-
-        mock_init.side_effect = json.JSONDecodeError("bad json", "", 0)
-        result = qr_register()
-        assert result is None
-
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_propagates_unexpected_errors(self, mock_init):
-        """Bugs (e.g. AttributeError) must not be swallowed — they propagate."""
-        from gateway.platforms.feishu import qr_register
-
-        mock_init.side_effect = AttributeError("some internal bug")
-        with pytest.raises(AttributeError, match="some internal bug"):
-            qr_register()
-
-    # -- Negative paths: partial/malformed server responses --
-
-    @patch("gateway.platforms.feishu._render_qr")
-    @patch("gateway.platforms.feishu._begin_registration")
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_returns_none_when_begin_missing_device_code(
-        self, mock_init, mock_begin, mock_render
-    ):
-        """Server returns begin response without device_code → RuntimeError → None."""
-        from gateway.platforms.feishu import qr_register
-
-        mock_begin.side_effect = RuntimeError("Feishu registration did not return a device_code")
-        result = qr_register()
-        assert result is None
-
-    @patch("gateway.platforms.feishu.probe_bot")
-    @patch("gateway.platforms.feishu._render_qr")
-    @patch("gateway.platforms.feishu._poll_registration")
-    @patch("gateway.platforms.feishu._begin_registration")
-    @patch("gateway.platforms.feishu._init_registration")
-    def test_qr_register_succeeds_even_when_probe_fails(
-        self, mock_init, mock_begin, mock_poll, mock_render, mock_probe
-    ):
-        """Registration succeeds but probe fails → result with bot_name=None."""
-        from gateway.platforms.feishu import qr_register
-
-        mock_begin.return_value = {
-            "device_code": "dc_123",
-            "qr_url": "https://example.com/qr",
-            "user_code": "ABCD",
-            "interval": 1,
-            "expire_in": 60,
-        }
-        mock_poll.return_value = {
-            "app_id": "cli_app",
-            "app_secret": "secret",
-            "domain": "feishu",
-            "open_id": "ou_owner",
-        }
-        mock_probe.return_value = None  # probe failed
-
-        result = qr_register()
-        assert result is not None
-        assert result["app_id"] == "cli_app"
-        assert result["bot_name"] is None
-        assert result["bot_open_id"] is None
@@ -48,7 +48,6 @@ def _make_event(
    room_id="!room1:example.org",
    formatted_body=None,
    thread_id=None,
-    mention_user_ids=None,
 ):
    """Create a fake room message event.

@@ -61,9 +60,6 @@ def _make_event(
        content["formatted_body"] = formatted_body
        content["format"] = "org.matrix.custom.html"

-    if mention_user_ids is not None:
-        content["m.mentions"] = {"user_ids": mention_user_ids}
-
    relates_to = {}
    if thread_id:
        relates_to["rel_type"] = "m.thread"
@@ -112,44 +108,6 @@ class TestIsBotMentioned:
        # "hermesbot" should not match word-boundary check for "hermes"
        assert not self.adapter._is_bot_mentioned("hermesbot is here")

-    # m.mentions.user_ids — MSC3952 / Matrix v1.7 authoritative mentions
-    # Ported from openclaw/openclaw#64796
-
-    def test_m_mentions_user_ids_authoritative(self):
-        """m.mentions.user_ids alone is sufficient — no body text needed."""
-        assert self.adapter._is_bot_mentioned(
-            "please reply",  # no @hermes anywhere in body
-            mention_user_ids=["@hermes:example.org"],
-        )
-
-    def test_m_mentions_user_ids_with_body_mention(self):
-        """Both m.mentions and body mention — should still be True."""
-        assert self.adapter._is_bot_mentioned(
-            "hey @hermes:example.org help",
-            mention_user_ids=["@hermes:example.org"],
-        )
-
-    def test_m_mentions_user_ids_other_user_only(self):
-        """m.mentions with a different user — bot is NOT mentioned."""
-        assert not self.adapter._is_bot_mentioned(
-            "hello",
-            mention_user_ids=["@alice:example.org"],
-        )
-
-    def test_m_mentions_user_ids_empty_list(self):
-        """Empty user_ids list — falls through to text detection."""
-        assert not self.adapter._is_bot_mentioned(
-            "hello everyone",
-            mention_user_ids=[],
-        )
-
-    def test_m_mentions_user_ids_none(self):
-        """None mention_user_ids — falls through to text detection."""
-        assert not self.adapter._is_bot_mentioned(
-            "hello everyone",
-            mention_user_ids=None,
-        )
-

 class TestStripMention:
    def setup_method(self):
@@ -218,44 +176,6 @@ async def test_require_mention_html_pill(monkeypatch):
    adapter.handle_message.assert_awaited_once()


-@pytest.mark.asyncio
-async def test_require_mention_m_mentions_user_ids(monkeypatch):
-    """m.mentions.user_ids is authoritative per MSC3952 — no body mention needed.
-
-    Ported from openclaw/openclaw#64796.
-    """
-    monkeypatch.delenv("MATRIX_REQUIRE_MENTION", raising=False)
-    monkeypatch.delenv("MATRIX_FREE_RESPONSE_ROOMS", raising=False)
-    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
-
-    adapter = _make_adapter()
-    # Body has NO mention, but m.mentions.user_ids includes the bot.
-    event = _make_event(
-        "please reply",
-        mention_user_ids=["@hermes:example.org"],
-    )
-
-    await adapter._on_room_message(event)
-    adapter.handle_message.assert_awaited_once()
-
-
-@pytest.mark.asyncio
-async def test_require_mention_m_mentions_other_user_ignored(monkeypatch):
-    """m.mentions.user_ids mentioning another user should NOT activate the bot."""
-    monkeypatch.delenv("MATRIX_REQUIRE_MENTION", raising=False)
-    monkeypatch.delenv("MATRIX_FREE_RESPONSE_ROOMS", raising=False)
-    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
-
-    adapter = _make_adapter()
-    event = _make_event(
-        "hey alice check this",
-        mention_user_ids=["@alice:example.org"],
-    )
-
-    await adapter._on_room_message(event)
-    adapter.handle_message.assert_not_awaited()
-
-
@pytest.mark.asyncio
 async def test_require_mention_dm_always_responds(monkeypatch):
    """DMs always respond regardless of mention setting."""
@@ -9,8 +9,6 @@ from gateway.platforms.base import (
    MessageEvent,
    MessageType,
    safe_url_for_log,
-    utf16_len,
-    _prefix_within_utf16_limit,
 )


@@ -450,135 +448,3 @@ class TestGetHumanDelay:
        with patch.dict(os.environ, env):
            delay = BasePlatformAdapter._get_human_delay()
            assert 0.1 <= delay <= 0.2
-
-
-# ---------------------------------------------------------------------------
-# utf16_len / _prefix_within_utf16_limit / truncate_message with len_fn
-# ---------------------------------------------------------------------------
-# Ported from nearai/ironclaw#2304 — Telegram counts message length in UTF-16
-# code units, not Unicode code-points.  Astral-plane characters (emoji, CJK
-# Extension B) are surrogate pairs: 1 Python char but 2 UTF-16 units.
-
-
-class TestUtf16Len:
-    """Verify the UTF-16 length helper."""
-
-    def test_ascii(self):
-        assert utf16_len("hello") == 5
-
-    def test_bmp_cjk(self):
-        # CJK ideographs in the BMP are 1 code unit each
-        assert utf16_len("你好") == 2
-
-    def test_emoji_surrogate_pair(self):
-        # 😀 (U+1F600) is outside BMP → 2 UTF-16 code units
-        assert utf16_len("😀") == 2
-
-    def test_mixed(self):
-        # "hi😀" = 2 + 2 = 4 UTF-16 units
-        assert utf16_len("hi😀") == 4
-
-    def test_musical_symbol(self):
-        # 𝄞 (U+1D11E) — Musical Symbol G Clef, surrogate pair
-        assert utf16_len("𝄞") == 2
-
-    def test_empty(self):
-        assert utf16_len("") == 0
-
-
-class TestPrefixWithinUtf16Limit:
-    """Verify UTF-16-aware prefix truncation."""
-
-    def test_fits_entirely(self):
-        assert _prefix_within_utf16_limit("hello", 10) == "hello"
-
-    def test_ascii_truncation(self):
-        result = _prefix_within_utf16_limit("hello world", 5)
-        assert result == "hello"
-        assert utf16_len(result) <= 5
-
-    def test_does_not_split_surrogate_pair(self):
-        # "a😀b" = 1 + 2 + 1 = 4 UTF-16 units; limit 2 should give "a"
-        result = _prefix_within_utf16_limit("a😀b", 2)
-        assert result == "a"
-        assert utf16_len(result) <= 2
-
-    def test_emoji_at_limit(self):
-        # "😀" = 2 UTF-16 units; limit 2 should include it
-        result = _prefix_within_utf16_limit("😀x", 2)
-        assert result == "😀"
-
-    def test_all_emoji(self):
-        msg = "😀" * 10  # 20 UTF-16 units
-        result = _prefix_within_utf16_limit(msg, 6)
-        assert result == "😀😀😀"
-        assert utf16_len(result) == 6
-
-    def test_empty(self):
-        assert _prefix_within_utf16_limit("", 5) == ""
-
-
-class TestTruncateMessageUtf16:
-    """Verify truncate_message respects UTF-16 lengths when len_fn=utf16_len."""
-
-    def test_short_emoji_message_no_split(self):
-        """A short message under the UTF-16 limit should not be split."""
-        msg = "Hello 😀 world"
-        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
-        assert len(chunks) == 1
-        assert chunks[0] == msg
-
-    def test_emoji_near_limit_triggers_split(self):
-        """A message at 4096 codepoints but >4096 UTF-16 units must split."""
-        # 2049 emoji = 2049 codepoints but 4098 UTF-16 units → exceeds 4096
-        msg = "😀" * 2049
-        assert len(msg) == 2049  # Python len sees 2049 chars
-        assert utf16_len(msg) == 4098  # but it's 4098 UTF-16 units
-
-        # Without UTF-16 awareness, this would NOT split (2049 < 4096)
-        chunks_naive = BasePlatformAdapter.truncate_message(msg, 4096)
-        assert len(chunks_naive) == 1, "Without len_fn, no split expected"
-
-        # With UTF-16 awareness, it MUST split
-        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
-        assert len(chunks) > 1, "With utf16_len, message should be split"
-
-        # Each chunk must fit within the UTF-16 limit
-        for i, chunk in enumerate(chunks):
-            assert utf16_len(chunk) <= 4096, (
-                f"Chunk {i} exceeds 4096 UTF-16 units: {utf16_len(chunk)}"
-            )
-
-    def test_each_utf16_chunk_within_limit(self):
-        """All chunks produced with utf16_len must fit the limit."""
-        # Mix of BMP and astral-plane characters
-        msg = ("Hello 😀 world 🎵 test 𝄞 " * 200).strip()
-        max_len = 200
-        chunks = BasePlatformAdapter.truncate_message(msg, max_len, len_fn=utf16_len)
-        for i, chunk in enumerate(chunks):
-            u16_len = utf16_len(chunk)
-            assert u16_len <= max_len + 20, (
-                f"Chunk {i} UTF-16 length {u16_len} exceeds {max_len}"
-            )
-
-    def test_all_content_preserved(self):
-        """Splitting with utf16_len must not lose content."""
-        words = ["emoji😀", "music🎵", "cjk你好", "plain"] * 100
-        msg = " ".join(words)
-        chunks = BasePlatformAdapter.truncate_message(msg, 200, len_fn=utf16_len)
-        reassembled = " ".join(chunks)
-        for word in words:
-            assert word in reassembled, f"Word '{word}' lost during UTF-16 split"
-
-    def test_code_blocks_preserved_with_utf16(self):
-        """Code block fence handling should work with utf16_len too."""
-        msg = "Before\n```python\n" + "x = '😀'\n" * 200 + "```\nAfter"
-        chunks = BasePlatformAdapter.truncate_message(msg, 300, len_fn=utf16_len)
-        assert len(chunks) > 1
-        # Each chunk should have balanced fences
-        for i, chunk in enumerate(chunks):
-            fence_count = chunk.count("```")
-            assert fence_count % 2 == 0, (
-                f"Chunk {i} has unbalanced fences ({fence_count})"
-            )
-
@@ -1,215 +0,0 @@
-"""Tests for /restart notification — the gateway notifies the requester on comeback."""
-
-import asyncio
-import json
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock
-
-import pytest
-
-import gateway.run as gateway_run
-from gateway.config import Platform
-from gateway.platforms.base import MessageEvent, MessageType
-from gateway.session import build_session_key
-from tests.gateway.restart_test_helpers import (
-    make_restart_runner,
-    make_restart_source,
-)
-
-
-# ── _handle_restart_command writes .restart_notify.json ──────────────────
-
-
-@pytest.mark.asyncio
-async def test_restart_command_writes_notify_file(tmp_path, monkeypatch):
-    """When /restart fires, the requester's routing info is persisted to disk."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-
-    runner, _adapter = make_restart_runner()
-    runner.request_restart = MagicMock(return_value=True)
-
-    source = make_restart_source(chat_id="42")
-    event = MessageEvent(
-        text="/restart",
-        message_type=MessageType.TEXT,
-        source=source,
-        message_id="m1",
-    )
-
-    result = await runner._handle_restart_command(event)
-    assert "Restarting" in result
-
-    notify_path = tmp_path / ".restart_notify.json"
-    assert notify_path.exists()
-    data = json.loads(notify_path.read_text())
-    assert data["platform"] == "telegram"
-    assert data["chat_id"] == "42"
-    assert "thread_id" not in data  # no thread → omitted
-
-
-@pytest.mark.asyncio
-async def test_restart_command_uses_service_restart_under_systemd(tmp_path, monkeypatch):
-    """Under systemd (INVOCATION_ID set), /restart uses via_service=True."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-    monkeypatch.setenv("INVOCATION_ID", "abc123")
-
-    runner, _adapter = make_restart_runner()
-    runner.request_restart = MagicMock(return_value=True)
-
-    source = make_restart_source(chat_id="42")
-    event = MessageEvent(
-        text="/restart",
-        message_type=MessageType.TEXT,
-        source=source,
-        message_id="m1",
-    )
-
-    await runner._handle_restart_command(event)
-    runner.request_restart.assert_called_once_with(detached=False, via_service=True)
-
-
-@pytest.mark.asyncio
-async def test_restart_command_uses_detached_without_systemd(tmp_path, monkeypatch):
-    """Without systemd, /restart uses the detached subprocess approach."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-    monkeypatch.delenv("INVOCATION_ID", raising=False)
-
-    runner, _adapter = make_restart_runner()
-    runner.request_restart = MagicMock(return_value=True)
-
-    source = make_restart_source(chat_id="42")
-    event = MessageEvent(
-        text="/restart",
-        message_type=MessageType.TEXT,
-        source=source,
-        message_id="m1",
-    )
-
-    await runner._handle_restart_command(event)
-    runner.request_restart.assert_called_once_with(detached=True, via_service=False)
-
-
-@pytest.mark.asyncio
-async def test_restart_command_preserves_thread_id(tmp_path, monkeypatch):
-    """Thread ID is saved when the requester is in a threaded chat."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-
-    runner, _adapter = make_restart_runner()
-    runner.request_restart = MagicMock(return_value=True)
-
-    source = make_restart_source(chat_id="99")
-    source.thread_id = "topic_7"
-
-    event = MessageEvent(
-        text="/restart",
-        message_type=MessageType.TEXT,
-        source=source,
-        message_id="m2",
-    )
-
-    await runner._handle_restart_command(event)
-
-    data = json.loads((tmp_path / ".restart_notify.json").read_text())
-    assert data["thread_id"] == "topic_7"
-
-
-# ── _send_restart_notification ───────────────────────────────────────────
-
-
-@pytest.mark.asyncio
-async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkeypatch):
-    """On startup, the notification is sent and the file is removed."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-
-    notify_path = tmp_path / ".restart_notify.json"
-    notify_path.write_text(json.dumps({
-        "platform": "telegram",
-        "chat_id": "42",
-    }))
-
-    runner, adapter = make_restart_runner()
-    adapter.send = AsyncMock()
-
-    await runner._send_restart_notification()
-
-    adapter.send.assert_called_once()
-    call_args = adapter.send.call_args
-    assert call_args[0][0] == "42"  # chat_id
-    assert "restarted" in call_args[0][1].lower()
-    assert call_args[1].get("metadata") is None  # no thread
-    assert not notify_path.exists()
-
-
-@pytest.mark.asyncio
-async def test_send_restart_notification_with_thread(tmp_path, monkeypatch):
-    """Thread ID is passed as metadata so the message lands in the right topic."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-
-    notify_path = tmp_path / ".restart_notify.json"
-    notify_path.write_text(json.dumps({
-        "platform": "telegram",
-        "chat_id": "99",
-        "thread_id": "topic_7",
-    }))
-
-    runner, adapter = make_restart_runner()
-    adapter.send = AsyncMock()
-
-    await runner._send_restart_notification()
-
-    call_args = adapter.send.call_args
-    assert call_args[1]["metadata"] == {"thread_id": "topic_7"}
-    assert not notify_path.exists()
-
-
-@pytest.mark.asyncio
-async def test_send_restart_notification_noop_when_no_file(tmp_path, monkeypatch):
-    """Nothing happens if there's no pending restart notification."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-
-    runner, adapter = make_restart_runner()
-    adapter.send = AsyncMock()
-
-    await runner._send_restart_notification()
-
-    adapter.send.assert_not_called()
-
-
-@pytest.mark.asyncio
-async def test_send_restart_notification_skips_when_adapter_missing(tmp_path, monkeypatch):
-    """If the requester's platform isn't connected, clean up without crashing."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-
-    notify_path = tmp_path / ".restart_notify.json"
-    notify_path.write_text(json.dumps({
-        "platform": "discord",  # runner only has telegram adapter
-        "chat_id": "42",
-    }))
-
-    runner, _adapter = make_restart_runner()
-
-    await runner._send_restart_notification()
-
-    # File cleaned up even though we couldn't send
-    assert not notify_path.exists()
-
-
-@pytest.mark.asyncio
-async def test_send_restart_notification_cleans_up_on_send_failure(
-    tmp_path, monkeypatch
-):
-    """If the adapter.send() raises, the file is still cleaned up."""
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-
-    notify_path = tmp_path / ".restart_notify.json"
-    notify_path.write_text(json.dumps({
-        "platform": "telegram",
-        "chat_id": "42",
-    }))
-
-    runner, adapter = make_restart_runner()
-    adapter.send = AsyncMock(side_effect=RuntimeError("network down"))
-
-    await runner._send_restart_notification()
-
-    assert not notify_path.exists()  # cleaned up despite error
@@ -396,27 +396,6 @@ class QueuedCommentaryAgent:
        }


-class VerboseAgent:
-    """Agent that emits a tool call with args whose JSON exceeds 200 chars."""
-    LONG_CODE = "x" * 300
-
-    def __init__(self, **kwargs):
-        self.tool_progress_callback = kwargs.get("tool_progress_callback")
-        self.tools = []
-
-    def run_conversation(self, message, conversation_history=None, task_id=None):
-        self.tool_progress_callback(
-            "tool.started", "execute_code", None,
-            {"code": self.LONG_CODE},
-        )
-        time.sleep(0.35)
-        return {
-            "final_response": "done",
-            "messages": [],
-            "api_calls": 1,
-        }
-
-
 async def _run_with_agent(
    monkeypatch,
    tmp_path,
@@ -596,45 +575,3 @@ async def test_run_agent_queued_message_does_not_treat_commentary_as_final(monke
    assert result["final_response"] == "final response 2"
    assert "I'll inspect the repo first." in sent_texts
    assert "final response 1" in sent_texts
-
-
-@pytest.mark.asyncio
-async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path):
-    """Verbose mode with default tool_preview_length (0) should NOT truncate args.
-
-    Previously, verbose mode capped args at 200 chars when tool_preview_length
-    was 0 (default).  The user explicitly opted into verbose — show full detail.
-    """
-    adapter, result = await _run_with_agent(
-        monkeypatch,
-        tmp_path,
-        VerboseAgent,
-        session_id="sess-verbose-no-truncate",
-        config_data={"display": {"tool_progress": "verbose", "tool_preview_length": 0}},
-    )
-
-    assert result["final_response"] == "done"
-    # The full 300-char 'x' string should be present, not truncated to 200
-    all_content = " ".join(call["content"] for call in adapter.sent)
-    all_content += " ".join(call["content"] for call in adapter.edits)
-    assert VerboseAgent.LONG_CODE in all_content
-
-
-@pytest.mark.asyncio
-async def test_verbose_mode_respects_explicit_tool_preview_length(monkeypatch, tmp_path):
-    """When tool_preview_length is set to a positive value, verbose truncates to that."""
-    adapter, result = await _run_with_agent(
-        monkeypatch,
-        tmp_path,
-        VerboseAgent,
-        session_id="sess-verbose-explicit-cap",
-        config_data={"display": {"tool_progress": "verbose", "tool_preview_length": 50}},
-    )
-
-    assert result["final_response"] == "done"
-    all_content = " ".join(call["content"] for call in adapter.sent)
-    all_content += " ".join(call["content"] for call in adapter.edits)
-    # Should be truncated — full 300-char string NOT present
-    assert VerboseAgent.LONG_CODE not in all_content
-    # But should still contain the truncated portion with "..."
-    assert "..." in all_content
@@ -1,279 +0,0 @@
-"""Tests for _setup_feishu() in hermes_cli/gateway.py.
-
-Verifies that the interactive setup writes env vars that correctly drive the
-Feishu adapter: credentials, connection mode, DM policy, and group policy.
-"""
-
-import os
-from unittest.mock import patch
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _run_setup_feishu(
-    *,
-    qr_result=None,
-    prompt_yes_no_responses=None,
-    prompt_choice_responses=None,
-    prompt_responses=None,
-    existing_env=None,
-):
-    """Run _setup_feishu() with mocked I/O and return the env vars that were saved.
-
-    Returns a dict of {env_var_name: value} for all save_env_value calls.
-    """
-    existing_env = existing_env or {}
-    prompt_yes_no_responses = list(prompt_yes_no_responses or [True])
-    # QR path: method(0), dm(0), group(0) — 3 choices (no connection mode)
-    # Manual path: method(1), domain(0), connection(0), dm(0), group(0) — 5 choices
-    prompt_choice_responses = list(prompt_choice_responses or [0, 0, 0])
-    prompt_responses = list(prompt_responses or [""])
-
-    saved_env = {}
-
-    def mock_save(name, value):
-        saved_env[name] = value
-
-    def mock_get(name):
-        return existing_env.get(name, "")
-
-    with patch("hermes_cli.gateway.save_env_value", side_effect=mock_save), \
-         patch("hermes_cli.gateway.get_env_value", side_effect=mock_get), \
-         patch("hermes_cli.gateway.prompt_yes_no", side_effect=prompt_yes_no_responses), \
-         patch("hermes_cli.gateway.prompt_choice", side_effect=prompt_choice_responses), \
-         patch("hermes_cli.gateway.prompt", side_effect=prompt_responses), \
-         patch("hermes_cli.gateway.print_info"), \
-         patch("hermes_cli.gateway.print_success"), \
-         patch("hermes_cli.gateway.print_warning"), \
-         patch("hermes_cli.gateway.print_error"), \
-         patch("hermes_cli.gateway.color", side_effect=lambda t, c: t), \
-         patch("gateway.platforms.feishu.qr_register", return_value=qr_result):
-
-        from hermes_cli.gateway import _setup_feishu
-        _setup_feishu()
-
-    return saved_env
-
-
-# ---------------------------------------------------------------------------
-# QR scan-to-create path
-# ---------------------------------------------------------------------------
-
-class TestSetupFeishuQrPath:
-    """Tests for the QR scan-to-create happy path."""
-
-    def test_qr_success_saves_core_credentials(self):
-        env = _run_setup_feishu(
-            qr_result={
-                "app_id": "cli_test",
-                "app_secret": "secret_test",
-                "domain": "feishu",
-                "open_id": "ou_owner",
-                "bot_name": "TestBot",
-                "bot_open_id": "ou_bot",
-            },
-            prompt_yes_no_responses=[True],        # Start QR
-            prompt_choice_responses=[0, 0, 0],  # method=QR, dm=pairing, group=open
-            prompt_responses=[""],                  # home channel: skip
-        )
-        assert env["FEISHU_APP_ID"] == "cli_test"
-        assert env["FEISHU_APP_SECRET"] == "secret_test"
-        assert env["FEISHU_DOMAIN"] == "feishu"
-
-    def test_qr_success_does_not_persist_bot_identity(self):
-        """Bot identity is discovered at runtime by _hydrate_bot_identity — not persisted
-        in env, so it stays fresh if the user renames the bot later."""
-        env = _run_setup_feishu(
-            qr_result={
-                "app_id": "cli_test",
-                "app_secret": "secret_test",
-                "domain": "feishu",
-                "open_id": "ou_owner",
-                "bot_name": "TestBot",
-                "bot_open_id": "ou_bot",
-            },
-            prompt_yes_no_responses=[True],
-            prompt_choice_responses=[0, 0, 0],
-            prompt_responses=[""],
-        )
-        assert "FEISHU_BOT_OPEN_ID" not in env
-        assert "FEISHU_BOT_NAME" not in env
-
-
-# ---------------------------------------------------------------------------
-# Connection mode
-# ---------------------------------------------------------------------------
-
-class TestSetupFeishuConnectionMode:
-    """Connection mode: QR always websocket, manual path lets user choose."""
-
-    def test_qr_path_defaults_to_websocket(self):
-        env = _run_setup_feishu(
-            qr_result={
-                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
-                "open_id": None, "bot_name": None, "bot_open_id": None,
-            },
-            prompt_choice_responses=[0, 0, 0],  # method=QR, dm=pairing, group=open
-            prompt_responses=[""],
-        )
-        assert env["FEISHU_CONNECTION_MODE"] == "websocket"
-
-    @patch("gateway.platforms.feishu.probe_bot", return_value=None)
-    def test_manual_path_websocket(self, _mock_probe):
-        env = _run_setup_feishu(
-            qr_result=None,
-            prompt_choice_responses=[1, 0, 0, 0, 0],  # method=manual, domain=feishu, connection=ws, dm=pairing, group=open
-            prompt_responses=["cli_manual", "secret_manual", ""],  # app_id, app_secret, home_channel
-        )
-        assert env["FEISHU_CONNECTION_MODE"] == "websocket"
-
-    @patch("gateway.platforms.feishu.probe_bot", return_value=None)
-    def test_manual_path_webhook(self, _mock_probe):
-        env = _run_setup_feishu(
-            qr_result=None,
-            prompt_choice_responses=[1, 0, 1, 0, 0],  # method=manual, domain=feishu, connection=webhook, dm=pairing, group=open
-            prompt_responses=["cli_manual", "secret_manual", ""],  # app_id, app_secret, home_channel
-        )
-        assert env["FEISHU_CONNECTION_MODE"] == "webhook"
-
-
-# ---------------------------------------------------------------------------
-# DM security policy
-# ---------------------------------------------------------------------------
-
-class TestSetupFeishuDmPolicy:
-    """DM policy must use platform-scoped FEISHU_ALLOW_ALL_USERS, not the global flag."""
-
-    def _run_with_dm_choice(self, dm_choice_idx, prompt_responses=None):
-        return _run_setup_feishu(
-            qr_result={
-                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
-                "open_id": "ou_owner", "bot_name": None, "bot_open_id": None,
-            },
-            prompt_yes_no_responses=[True],
-            prompt_choice_responses=[0, dm_choice_idx, 0],  # method=QR, dm=<choice>, group=open
-            prompt_responses=prompt_responses or [""],
-        )
-
-    def test_pairing_sets_feishu_allow_all_false(self):
-        env = self._run_with_dm_choice(0)
-        assert env["FEISHU_ALLOW_ALL_USERS"] == "false"
-        assert env["FEISHU_ALLOWED_USERS"] == ""
-        assert "GATEWAY_ALLOW_ALL_USERS" not in env
-
-    def test_allow_all_sets_feishu_allow_all_true(self):
-        env = self._run_with_dm_choice(1)
-        assert env["FEISHU_ALLOW_ALL_USERS"] == "true"
-        assert env["FEISHU_ALLOWED_USERS"] == ""
-        assert "GATEWAY_ALLOW_ALL_USERS" not in env
-
-    def test_allowlist_sets_feishu_allow_all_false_with_list(self):
-        env = self._run_with_dm_choice(2, prompt_responses=["ou_user1,ou_user2", ""])
-        assert env["FEISHU_ALLOW_ALL_USERS"] == "false"
-        assert env["FEISHU_ALLOWED_USERS"] == "ou_user1,ou_user2"
-        assert "GATEWAY_ALLOW_ALL_USERS" not in env
-
-    def test_allowlist_prepopulates_with_scan_owner_open_id(self):
-        """When open_id is available from QR scan, it should be the default allowlist value."""
-        # We return the owner's open_id from prompt (+ empty home channel).
-        env = self._run_with_dm_choice(2, prompt_responses=["ou_owner", ""])
-        assert env["FEISHU_ALLOWED_USERS"] == "ou_owner"
-
-
-
-# ---------------------------------------------------------------------------
-# Group policy
-# ---------------------------------------------------------------------------
-
-class TestSetupFeishuGroupPolicy:
-
-    def test_open_with_mention(self):
-        env = _run_setup_feishu(
-            qr_result={
-                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
-                "open_id": None, "bot_name": None, "bot_open_id": None,
-            },
-            prompt_yes_no_responses=[True],
-            prompt_choice_responses=[0, 0, 0],  # method=QR, dm=pairing, group=open
-            prompt_responses=[""],
-        )
-        assert env["FEISHU_GROUP_POLICY"] == "open"
-
-    def test_disabled(self):
-        env = _run_setup_feishu(
-            qr_result={
-                "app_id": "cli_test", "app_secret": "s", "domain": "feishu",
-                "open_id": None, "bot_name": None, "bot_open_id": None,
-            },
-            prompt_yes_no_responses=[True],
-            prompt_choice_responses=[0, 0, 1],  # method=QR, dm=pairing, group=disabled
-            prompt_responses=[""],
-        )
-        assert env["FEISHU_GROUP_POLICY"] == "disabled"
-
-
-# ---------------------------------------------------------------------------
-# Adapter integration: env vars → FeishuAdapterSettings
-# ---------------------------------------------------------------------------
-
-class TestSetupFeishuAdapterIntegration:
-    """Verify that env vars written by _setup_feishu() produce a valid adapter config.
-
-    This bridges the gap between 'setup wrote the right env vars' and
-    'the adapter will actually initialize correctly from those vars'.
-    """
-
-    def _make_env_from_setup(self, dm_idx=0, group_idx=0):
-        """Run _setup_feishu via QR path and return the env vars it would write."""
-        return _run_setup_feishu(
-            qr_result={
-                "app_id": "cli_test_app",
-                "app_secret": "test_secret_value",
-                "domain": "feishu",
-                "open_id": "ou_owner",
-                "bot_name": "IntegrationBot",
-                "bot_open_id": "ou_bot_integration",
-            },
-            prompt_yes_no_responses=[True],
-            prompt_choice_responses=[0, dm_idx, group_idx],  # method=QR, dm, group
-            prompt_responses=[""],
-        )
-
-    @patch.dict(os.environ, {}, clear=True)
-    def test_qr_env_produces_valid_adapter_settings(self):
-        """QR setup → adapter initializes with websocket mode."""
-        env = self._make_env_from_setup()
-
-        with patch.dict(os.environ, env, clear=True):
-            from gateway.config import PlatformConfig
-            from gateway.platforms.feishu import FeishuAdapter
-            adapter = FeishuAdapter(PlatformConfig())
-            assert adapter._app_id == "cli_test_app"
-            assert adapter._app_secret == "test_secret_value"
-            assert adapter._domain_name == "feishu"
-            assert adapter._connection_mode == "websocket"
-
-    @patch.dict(os.environ, {}, clear=True)
-    def test_open_dm_env_sets_correct_adapter_state(self):
-        """Setup with 'allow all DMs' → adapter sees allow-all flag."""
-        env = self._make_env_from_setup(dm_idx=1)
-
-        with patch.dict(os.environ, env, clear=True):
-            from gateway.platforms.feishu import FeishuAdapter
-            from gateway.config import PlatformConfig
-            # Verify adapter initializes without error and env var is correct.
-            FeishuAdapter(PlatformConfig())
-            assert os.getenv("FEISHU_ALLOW_ALL_USERS") == "true"
-
-    @patch.dict(os.environ, {}, clear=True)
-    def test_group_open_env_sets_adapter_group_policy(self):
-        """Setup with 'open groups' → adapter group_policy is 'open'."""
-        env = self._make_env_from_setup(group_idx=0)
-
-        with patch.dict(os.environ, env, clear=True):
-            from gateway.config import PlatformConfig
-            from gateway.platforms.feishu import FeishuAdapter
-            adapter = FeishuAdapter(PlatformConfig())
-            assert adapter._group_policy == "open"
@@ -121,7 +121,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="off")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -133,7 +133,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="first")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -148,7 +148,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="all")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]

        await adapter.send("12345", "test content", reply_to="999")

@@ -162,7 +162,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="all")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]

        await adapter.send("12345", "test content", reply_to=None)

@@ -175,7 +175,7 @@ class TestSendWithReplyToMode:
        adapter = adapter_factory(reply_to_mode="first")
        adapter._bot = MagicMock()
        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]
+        adapter.truncate_message = lambda content, max_len: ["single chunk"]

        await adapter.send("12345", "test", reply_to="999")

@@ -403,56 +403,6 @@ class TestWatchUpdateProgress:

        # Should not crash; legacy notification handles this case

-    @pytest.mark.asyncio
-    async def test_prompt_forwarded_only_once(self, tmp_path):
-        """Regression: prompt must not be re-sent on every poll cycle.
-
-        Before the fix, the watcher never deleted .update_prompt.json after
-        forwarding, causing the same prompt to be sent every poll_interval.
-        """
-        runner = _make_runner()
-        hermes_home = tmp_path / "hermes"
-        hermes_home.mkdir()
-
-        pending = {"platform": "telegram", "chat_id": "111", "user_id": "222",
-                   "session_key": "agent:main:telegram:dm:111"}
-        (hermes_home / ".update_pending.json").write_text(json.dumps(pending))
-        (hermes_home / ".update_output.txt").write_text("")
-
-        mock_adapter = AsyncMock()
-        runner.adapters = {Platform.TELEGRAM: mock_adapter}
-
-        # Write the prompt file up front (before the watcher starts).
-        # The watcher should forward it exactly once, then delete it.
-        prompt = {"prompt": "Would you like to configure new options now? Y/n",
-                  "default": "n", "id": "dup-test"}
-        (hermes_home / ".update_prompt.json").write_text(json.dumps(prompt))
-
-        async def finish_after_polls():
-            # Wait long enough for multiple poll cycles to occur, then
-            # simulate a response + completion.
-            await asyncio.sleep(1.0)
-            (hermes_home / ".update_response").write_text("n")
-            await asyncio.sleep(0.3)
-            (hermes_home / ".update_exit_code").write_text("0")
-
-        with patch("gateway.run._hermes_home", hermes_home):
-            task = asyncio.create_task(finish_after_polls())
-            await runner._watch_update_progress(
-                poll_interval=0.1,
-                stream_interval=0.2,
-                timeout=10.0,
-            )
-            await task
-
-        # Count how many times the prompt text was sent
-        all_sent = [str(c) for c in mock_adapter.send.call_args_list]
-        prompt_sends = [s for s in all_sent if "configure new options" in s]
-        assert len(prompt_sends) == 1, (
-            f"Prompt was sent {len(prompt_sends)} times (expected 1). "
-            f"All sends: {all_sent}"
-        )
-

 # ---------------------------------------------------------------------------
 # Message interception for update prompts
@@ -1,141 +0,0 @@
-"""Tests for gateway weak credential rejection at startup.
-
-Ported from openclaw/openclaw#64586: rejects known-weak placeholder
-tokens at gateway startup instead of letting them silently fail
-against platform APIs.
-"""
-
-import logging
-
-import pytest
-
-from gateway.config import PlatformConfig, Platform, _validate_gateway_config
-
-
-# ---------------------------------------------------------------------------
-# Helper: create a minimal GatewayConfig with one enabled platform
-# ---------------------------------------------------------------------------
-
-
-def _make_gateway_config(platform, token, enabled=True, **extra_kwargs):
-    """Create a minimal GatewayConfig-like object for validation testing."""
-    from gateway.config import GatewayConfig
-
-    config = GatewayConfig(platforms={})
-    pconfig = PlatformConfig(enabled=enabled, token=token, **extra_kwargs)
-    config.platforms[platform] = pconfig
-    return config
-
-
-def _validate_and_return(config):
-    """Call _validate_gateway_config and return the config (mutated in place)."""
-    _validate_gateway_config(config)
-    return config
-
-
-# ---------------------------------------------------------------------------
-# Unit tests: platform token placeholder rejection
-# ---------------------------------------------------------------------------
-
-
-class TestPlatformTokenPlaceholderGuard:
-    """Verify that _validate_gateway_config disables platforms with placeholder tokens."""
-
-    def test_rejects_triple_asterisk(self, caplog):
-        """'***' is the .env.example placeholder — should be rejected."""
-        config = _make_gateway_config(Platform.TELEGRAM, "***")
-        with caplog.at_level(logging.ERROR):
-            _validate_and_return(config)
-        assert config.platforms[Platform.TELEGRAM].enabled is False
-        assert "placeholder" in caplog.text.lower()
-
-    def test_rejects_changeme(self, caplog):
-        config = _make_gateway_config(Platform.DISCORD, "changeme")
-        with caplog.at_level(logging.ERROR):
-            _validate_and_return(config)
-        assert config.platforms[Platform.DISCORD].enabled is False
-
-    def test_rejects_your_api_key(self, caplog):
-        config = _make_gateway_config(Platform.SLACK, "your_api_key")
-        with caplog.at_level(logging.ERROR):
-            _validate_and_return(config)
-        assert config.platforms[Platform.SLACK].enabled is False
-
-    def test_rejects_placeholder(self, caplog):
-        config = _make_gateway_config(Platform.MATRIX, "placeholder")
-        with caplog.at_level(logging.ERROR):
-            _validate_and_return(config)
-        assert config.platforms[Platform.MATRIX].enabled is False
-
-    def test_accepts_real_token(self, caplog):
-        """A real-looking bot token should pass validation."""
-        config = _make_gateway_config(
-            Platform.TELEGRAM, "7123456789:AAHdqTcvCH1vGWJxfSeOfSAs0K5PALDsaw"
-        )
-        with caplog.at_level(logging.ERROR):
-            _validate_and_return(config)
-        assert config.platforms[Platform.TELEGRAM].enabled is True
-        assert "placeholder" not in caplog.text.lower()
-
-    def test_accepts_empty_token_without_error(self, caplog):
-        """Empty tokens get a warning (existing behavior), not a placeholder error."""
-        config = _make_gateway_config(Platform.TELEGRAM, "")
-        with caplog.at_level(logging.WARNING):
-            _validate_and_return(config)
-        # Empty token doesn't trigger placeholder rejection — enabled stays True
-        # (the existing empty-token warning is separate)
-        assert config.platforms[Platform.TELEGRAM].enabled is True
-
-    def test_disabled_platform_not_checked(self, caplog):
-        """Disabled platforms should not be validated."""
-        config = _make_gateway_config(Platform.TELEGRAM, "***", enabled=False)
-        with caplog.at_level(logging.ERROR):
-            _validate_and_return(config)
-        assert "placeholder" not in caplog.text.lower()
-
-    def test_rejects_whitespace_padded_placeholder(self, caplog):
-        """Whitespace-padded placeholders should still be caught."""
-        config = _make_gateway_config(Platform.TELEGRAM, "  ***  ")
-        with caplog.at_level(logging.ERROR):
-            _validate_and_return(config)
-        assert config.platforms[Platform.TELEGRAM].enabled is False
-
-
-# ---------------------------------------------------------------------------
-# Integration test: API server placeholder key on network-accessible host
-# ---------------------------------------------------------------------------
-
-
-class TestAPIServerPlaceholderKeyGuard:
-    """Verify that the API server rejects placeholder keys on network hosts."""
-
-    @pytest.mark.asyncio
-    async def test_refuses_wildcard_with_placeholder_key(self):
-        from gateway.platforms.api_server import APIServerAdapter
-
-        adapter = APIServerAdapter(
-            PlatformConfig(enabled=True, extra={"host": "0.0.0.0", "key": "changeme"})
-        )
-        result = await adapter.connect()
-        assert result is False
-
-    @pytest.mark.asyncio
-    async def test_refuses_wildcard_with_asterisk_key(self):
-        from gateway.platforms.api_server import APIServerAdapter
-
-        adapter = APIServerAdapter(
-            PlatformConfig(enabled=True, extra={"host": "0.0.0.0", "key": "***"})
-        )
-        result = await adapter.connect()
-        assert result is False
-
-    def test_allows_loopback_with_placeholder_key(self):
-        """Loopback with a placeholder key is fine — not network-exposed."""
-        from gateway.platforms.api_server import APIServerAdapter
-        from gateway.platforms.base import is_network_accessible
-
-        adapter = APIServerAdapter(
-            PlatformConfig(enabled=True, extra={"host": "127.0.0.1", "key": "changeme"})
-        )
-        # On loopback the placeholder guard doesn't fire
-        assert is_network_accessible(adapter._host) is False
@@ -30,7 +30,7 @@ class TestWeixinFormatting:

        assert (
            adapter.format_message(content)
-            == "【Title】\n\n**Plan**\n\nUse **bold** and docs (https://example.com)."
+            == "【Title】\n\n**Plan**\n\nUse **bold** and [docs](https://example.com)."
        )

    def test_format_message_rewrites_markdown_tables(self):
@@ -374,149 +374,3 @@ class TestWeixinRemoteMediaSafety:
                assert "Blocked unsafe URL" in str(exc)
            else:
                raise AssertionError("expected ValueError for unsafe URL")
-
-
-class TestWeixinMarkdownLinks:
-    """Markdown links should be converted to plaintext since WeChat can't render them."""
-
-    def test_format_message_converts_markdown_links_to_plain_text(self):
-        adapter = _make_adapter()
-
-        content = "Check [the docs](https://example.com) and [GitHub](https://github.com) for details"
-        assert (
-            adapter.format_message(content)
-            == "Check the docs (https://example.com) and GitHub (https://github.com) for details"
-        )
-
-    def test_format_message_preserves_links_inside_code_blocks(self):
-        adapter = _make_adapter()
-
-        content = "See below:\n\n```\n[link](https://example.com)\n```\n\nDone."
-        result = adapter.format_message(content)
-        assert "[link](https://example.com)" in result
-
-
-class TestWeixinBlankMessagePrevention:
-    """Regression tests for the blank-bubble bugs.
-
-    Three separate guards now prevent a blank WeChat message from ever being
-    dispatched:
-
-    1. ``_split_text_for_weixin_delivery("")`` returns ``[]`` — not ``[""]``.
-    2. ``send()`` filters out empty/whitespace-only chunks before calling
-       ``_send_text_chunk``.
-    3. ``_send_message()`` raises ``ValueError`` for empty text as a last-resort
-       safety net.
-    """
-
-    def test_split_text_returns_empty_list_for_empty_string(self):
-        adapter = _make_adapter()
-        assert adapter._split_text("") == []
-
-    def test_split_text_returns_empty_list_for_empty_string_split_per_line(self):
-        adapter = WeixinAdapter(
-            PlatformConfig(
-                enabled=True,
-                extra={
-                    "account_id": "acct",
-                    "token": "test-tok",
-                    "split_multiline_messages": True,
-                },
-            )
-        )
-        assert adapter._split_text("") == []
-
-    @patch("gateway.platforms.weixin._send_message", new_callable=AsyncMock)
-    def test_send_empty_content_does_not_call_send_message(self, send_message_mock):
-        adapter = _make_adapter()
-        adapter._session = object()
-        adapter._token = "test-token"
-        adapter._base_url = "https://weixin.example.com"
-        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
-
-        result = asyncio.run(adapter.send("wxid_test123", ""))
-        # Empty content → no chunks → no _send_message calls
-        assert result.success is True
-        send_message_mock.assert_not_awaited()
-
-    def test_send_message_rejects_empty_text(self):
-        """_send_message raises ValueError for empty/whitespace text."""
-        import pytest
-        with pytest.raises(ValueError, match="text must not be empty"):
-            asyncio.run(
-                weixin._send_message(
-                    AsyncMock(),
-                    base_url="https://example.com",
-                    token="tok",
-                    to="wxid_test",
-                    text="",
-                    context_token=None,
-                    client_id="cid",
-                )
-            )
-
-
-class TestWeixinStreamingCursorSuppression:
-    """WeChat doesn't support message editing — cursor must be suppressed."""
-
-    def test_supports_message_editing_is_false(self):
-        adapter = _make_adapter()
-        assert adapter.SUPPORTS_MESSAGE_EDITING is False
-
-
-class TestWeixinMediaBuilder:
-    """Media builder uses base64(hex_key), not base64(raw_bytes) for aes_key."""
-
-    def test_image_builder_aes_key_is_base64_of_hex(self):
-        import base64
-        adapter = _make_adapter()
-        media_type, builder = adapter._outbound_media_builder("photo.jpg")
-        assert media_type == weixin.MEDIA_IMAGE
-
-        fake_hex_key = "0123456789abcdef0123456789abcdef"
-        expected_aes = base64.b64encode(fake_hex_key.encode("ascii")).decode("ascii")
-        item = builder(
-            encrypt_query_param="eq",
-            aes_key_for_api=expected_aes,
-            ciphertext_size=1024,
-            plaintext_size=1000,
-            filename="photo.jpg",
-            rawfilemd5="abc123",
-        )
-        assert item["image_item"]["media"]["aes_key"] == expected_aes
-
-    def test_video_builder_includes_md5(self):
-        adapter = _make_adapter()
-        media_type, builder = adapter._outbound_media_builder("clip.mp4")
-        assert media_type == weixin.MEDIA_VIDEO
-
-        item = builder(
-            encrypt_query_param="eq",
-            aes_key_for_api="fakekey",
-            ciphertext_size=2048,
-            plaintext_size=2000,
-            filename="clip.mp4",
-            rawfilemd5="deadbeef",
-        )
-        assert item["video_item"]["video_md5"] == "deadbeef"
-
-    def test_voice_builder_for_audio_files(self):
-        adapter = _make_adapter()
-        media_type, builder = adapter._outbound_media_builder("note.mp3")
-        assert media_type == weixin.MEDIA_VOICE
-
-        item = builder(
-            encrypt_query_param="eq",
-            aes_key_for_api="fakekey",
-            ciphertext_size=512,
-            plaintext_size=500,
-            filename="note.mp3",
-            rawfilemd5="abc",
-        )
-        assert item["type"] == weixin.ITEM_VOICE
-        assert "voice_item" in item
-
-    def test_voice_builder_for_silk_files(self):
-        adapter = _make_adapter()
-        media_type, builder = adapter._outbound_media_builder("recording.silk")
-        assert media_type == weixin.MEDIA_VOICE
@@ -1,271 +0,0 @@
-"""Tests for WhatsApp message formatting and chunking.
-
-Covers:
- format_message(): markdown → WhatsApp syntax conversion
- send(): message chunking for long responses
- MAX_MESSAGE_LENGTH: practical UX limit
-"""
-
-import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-from gateway.config import Platform, PlatformConfig
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _make_adapter():
-    """Create a WhatsAppAdapter with test attributes (bypass __init__)."""
-    from gateway.platforms.whatsapp import WhatsAppAdapter
-
-    adapter = WhatsAppAdapter.__new__(WhatsAppAdapter)
-    adapter.platform = Platform.WHATSAPP
-    adapter.config = MagicMock()
-    adapter.config.extra = {}
-    adapter._bridge_port = 3000
-    adapter._bridge_script = "/tmp/test-bridge.js"
-    adapter._session_path = MagicMock()
-    adapter._bridge_log_fh = None
-    adapter._bridge_log = None
-    adapter._bridge_process = None
-    adapter._reply_prefix = None
-    adapter._running = True
-    adapter._message_handler = None
-    adapter._fatal_error_code = None
-    adapter._fatal_error_message = None
-    adapter._fatal_error_retryable = True
-    adapter._fatal_error_handler = None
-    adapter._active_sessions = {}
-    adapter._pending_messages = {}
-    adapter._background_tasks = set()
-    adapter._auto_tts_disabled_chats = set()
-    adapter._message_queue = asyncio.Queue()
-    adapter._http_session = MagicMock()
-    adapter._mention_patterns = []
-    return adapter
-
-
-class _AsyncCM:
-    """Minimal async context manager returning a fixed value."""
-
-    def __init__(self, value):
-        self.value = value
-
-    async def __aenter__(self):
-        return self.value
-
-    async def __aexit__(self, *exc):
-        return False
-
-
-# ---------------------------------------------------------------------------
-# format_message tests
-# ---------------------------------------------------------------------------
-
-class TestFormatMessage:
-    """WhatsApp markdown conversion."""
-
-    def test_bold_double_asterisk(self):
-        adapter = _make_adapter()
-        assert adapter.format_message("**hello**") == "*hello*"
-
-    def test_bold_double_underscore(self):
-        adapter = _make_adapter()
-        assert adapter.format_message("__hello__") == "*hello*"
-
-    def test_strikethrough(self):
-        adapter = _make_adapter()
-        assert adapter.format_message("~~deleted~~") == "~deleted~"
-
-    def test_headers_converted_to_bold(self):
-        adapter = _make_adapter()
-        assert adapter.format_message("# Title") == "*Title*"
-        assert adapter.format_message("## Subtitle") == "*Subtitle*"
-        assert adapter.format_message("### Deep") == "*Deep*"
-
-    def test_links_converted(self):
-        adapter = _make_adapter()
-        result = adapter.format_message("[click here](https://example.com)")
-        assert result == "click here (https://example.com)"
-
-    def test_code_blocks_protected(self):
-        """Code blocks should not have their content reformatted."""
-        adapter = _make_adapter()
-        content = "before **bold** ```python\n**not bold**\n``` after **bold**"
-        result = adapter.format_message(content)
-        assert "```python\n**not bold**\n```" in result
-        assert result.startswith("before *bold*")
-        assert result.endswith("after *bold*")
-
-    def test_inline_code_protected(self):
-        """Inline code should not have its content reformatted."""
-        adapter = _make_adapter()
-        content = "use `**raw**` here"
-        result = adapter.format_message(content)
-        assert "`**raw**`" in result
-        assert result.startswith("use ")
-
-    def test_empty_content(self):
-        adapter = _make_adapter()
-        assert adapter.format_message("") == ""
-        assert adapter.format_message(None) is None
-
-    def test_plain_text_unchanged(self):
-        adapter = _make_adapter()
-        assert adapter.format_message("hello world") == "hello world"
-
-    def test_already_whatsapp_italic(self):
-        """Single *italic* should pass through unchanged."""
-        adapter = _make_adapter()
-        # After bold conversion, *text* is WhatsApp italic
-        assert adapter.format_message("*italic*") == "*italic*"
-
-    def test_multiline_mixed(self):
-        adapter = _make_adapter()
-        content = "# Header\n\n**Bold text** and ~~strike~~\n\n```\ncode\n```"
-        result = adapter.format_message(content)
-        assert "*Header*" in result
-        assert "*Bold text*" in result
-        assert "~strike~" in result
-        assert "```\ncode\n```" in result
-
-
-# ---------------------------------------------------------------------------
-# MAX_MESSAGE_LENGTH tests
-# ---------------------------------------------------------------------------
-
-class TestMessageLimits:
-    """WhatsApp message length limits."""
-
-    def test_max_message_length_is_practical(self):
-        from gateway.platforms.whatsapp import WhatsAppAdapter
-        assert WhatsAppAdapter.MAX_MESSAGE_LENGTH == 4096
-
-
-# ---------------------------------------------------------------------------
-# send() chunking tests
-# ---------------------------------------------------------------------------
-
-class TestSendChunking:
-    """WhatsApp send() splits long messages into chunks."""
-
-    @pytest.mark.asyncio
-    async def test_short_message_single_send(self):
-        adapter = _make_adapter()
-        resp = MagicMock(status=200)
-        resp.json = AsyncMock(return_value={"messageId": "msg1"})
-        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
-
-        result = await adapter.send("chat1", "short message")
-        assert result.success
-        # Only one call to bridge /send
-        assert adapter._http_session.post.call_count == 1
-
-    @pytest.mark.asyncio
-    async def test_long_message_chunked(self):
-        adapter = _make_adapter()
-        resp = MagicMock(status=200)
-        resp.json = AsyncMock(return_value={"messageId": "msg1"})
-        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
-
-        # Create a message longer than MAX_MESSAGE_LENGTH (4096)
-        long_msg = "a " * 3000  # ~6000 chars
-
-        result = await adapter.send("chat1", long_msg)
-        assert result.success
-        # Should have made multiple calls
-        assert adapter._http_session.post.call_count > 1
-
-    @pytest.mark.asyncio
-    async def test_empty_message_no_send(self):
-        adapter = _make_adapter()
-        result = await adapter.send("chat1", "")
-        assert result.success
-        assert adapter._http_session.post.call_count == 0
-
-    @pytest.mark.asyncio
-    async def test_whitespace_only_no_send(self):
-        adapter = _make_adapter()
-        result = await adapter.send("chat1", "   \n  ")
-        assert result.success
-        assert adapter._http_session.post.call_count == 0
-
-    @pytest.mark.asyncio
-    async def test_format_applied_before_send(self):
-        """Markdown should be converted to WhatsApp format before sending."""
-        adapter = _make_adapter()
-        resp = MagicMock(status=200)
-        resp.json = AsyncMock(return_value={"messageId": "msg1"})
-        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
-
-        await adapter.send("chat1", "**bold text**")
-
-        # Check the payload sent to the bridge
-        call_args = adapter._http_session.post.call_args
-        payload = call_args.kwargs.get("json") or call_args[1].get("json")
-        assert payload["message"] == "*bold text*"
-
-    @pytest.mark.asyncio
-    async def test_reply_to_only_on_first_chunk(self):
-        """reply_to should only be set on the first chunk."""
-        adapter = _make_adapter()
-        resp = MagicMock(status=200)
-        resp.json = AsyncMock(return_value={"messageId": "msg1"})
-        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
-
-        long_msg = "word " * 2000  # ~10000 chars, multiple chunks
-
-        await adapter.send("chat1", long_msg, reply_to="orig123")
-
-        calls = adapter._http_session.post.call_args_list
-        assert len(calls) > 1
-
-        # First chunk should have replyTo
-        first_payload = calls[0].kwargs.get("json") or calls[0][1].get("json")
-        assert first_payload.get("replyTo") == "orig123"
-
-        # Subsequent chunks should NOT have replyTo
-        for call in calls[1:]:
-            payload = call.kwargs.get("json") or call[1].get("json")
-            assert "replyTo" not in payload
-
-    @pytest.mark.asyncio
-    async def test_bridge_error_returns_failure(self):
-        adapter = _make_adapter()
-        resp = MagicMock(status=500)
-        resp.text = AsyncMock(return_value="Internal Server Error")
-        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
-
-        result = await adapter.send("chat1", "hello")
-        assert not result.success
-        assert "Internal Server Error" in result.error
-
-    @pytest.mark.asyncio
-    async def test_not_connected_returns_failure(self):
-        adapter = _make_adapter()
-        adapter._running = False
-
-        result = await adapter.send("chat1", "hello")
-        assert not result.success
-        assert "Not connected" in result.error
-
-
-# ---------------------------------------------------------------------------
-# display_config tier classification
-# ---------------------------------------------------------------------------
-
-class TestWhatsAppTier:
-    """WhatsApp should be classified as TIER_MEDIUM."""
-
-    def test_whatsapp_streaming_follows_global(self):
-        from gateway.display_config import resolve_display_setting
-        # TIER_MEDIUM has streaming: None (follow global), not False
-        assert resolve_display_setting({}, "whatsapp", "streaming") is None
-
-    def test_whatsapp_tool_progress_is_new(self):
-        from gateway.display_config import resolve_display_setting
-        assert resolve_display_setting({}, "whatsapp", "tool_progress") == "new"
@@ -14,7 +14,6 @@ from hermes_cli.auth import (
    PROVIDER_REGISTRY,
    _read_codex_tokens,
    _save_codex_tokens,
-    _write_codex_cli_tokens,
    _import_codex_cli_tokens,
    get_codex_auth_status,
    get_provider_auth_state,
@@ -162,7 +161,7 @@ def test_import_codex_cli_tokens_missing(tmp_path, monkeypatch):


 def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):
-    """Verify _save_codex_tokens writes only to Hermes auth store, not ~/.codex/."""
+    """Verify Hermes never writes to ~/.codex/auth.json."""
    hermes_home = tmp_path / "hermes"
    codex_home = tmp_path / "codex-cli"
    hermes_home.mkdir(parents=True, exist_ok=True)
@@ -174,7 +173,7 @@ def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):

    _save_codex_tokens({"access_token": "hermes-at", "refresh_token": "hermes-rt"})

-    # ~/.codex/auth.json should NOT exist — _save_codex_tokens only touches Hermes store
+    # ~/.codex/auth.json should NOT exist
    assert not (codex_home / "auth.json").exists()

    # Hermes auth store should have the tokens
@@ -182,98 +181,6 @@ def test_codex_tokens_not_written_to_shared_file(tmp_path, monkeypatch):
    assert data["tokens"]["access_token"] == "hermes-at"


-def test_write_codex_cli_tokens_creates_file(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens creates ~/.codex/auth.json with refreshed tokens."""
-    codex_home = tmp_path / "codex-cli"
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    _write_codex_cli_tokens("new-access", "new-refresh", last_refresh="2026-04-12T00:00:00Z")
-
-    auth_path = codex_home / "auth.json"
-    assert auth_path.exists()
-    data = json.loads(auth_path.read_text())
-    assert data["tokens"]["access_token"] == "new-access"
-    assert data["tokens"]["refresh_token"] == "new-refresh"
-    assert data["last_refresh"] == "2026-04-12T00:00:00Z"
-    # Verify file permissions are restricted
-    assert (auth_path.stat().st_mode & 0o777) == 0o600
-
-
-def test_write_codex_cli_tokens_preserves_existing(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens preserves extra fields in existing auth.json."""
-    codex_home = tmp_path / "codex-cli"
-    codex_home.mkdir(parents=True, exist_ok=True)
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    existing = {
-        "tokens": {
-            "access_token": "old-access",
-            "refresh_token": "old-refresh",
-            "extra_field": "preserved",
-        },
-        "last_refresh": "2026-01-01T00:00:00Z",
-        "custom_key": "keep_me",
-    }
-    (codex_home / "auth.json").write_text(json.dumps(existing))
-
-    _write_codex_cli_tokens("updated-access", "updated-refresh")
-
-    data = json.loads((codex_home / "auth.json").read_text())
-    assert data["tokens"]["access_token"] == "updated-access"
-    assert data["tokens"]["refresh_token"] == "updated-refresh"
-    assert data["tokens"]["extra_field"] == "preserved"
-    assert data["custom_key"] == "keep_me"
-    # last_refresh not updated since we didn't pass it
-    assert data["last_refresh"] == "2026-01-01T00:00:00Z"
-
-
-def test_write_codex_cli_tokens_handles_missing_dir(tmp_path, monkeypatch):
-    """_write_codex_cli_tokens creates parent directories if missing."""
-    codex_home = tmp_path / "does" / "not" / "exist"
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    _write_codex_cli_tokens("at", "rt")
-
-    assert (codex_home / "auth.json").exists()
-    data = json.loads((codex_home / "auth.json").read_text())
-    assert data["tokens"]["access_token"] == "at"
-
-
-def test_refresh_codex_auth_tokens_writes_back_to_cli(tmp_path, monkeypatch):
-    """After refreshing, _refresh_codex_auth_tokens writes back to ~/.codex/auth.json."""
-    from hermes_cli.auth import _refresh_codex_auth_tokens
-
-    hermes_home = tmp_path / "hermes"
-    codex_home = tmp_path / "codex-cli"
-    hermes_home.mkdir(parents=True, exist_ok=True)
-    codex_home.mkdir(parents=True, exist_ok=True)
-    (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-
-    # Write initial CLI tokens
-    (codex_home / "auth.json").write_text(json.dumps({
-        "tokens": {"access_token": "old-at", "refresh_token": "old-rt"},
-    }))
-
-    # Mock the pure refresh to return new tokens
-    monkeypatch.setattr("hermes_cli.auth.refresh_codex_oauth_pure", lambda *a, **kw: {
-        "access_token": "refreshed-at",
-        "refresh_token": "refreshed-rt",
-        "last_refresh": "2026-04-12T01:00:00Z",
-    })
-
-    _refresh_codex_auth_tokens(
-        {"access_token": "old-at", "refresh_token": "old-rt"},
-        timeout_seconds=10,
-    )
-
-    # Verify CLI file was updated
-    cli_data = json.loads((codex_home / "auth.json").read_text())
-    assert cli_data["tokens"]["access_token"] == "refreshed-at"
-    assert cli_data["tokens"]["refresh_token"] == "refreshed-rt"
-
-
 def test_resolve_returns_hermes_auth_store_source(tmp_path, monkeypatch):
    hermes_home = tmp_path / "hermes"
    _setup_hermes_auth(hermes_home)
@@ -232,44 +232,6 @@ class TestBackup:
        assert len(zips) == 1


-# ---------------------------------------------------------------------------
-# _validate_backup_zip tests
-# ---------------------------------------------------------------------------
-
-class TestValidateBackupZip:
-    def _make_zip(self, zip_path: Path, filenames: list[str]) -> None:
-        with zipfile.ZipFile(zip_path, "w") as zf:
-            for name in filenames:
-                zf.writestr(name, "dummy")
-
-    def test_state_db_passes(self, tmp_path):
-        """A zip containing state.db is accepted as a valid Hermes backup."""
-        from hermes_cli.backup import _validate_backup_zip
-        zip_path = tmp_path / "backup.zip"
-        self._make_zip(zip_path, ["state.db", "sessions/abc.json"])
-        with zipfile.ZipFile(zip_path, "r") as zf:
-            ok, reason = _validate_backup_zip(zf)
-        assert ok, reason
-
-    def test_old_wrong_db_name_fails(self, tmp_path):
-        """A zip with only hermes_state.db (old wrong name) is rejected."""
-        from hermes_cli.backup import _validate_backup_zip
-        zip_path = tmp_path / "old.zip"
-        self._make_zip(zip_path, ["hermes_state.db", "memory_store.db"])
-        with zipfile.ZipFile(zip_path, "r") as zf:
-            ok, reason = _validate_backup_zip(zf)
-        assert not ok
-
-    def test_config_yaml_passes(self, tmp_path):
-        """A zip containing config.yaml is accepted (existing behaviour preserved)."""
-        from hermes_cli.backup import _validate_backup_zip
-        zip_path = tmp_path / "backup.zip"
-        self._make_zip(zip_path, ["config.yaml", "skills/x/SKILL.md"])
-        with zipfile.ZipFile(zip_path, "r") as zf:
-            ok, reason = _validate_backup_zip(zf)
-        assert ok, reason
-
-
 # ---------------------------------------------------------------------------
 # Import tests
 # ---------------------------------------------------------------------------
@@ -1,7 +1,6 @@
 """Tests for hermes claw commands."""

 from argparse import Namespace
-import subprocess
 from types import ModuleType
 from unittest.mock import MagicMock, patch

@@ -198,11 +197,6 @@ class TestClawCommand:
 class TestCmdMigrate:
    """Test the migrate command handler."""

-    @pytest.fixture(autouse=True)
-    def _mock_openclaw_running(self):
-        with patch.object(claw_mod, "_detect_openclaw_processes", return_value=[]):
-            yield
-
    def test_error_when_source_missing(self, tmp_path, capsys):
        args = Namespace(
            source=str(tmp_path / "nonexistent"),
@@ -632,120 +626,3 @@ class TestPrintMigrationReport:
        claw_mod._print_migration_report(report, dry_run=False)
        captured = capsys.readouterr()
        assert "Nothing to migrate" in captured.out
-
-
-class TestDetectOpenclawProcesses:
-    def test_returns_match_when_pgrep_finds_openclaw(self):
-        with patch.object(claw_mod, "sys") as mock_sys:
-            mock_sys.platform = "linux"
-            with patch.object(claw_mod, "subprocess") as mock_subprocess:
-                # systemd check misses, pgrep finds openclaw
-                mock_subprocess.run.side_effect = [
-                    MagicMock(returncode=1, stdout=""),  # systemctl
-                    MagicMock(returncode=0, stdout="1234\n"),  # pgrep
-                ]
-                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
-                result = claw_mod._detect_openclaw_processes()
-                assert len(result) == 1
-                assert "1234" in result[0]
-
-    def test_returns_empty_when_pgrep_finds_nothing(self):
-        with patch.object(claw_mod, "sys") as mock_sys:
-            mock_sys.platform = "darwin"
-            with patch.object(claw_mod, "subprocess") as mock_subprocess:
-                mock_subprocess.run.side_effect = [
-                    MagicMock(returncode=1, stdout=""),  # systemctl (not found)
-                    MagicMock(returncode=1, stdout=""),  # pgrep
-                ]
-                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
-                result = claw_mod._detect_openclaw_processes()
-                assert result == []
-
-    def test_detects_systemd_service(self):
-        with patch.object(claw_mod, "sys") as mock_sys:
-            mock_sys.platform = "linux"
-            with patch.object(claw_mod, "subprocess") as mock_subprocess:
-                mock_subprocess.run.side_effect = [
-                    MagicMock(returncode=0, stdout="active\n"),  # systemctl
-                    MagicMock(returncode=1, stdout=""),  # pgrep
-                ]
-                mock_subprocess.TimeoutExpired = subprocess.TimeoutExpired
-                result = claw_mod._detect_openclaw_processes()
-                assert len(result) == 1
-                assert "systemd" in result[0]
-
-    def test_returns_match_on_windows_when_openclaw_exe_running(self):
-        with patch.object(claw_mod, "sys") as mock_sys:
-            mock_sys.platform = "win32"
-            with patch.object(claw_mod, "subprocess") as mock_subprocess:
-                mock_subprocess.run.side_effect = [
-                    MagicMock(returncode=0, stdout="openclaw.exe                 1234 Console    1     45,056 K\n"),
-                ]
-                result = claw_mod._detect_openclaw_processes()
-                assert len(result) >= 1
-                assert any("openclaw.exe" in r for r in result)
-
-    def test_returns_match_on_windows_when_node_exe_has_openclaw_in_cmdline(self):
-        with patch.object(claw_mod, "sys") as mock_sys:
-            mock_sys.platform = "win32"
-            with patch.object(claw_mod, "subprocess") as mock_subprocess:
-                mock_subprocess.run.side_effect = [
-                    MagicMock(returncode=0, stdout=""),  # tasklist openclaw.exe
-                    MagicMock(returncode=0, stdout=""),  # tasklist clawd.exe
-                    MagicMock(returncode=0, stdout="1234\n"),  # PowerShell
-                ]
-                result = claw_mod._detect_openclaw_processes()
-                assert len(result) >= 1
-                assert any("node.exe" in r for r in result)
-
-    def test_returns_empty_on_windows_when_nothing_found(self):
-        with patch.object(claw_mod, "sys") as mock_sys:
-            mock_sys.platform = "win32"
-            with patch.object(claw_mod, "subprocess") as mock_subprocess:
-                mock_subprocess.run.side_effect = [
-                    MagicMock(returncode=0, stdout=""),
-                    MagicMock(returncode=0, stdout=""),
-                    MagicMock(returncode=0, stdout=""),
-                ]
-                result = claw_mod._detect_openclaw_processes()
-                assert result == []
-
-
-class TestWarnIfOpenclawRunning:
-    def test_noop_when_not_running(self, capsys):
-        with patch.object(claw_mod, "_detect_openclaw_processes", return_value=[]):
-            claw_mod._warn_if_openclaw_running(auto_yes=False)
-        captured = capsys.readouterr()
-        assert captured.out == ""
-
-    def test_warns_and_exits_when_running_and_user_declines(self, capsys):
-        with patch.object(claw_mod, "_detect_openclaw_processes", return_value=["openclaw process(es) (PIDs: 1234)"]):
-            with patch.object(claw_mod, "prompt_yes_no", return_value=False):
-                with patch.object(claw_mod.sys.stdin, "isatty", return_value=True):
-                    with pytest.raises(SystemExit) as exc_info:
-                        claw_mod._warn_if_openclaw_running(auto_yes=False)
-        assert exc_info.value.code == 0
-        captured = capsys.readouterr()
-        assert "OpenClaw appears to be running" in captured.out
-
-    def test_warns_and_continues_when_running_and_user_accepts(self, capsys):
-        with patch.object(claw_mod, "_detect_openclaw_processes", return_value=["openclaw process(es) (PIDs: 1234)"]):
-            with patch.object(claw_mod, "prompt_yes_no", return_value=True):
-                with patch.object(claw_mod.sys.stdin, "isatty", return_value=True):
-                    claw_mod._warn_if_openclaw_running(auto_yes=False)
-        captured = capsys.readouterr()
-        assert "OpenClaw appears to be running" in captured.out
-
-    def test_warns_and_continues_in_auto_yes_mode(self, capsys):
-        with patch.object(claw_mod, "_detect_openclaw_processes", return_value=["openclaw process(es) (PIDs: 1234)"]):
-            claw_mod._warn_if_openclaw_running(auto_yes=True)
-        captured = capsys.readouterr()
-        assert "OpenClaw appears to be running" in captured.out
-
-    def test_warns_and_continues_in_non_interactive_session(self, capsys):
-        with patch.object(claw_mod, "_detect_openclaw_processes", return_value=["openclaw process(es) (PIDs: 1234)"]):
-            with patch.object(claw_mod.sys.stdin, "isatty", return_value=False):
-                claw_mod._warn_if_openclaw_running(auto_yes=False)
-        captured = capsys.readouterr()
-        assert "OpenClaw appears to be running" in captured.out
-        assert "Non-interactive session" in captured.out
@@ -12,10 +12,49 @@ from unittest.mock import MagicMock, patch
 import pytest

 from hermes_cli.config import (
+    _is_inside_container,
    get_container_exec_info,
 )


+# =============================================================================
+# _is_inside_container
+# =============================================================================
+
+
+def test_is_inside_container_dockerenv():
+    """Detects /.dockerenv marker file."""
+    with patch("os.path.exists") as mock_exists:
+        mock_exists.side_effect = lambda p: p == "/.dockerenv"
+        assert _is_inside_container() is True
+
+
+def test_is_inside_container_containerenv():
+    """Detects Podman's /run/.containerenv marker."""
+    with patch("os.path.exists") as mock_exists:
+        mock_exists.side_effect = lambda p: p == "/run/.containerenv"
+        assert _is_inside_container() is True
+
+
+def test_is_inside_container_cgroup_docker():
+    """Detects 'docker' in /proc/1/cgroup."""
+    with patch("os.path.exists", return_value=False), \
+         patch("builtins.open", create=True) as mock_open:
+        mock_open.return_value.__enter__ = lambda s: s
+        mock_open.return_value.__exit__ = MagicMock(return_value=False)
+        mock_open.return_value.read = MagicMock(
+            return_value="12:memory:/docker/abc123\n"
+        )
+        assert _is_inside_container() is True
+
+
+def test_is_inside_container_false_on_host():
+    """Returns False when none of the container indicators are present."""
+    with patch("os.path.exists", return_value=False), \
+         patch("builtins.open", side_effect=OSError("no such file")):
+        assert _is_inside_container() is False
+
+
 # =============================================================================
 # get_container_exec_info
 # =============================================================================
@@ -42,7 +81,7 @@ def container_env(tmp_path, monkeypatch):

 def test_get_container_exec_info_returns_metadata(container_env):
    """Reads .container-mode and returns all fields including exec_user."""
-    with patch("hermes_constants.is_container", return_value=False):
+    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is not None
@@ -54,7 +93,7 @@ def test_get_container_exec_info_returns_metadata(container_env):

 def test_get_container_exec_info_none_inside_container(container_env):
    """Returns None when we're already inside a container."""
-    with patch("hermes_constants.is_container", return_value=True):
+    with patch("hermes_cli.config._is_inside_container", return_value=True):
        info = get_container_exec_info()

    assert info is None
@@ -67,7 +106,7 @@ def test_get_container_exec_info_none_without_file(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.delenv("HERMES_DEV", raising=False)

-    with patch("hermes_constants.is_container", return_value=False):
+    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is None
@@ -77,7 +116,7 @@ def test_get_container_exec_info_skipped_when_hermes_dev(container_env, monkeypa
    """Returns None when HERMES_DEV=1 is set (dev mode bypass)."""
    monkeypatch.setenv("HERMES_DEV", "1")

-    with patch("hermes_constants.is_container", return_value=False):
+    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is None
@@ -87,7 +126,7 @@ def test_get_container_exec_info_not_skipped_when_hermes_dev_zero(container_env,
    """HERMES_DEV=0 does NOT trigger bypass — only '1' does."""
    monkeypatch.setenv("HERMES_DEV", "0")

-    with patch("hermes_constants.is_container", return_value=False):
+    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is not None
@@ -104,7 +143,7 @@ def test_get_container_exec_info_defaults():
            "# minimal file with no keys\n"
        )

-        with patch("hermes_constants.is_container", return_value=False), \
+        with patch("hermes_cli.config._is_inside_container", return_value=False), \
             patch("hermes_cli.config.get_hermes_home", return_value=hermes_home), \
             patch.dict(os.environ, {}, clear=False):
            os.environ.pop("HERMES_DEV", None)
@@ -126,7 +165,7 @@ def test_get_container_exec_info_docker_backend(container_env):
        "hermes_bin=/opt/hermes/bin/hermes\n"
    )

-    with patch("hermes_constants.is_container", return_value=False):
+    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info["backend"] == "docker"
@@ -137,7 +176,7 @@ def test_get_container_exec_info_docker_backend(container_env):

 def test_get_container_exec_info_crashes_on_permission_error(container_env):
    """PermissionError propagates instead of being silently swallowed."""
-    with patch("hermes_constants.is_container", return_value=False), \
+    with patch("hermes_cli.config._is_inside_container", return_value=False), \
         patch("builtins.open", side_effect=PermissionError("permission denied")):
        with pytest.raises(PermissionError):
            get_container_exec_info()
@@ -122,54 +122,3 @@ class TestCustomProviderModelSwitch:
        model = config.get("model")
        assert isinstance(model, dict)
        assert model["default"] == "model-X"
-
-    def test_api_mode_set_from_provider_info(self, config_home):
-        """When custom_providers entry has api_mode, it should be applied."""
-        import yaml
-        from hermes_cli.main import _model_flow_named_custom
-
-        provider_info = {
-            "name": "Anthropic Proxy",
-            "base_url": "https://proxy.example.com/anthropic",
-            "api_key": "***",
-            "model": "claude-3",
-            "api_mode": "anthropic_messages",
-        }
-
-        with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \
-             patch.dict("sys.modules", {"simple_term_menu": None}), \
-             patch("builtins.input", return_value="1"), \
-             patch("builtins.print"):
-            _model_flow_named_custom({}, provider_info)
-
-        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
-        model = config.get("model")
-        assert isinstance(model, dict)
-        assert model.get("api_mode") == "anthropic_messages"
-
-    def test_api_mode_cleared_when_not_specified(self, config_home):
-        """When custom_providers entry has no api_mode, stale api_mode is removed."""
-        import yaml
-        from hermes_cli.main import _model_flow_named_custom
-
-        # Pre-seed a stale api_mode in config
-        config_path = config_home / "config.yaml"
-        config_path.write_text(yaml.dump({"model": {"api_mode": "anthropic_messages"}}))
-
-        provider_info = {
-            "name": "My vLLM",
-            "base_url": "https://vllm.example.com/v1",
-            "api_key": "***",
-            "model": "llama-3",
-        }
-
-        with patch("hermes_cli.models.fetch_api_models", return_value=["llama-3"]), \
-             patch.dict("sys.modules", {"simple_term_menu": None}), \
-             patch("builtins.input", return_value="1"), \
-             patch("builtins.print"):
-            _model_flow_named_custom({}, provider_info)
-
-        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
-        model = config.get("model")
-        assert isinstance(model, dict)
-        assert "api_mode" not in model, "Stale api_mode should be removed"
@@ -1,461 +0,0 @@
-"""Tests for ``hermes debug`` CLI command and debug utilities."""
-
-import os
-import sys
-import urllib.error
-from pathlib import Path
-from unittest.mock import MagicMock, patch, call
-
-import pytest
-
-# ---------------------------------------------------------------------------
-# Fixtures
-# ---------------------------------------------------------------------------
-
-@pytest.fixture
-def hermes_home(tmp_path, monkeypatch):
-    """Set up an isolated HERMES_HOME with minimal logs."""
-    home = tmp_path / ".hermes"
-    home.mkdir()
-    monkeypatch.setenv("HERMES_HOME", str(home))
-
-    # Create log files
-    logs_dir = home / "logs"
-    logs_dir.mkdir()
-    (logs_dir / "agent.log").write_text(
-        "2026-04-12 17:00:00 INFO agent: session started\n"
-        "2026-04-12 17:00:01 INFO tools.terminal: running ls\n"
-        "2026-04-12 17:00:02 WARNING agent: high token usage\n"
-    )
-    (logs_dir / "errors.log").write_text(
-        "2026-04-12 17:00:05 ERROR gateway.run: connection lost\n"
-    )
-    (logs_dir / "gateway.log").write_text(
-        "2026-04-12 17:00:10 INFO gateway.run: started\n"
-    )
-
-    return home
-
-
-# ---------------------------------------------------------------------------
-# Unit tests for upload helpers
-# ---------------------------------------------------------------------------
-
-class TestUploadPasteRs:
-    """Test paste.rs upload path."""
-
-    def test_upload_paste_rs_success(self):
-        from hermes_cli.debug import _upload_paste_rs
-
-        mock_resp = MagicMock()
-        mock_resp.read.return_value = b"https://paste.rs/abc123\n"
-        mock_resp.__enter__ = lambda s: s
-        mock_resp.__exit__ = MagicMock(return_value=False)
-
-        with patch("hermes_cli.debug.urllib.request.urlopen", return_value=mock_resp):
-            url = _upload_paste_rs("hello world")
-
-        assert url == "https://paste.rs/abc123"
-
-    def test_upload_paste_rs_bad_response(self):
-        from hermes_cli.debug import _upload_paste_rs
-
-        mock_resp = MagicMock()
-        mock_resp.read.return_value = b"<html>error</html>"
-        mock_resp.__enter__ = lambda s: s
-        mock_resp.__exit__ = MagicMock(return_value=False)
-
-        with patch("hermes_cli.debug.urllib.request.urlopen", return_value=mock_resp):
-            with pytest.raises(ValueError, match="Unexpected response"):
-                _upload_paste_rs("test")
-
-    def test_upload_paste_rs_network_error(self):
-        from hermes_cli.debug import _upload_paste_rs
-
-        with patch(
-            "hermes_cli.debug.urllib.request.urlopen",
-            side_effect=urllib.error.URLError("connection refused"),
-        ):
-            with pytest.raises(urllib.error.URLError):
-                _upload_paste_rs("test")
-
-
-class TestUploadDpasteCom:
-    """Test dpaste.com fallback upload path."""
-
-    def test_upload_dpaste_com_success(self):
-        from hermes_cli.debug import _upload_dpaste_com
-
-        mock_resp = MagicMock()
-        mock_resp.read.return_value = b"https://dpaste.com/ABCDEFG\n"
-        mock_resp.__enter__ = lambda s: s
-        mock_resp.__exit__ = MagicMock(return_value=False)
-
-        with patch("hermes_cli.debug.urllib.request.urlopen", return_value=mock_resp):
-            url = _upload_dpaste_com("hello world", expiry_days=7)
-
-        assert url == "https://dpaste.com/ABCDEFG"
-
-
-class TestUploadToPastebin:
-    """Test the combined upload with fallback."""
-
-    def test_tries_paste_rs_first(self):
-        from hermes_cli.debug import upload_to_pastebin
-
-        with patch("hermes_cli.debug._upload_paste_rs",
-                    return_value="https://paste.rs/test") as prs:
-            url = upload_to_pastebin("content")
-
-        assert url == "https://paste.rs/test"
-        prs.assert_called_once()
-
-    def test_falls_back_to_dpaste_com(self):
-        from hermes_cli.debug import upload_to_pastebin
-
-        with patch("hermes_cli.debug._upload_paste_rs",
-                    side_effect=Exception("down")), \
-             patch("hermes_cli.debug._upload_dpaste_com",
-                    return_value="https://dpaste.com/TEST") as dp:
-            url = upload_to_pastebin("content")
-
-        assert url == "https://dpaste.com/TEST"
-        dp.assert_called_once()
-
-    def test_raises_when_both_fail(self):
-        from hermes_cli.debug import upload_to_pastebin
-
-        with patch("hermes_cli.debug._upload_paste_rs",
-                    side_effect=Exception("err1")), \
-             patch("hermes_cli.debug._upload_dpaste_com",
-                    side_effect=Exception("err2")):
-            with pytest.raises(RuntimeError, match="Failed to upload"):
-                upload_to_pastebin("content")
-
-
-# ---------------------------------------------------------------------------
-# Log reading
-# ---------------------------------------------------------------------------
-
-class TestReadFullLog:
-    """Test _read_full_log for standalone log uploads."""
-
-    def test_reads_small_file(self, hermes_home):
-        from hermes_cli.debug import _read_full_log
-
-        content = _read_full_log("agent")
-        assert content is not None
-        assert "session started" in content
-
-    def test_returns_none_for_missing(self, tmp_path, monkeypatch):
-        home = tmp_path / ".hermes"
-        home.mkdir()
-        monkeypatch.setenv("HERMES_HOME", str(home))
-
-        from hermes_cli.debug import _read_full_log
-        assert _read_full_log("agent") is None
-
-    def test_returns_none_for_empty(self, hermes_home):
-        # Truncate agent.log to empty
-        (hermes_home / "logs" / "agent.log").write_text("")
-
-        from hermes_cli.debug import _read_full_log
-        assert _read_full_log("agent") is None
-
-    def test_truncates_large_file(self, hermes_home):
-        """Files larger than max_bytes get tail-truncated."""
-        from hermes_cli.debug import _read_full_log
-
-        # Write a file larger than 1KB
-        big_content = "x" * 100 + "\n"
-        (hermes_home / "logs" / "agent.log").write_text(big_content * 200)
-
-        content = _read_full_log("agent", max_bytes=1024)
-        assert content is not None
-        assert "truncated" in content
-
-    def test_unknown_log_returns_none(self, hermes_home):
-        from hermes_cli.debug import _read_full_log
-        assert _read_full_log("nonexistent") is None
-
-    def test_falls_back_to_rotated_file(self, hermes_home):
-        """When gateway.log doesn't exist, falls back to gateway.log.1."""
-        from hermes_cli.debug import _read_full_log
-
-        logs_dir = hermes_home / "logs"
-        # Remove the primary (if any) and create a .1 rotation
-        (logs_dir / "gateway.log").unlink(missing_ok=True)
-        (logs_dir / "gateway.log.1").write_text(
-            "2026-04-12 10:00:00 INFO gateway.run: rotated content\n"
-        )
-
-        content = _read_full_log("gateway")
-        assert content is not None
-        assert "rotated content" in content
-
-    def test_prefers_primary_over_rotated(self, hermes_home):
-        """Primary log is used when it exists, even if .1 also exists."""
-        from hermes_cli.debug import _read_full_log
-
-        logs_dir = hermes_home / "logs"
-        (logs_dir / "gateway.log").write_text("primary content\n")
-        (logs_dir / "gateway.log.1").write_text("rotated content\n")
-
-        content = _read_full_log("gateway")
-        assert "primary content" in content
-        assert "rotated" not in content
-
-    def test_falls_back_when_primary_empty(self, hermes_home):
-        """Empty primary log falls back to .1 rotation."""
-        from hermes_cli.debug import _read_full_log
-
-        logs_dir = hermes_home / "logs"
-        (logs_dir / "agent.log").write_text("")
-        (logs_dir / "agent.log.1").write_text("rotated agent data\n")
-
-        content = _read_full_log("agent")
-        assert content is not None
-        assert "rotated agent data" in content
-
-
-# ---------------------------------------------------------------------------
-# Debug report collection
-# ---------------------------------------------------------------------------
-
-class TestCollectDebugReport:
-    """Test the debug report builder."""
-
-    def test_report_includes_dump_output(self, hermes_home):
-        from hermes_cli.debug import collect_debug_report
-
-        with patch("hermes_cli.dump.run_dump") as mock_dump:
-            mock_dump.side_effect = lambda args: print(
-                "--- hermes dump ---\nversion: 0.8.0\n--- end dump ---"
-            )
-            report = collect_debug_report(log_lines=50)
-
-        assert "--- hermes dump ---" in report
-        assert "version: 0.8.0" in report
-
-    def test_report_includes_agent_log(self, hermes_home):
-        from hermes_cli.debug import collect_debug_report
-
-        with patch("hermes_cli.dump.run_dump"):
-            report = collect_debug_report(log_lines=50)
-
-        assert "--- agent.log" in report
-        assert "session started" in report
-
-    def test_report_includes_errors_log(self, hermes_home):
-        from hermes_cli.debug import collect_debug_report
-
-        with patch("hermes_cli.dump.run_dump"):
-            report = collect_debug_report(log_lines=50)
-
-        assert "--- errors.log" in report
-        assert "connection lost" in report
-
-    def test_report_includes_gateway_log(self, hermes_home):
-        from hermes_cli.debug import collect_debug_report
-
-        with patch("hermes_cli.dump.run_dump"):
-            report = collect_debug_report(log_lines=50)
-
-        assert "--- gateway.log" in report
-
-    def test_missing_logs_handled(self, tmp_path, monkeypatch):
-        home = tmp_path / ".hermes"
-        home.mkdir()
-        monkeypatch.setenv("HERMES_HOME", str(home))
-
-        from hermes_cli.debug import collect_debug_report
-
-        with patch("hermes_cli.dump.run_dump"):
-            report = collect_debug_report(log_lines=50)
-
-        assert "(file not found)" in report
-
-
-# ---------------------------------------------------------------------------
-# CLI entry point — run_debug_share
-# ---------------------------------------------------------------------------
-
-class TestRunDebugShare:
-    """Test the run_debug_share CLI handler."""
-
-    def test_local_flag_prints_full_logs(self, hermes_home, capsys):
-        """--local prints the report plus full log contents."""
-        from hermes_cli.debug import run_debug_share
-
-        args = MagicMock()
-        args.lines = 50
-        args.expire = 7
-        args.local = True
-
-        with patch("hermes_cli.dump.run_dump"):
-            run_debug_share(args)
-
-        out = capsys.readouterr().out
-        assert "--- agent.log" in out
-        assert "FULL agent.log" in out
-        assert "FULL gateway.log" in out
-
-    def test_share_uploads_three_pastes(self, hermes_home, capsys):
-        """Successful share uploads report + agent.log + gateway.log."""
-        from hermes_cli.debug import run_debug_share
-
-        args = MagicMock()
-        args.lines = 50
-        args.expire = 7
-        args.local = False
-
-        call_count = [0]
-        uploaded_content = []
-        def _mock_upload(content, expiry_days=7):
-            call_count[0] += 1
-            uploaded_content.append(content)
-            return f"https://paste.rs/paste{call_count[0]}"
-
-        with patch("hermes_cli.dump.run_dump") as mock_dump, \
-             patch("hermes_cli.debug.upload_to_pastebin",
-                    side_effect=_mock_upload):
-            mock_dump.side_effect = lambda a: print("--- hermes dump ---\nversion: test\n--- end dump ---")
-            run_debug_share(args)
-
-        out = capsys.readouterr().out
-        # Should have 3 uploads: report, agent.log, gateway.log
-        assert call_count[0] == 3
-        assert "paste.rs/paste1" in out  # Report
-        assert "paste.rs/paste2" in out  # agent.log
-        assert "paste.rs/paste3" in out  # gateway.log
-        assert "Report" in out
-        assert "agent.log" in out
-        assert "gateway.log" in out
-
-        # Each log paste should start with the dump header
-        agent_paste = uploaded_content[1]
-        assert "--- hermes dump ---" in agent_paste
-        assert "--- full agent.log ---" in agent_paste
-        gateway_paste = uploaded_content[2]
-        assert "--- hermes dump ---" in gateway_paste
-        assert "--- full gateway.log ---" in gateway_paste
-
-    def test_share_skips_missing_logs(self, tmp_path, monkeypatch, capsys):
-        """Only uploads logs that exist."""
-        home = tmp_path / ".hermes"
-        home.mkdir()
-        monkeypatch.setenv("HERMES_HOME", str(home))
-
-        from hermes_cli.debug import run_debug_share
-
-        args = MagicMock()
-        args.lines = 50
-        args.expire = 7
-        args.local = False
-
-        call_count = [0]
-        def _mock_upload(content, expiry_days=7):
-            call_count[0] += 1
-            return f"https://paste.rs/paste{call_count[0]}"
-
-        with patch("hermes_cli.dump.run_dump"), \
-             patch("hermes_cli.debug.upload_to_pastebin",
-                    side_effect=_mock_upload):
-            run_debug_share(args)
-
-        out = capsys.readouterr().out
-        # Only the report should be uploaded (no log files exist)
-        assert call_count[0] == 1
-        assert "Report" in out
-
-    def test_share_continues_on_log_upload_failure(self, hermes_home, capsys):
-        """Log upload failure doesn't stop the report from being shared."""
-        from hermes_cli.debug import run_debug_share
-
-        args = MagicMock()
-        args.lines = 50
-        args.expire = 7
-        args.local = False
-
-        call_count = [0]
-        def _mock_upload(content, expiry_days=7):
-            call_count[0] += 1
-            if call_count[0] > 1:
-                raise RuntimeError("upload failed")
-            return "https://paste.rs/report"
-
-        with patch("hermes_cli.dump.run_dump"), \
-             patch("hermes_cli.debug.upload_to_pastebin",
-                    side_effect=_mock_upload):
-            run_debug_share(args)
-
-        out = capsys.readouterr().out
-        assert "Report" in out
-        assert "paste.rs/report" in out
-        assert "failed to upload" in out
-
-    def test_share_exits_on_report_upload_failure(self, hermes_home, capsys):
-        """If the main report fails to upload, exit with code 1."""
-        from hermes_cli.debug import run_debug_share
-
-        args = MagicMock()
-        args.lines = 50
-        args.expire = 7
-        args.local = False
-
-        with patch("hermes_cli.dump.run_dump"), \
-             patch("hermes_cli.debug.upload_to_pastebin",
-                    side_effect=RuntimeError("all failed")):
-            with pytest.raises(SystemExit) as exc_info:
-                run_debug_share(args)
-
-        assert exc_info.value.code == 1
-        out = capsys.readouterr()
-        assert "all failed" in out.err
-
-
-# ---------------------------------------------------------------------------
-# run_debug router
-# ---------------------------------------------------------------------------
-
-class TestRunDebug:
-    def test_no_subcommand_shows_usage(self, capsys):
-        from hermes_cli.debug import run_debug
-
-        args = MagicMock()
-        args.debug_command = None
-
-        run_debug(args)
-
-        out = capsys.readouterr().out
-        assert "hermes debug share" in out
-
-    def test_share_subcommand_routes(self, hermes_home):
-        from hermes_cli.debug import run_debug
-
-        args = MagicMock()
-        args.debug_command = "share"
-        args.lines = 200
-        args.expire = 7
-        args.local = True
-
-        with patch("hermes_cli.dump.run_dump"):
-            run_debug(args)
-
-
-# ---------------------------------------------------------------------------
-# Argparse integration
-# ---------------------------------------------------------------------------
-
-class TestArgparseIntegration:
-    def test_module_imports_clean(self):
-        from hermes_cli.debug import run_debug, run_debug_share
-        assert callable(run_debug)
-        assert callable(run_debug_share)
-
-    def test_cmd_debug_dispatches(self):
-        from hermes_cli.main import cmd_debug
-
-        args = MagicMock()
-        args.debug_command = None
-        cmd_debug(args)
@@ -394,21 +394,6 @@ class TestLaunchdServiceRecovery:


 class TestGatewayServiceDetection:
-    def test_supports_systemd_services_requires_systemctl_binary(self, monkeypatch):
-        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-        monkeypatch.setattr(gateway_cli.shutil, "which", lambda name: None)
-
-        assert gateway_cli.supports_systemd_services() is False
-
-    def test_supports_systemd_services_returns_true_when_systemctl_present(self, monkeypatch):
-        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
-        monkeypatch.setattr(gateway_cli.shutil, "which", lambda name: "/usr/bin/systemctl")
-
-        assert gateway_cli.supports_systemd_services() is True
-
    def test_is_service_running_checks_system_scope_when_user_scope_is_inactive(self, monkeypatch):
        user_unit = SimpleNamespace(exists=lambda: True)
        system_unit = SimpleNamespace(exists=lambda: True)
@@ -433,23 +418,6 @@ class TestGatewayServiceDetection:

        assert gateway_cli._is_service_running() is True

-    def test_is_service_running_returns_false_when_systemctl_missing(self, monkeypatch):
-        unit = SimpleNamespace(exists=lambda: True)
-
-        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
-        monkeypatch.setattr(
-            gateway_cli,
-            "get_systemd_unit_path",
-            lambda system=False: unit,
-        )
-
-        def fake_run(*args, **kwargs):
-            raise FileNotFoundError("systemctl")
-
-        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
-
-        assert gateway_cli._is_service_running() is False
-

 class TestGatewaySystemServiceRouting:
    def test_systemd_restart_self_requests_graceful_restart_without_reload_or_restart(self, monkeypatch, capsys):
@@ -1033,91 +1001,3 @@ class TestSystemUnitPathRemapping:
        # Target user paths should be present
        assert "/home/alice" in unit
        assert "WorkingDirectory=/home/alice/.hermes/hermes-agent" in unit
-
-
-class TestDockerAwareGateway:
-    """Tests for Docker container awareness in gateway commands."""
-
-    def test_run_systemctl_raises_runtimeerror_when_missing(self, monkeypatch):
-        """_run_systemctl raises RuntimeError with container guidance when systemctl is absent."""
-        import pytest
-
-        def fake_run(cmd, **kwargs):
-            raise FileNotFoundError("systemctl")
-
-        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
-
-        with pytest.raises(RuntimeError, match="systemctl is not available"):
-            gateway_cli._run_systemctl(["start", "hermes-gateway"])
-
-    def test_run_systemctl_passes_through_on_success(self, monkeypatch):
-        """_run_systemctl delegates to subprocess.run when systemctl exists."""
-        calls = []
-
-        def fake_run(cmd, **kwargs):
-            calls.append(cmd)
-            return SimpleNamespace(returncode=0, stdout="", stderr="")
-
-        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
-
-        result = gateway_cli._run_systemctl(["status", "hermes-gateway"])
-        assert result.returncode == 0
-        assert len(calls) == 1
-        assert "status" in calls[0]
-
-    def test_install_in_container_prints_docker_guidance(self, monkeypatch, capsys):
-        """'hermes gateway install' inside Docker exits 0 with container guidance."""
-        import pytest
-
-        monkeypatch.setattr(gateway_cli, "is_managed", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_container", lambda: True)
-
-        args = SimpleNamespace(gateway_command="install", force=False, system=False, run_as_user=None)
-        with pytest.raises(SystemExit) as exc_info:
-            gateway_cli.gateway_command(args)
-
-        assert exc_info.value.code == 0
-        out = capsys.readouterr().out
-        assert "Docker" in out or "docker" in out
-        assert "restart" in out.lower()
-
-    def test_uninstall_in_container_prints_docker_guidance(self, monkeypatch, capsys):
-        """'hermes gateway uninstall' inside Docker exits 0 with container guidance."""
-        import pytest
-
-        monkeypatch.setattr(gateway_cli, "is_managed", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_container", lambda: True)
-
-        args = SimpleNamespace(gateway_command="uninstall", system=False)
-        with pytest.raises(SystemExit) as exc_info:
-            gateway_cli.gateway_command(args)
-
-        assert exc_info.value.code == 0
-        out = capsys.readouterr().out
-        assert "docker" in out.lower()
-
-    def test_start_in_container_prints_docker_guidance(self, monkeypatch, capsys):
-        """'hermes gateway start' inside Docker exits 0 with container guidance."""
-        import pytest
-
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_container", lambda: True)
-
-        args = SimpleNamespace(gateway_command="start", system=False)
-        with pytest.raises(SystemExit) as exc_info:
-            gateway_cli.gateway_command(args)
-
-        assert exc_info.value.code == 0
-        out = capsys.readouterr().out
-        assert "docker" in out.lower()
-        assert "hermes gateway run" in out
@@ -54,19 +54,14 @@ class TestAnthropicDotToHyphen:

 # ── OpenCode Zen regression ────────────────────────────────────────────

-class TestOpenCodeZenModelNormalization:
-    """OpenCode Zen preserves dots for most models, but Claude stays hyphenated."""
+class TestOpenCodeZenDotToHyphen:
+    """OpenCode Zen follows Anthropic convention (dots→hyphens)."""

    @pytest.mark.parametrize("model,expected", [
        ("claude-sonnet-4.6", "claude-sonnet-4-6"),
-        ("opencode-zen/claude-opus-4.5", "claude-opus-4-5"),
-        ("glm-4.5", "glm-4.5"),
-        ("glm-5.1", "glm-5.1"),
-        ("gpt-5.4", "gpt-5.4"),
-        ("minimax-m2.5-free", "minimax-m2.5-free"),
-        ("kimi-k2.5", "kimi-k2.5"),
+        ("glm-4.5", "glm-4-5"),
    ])
-    def test_zen_normalizes_models(self, model, expected):
+    def test_zen_converts_dots(self, model, expected):
        result = normalize_model_for_provider(model, "opencode-zen")
        assert result == expected

@@ -74,10 +69,6 @@ class TestOpenCodeZenModelNormalization:
        result = normalize_model_for_provider("opencode-zen/claude-sonnet-4.6", "opencode-zen")
        assert result == "claude-sonnet-4-6"

-    def test_zen_strips_vendor_prefix_for_non_claude(self):
-        result = normalize_model_for_provider("opencode-zen/glm-5.1", "opencode-zen")
-        assert result == "glm-5.1"
-

 # ── Copilot dot preservation (regression) ──────────────────────────────

@@ -257,76 +257,3 @@ class TestProviderPersistsAfterModelSave:
        assert model.get("provider") == "opencode-go"
        assert model.get("default") == "minimax-m2.5"
        assert model.get("api_mode") == "anthropic_messages"
-
-
-class TestBaseUrlValidation:
-    """Reject non-URL values in the base URL prompt (e.g. shell commands)."""
-
-    def test_invalid_base_url_rejected(self, config_home, monkeypatch, capsys):
-        """Typing a non-URL string should not be saved as the base URL."""
-        from hermes_cli.auth import PROVIDER_REGISTRY
-
-        pconfig = PROVIDER_REGISTRY.get("zai")
-        if not pconfig:
-            pytest.skip("zai not in PROVIDER_REGISTRY")
-
-        monkeypatch.setenv("GLM_API_KEY", "test-key")
-
-        from hermes_cli.main import _model_flow_api_key_provider
-        from hermes_cli.config import load_config, get_env_value
-
-        # User types a shell command instead of a URL at the base URL prompt
-        with patch("hermes_cli.auth._prompt_model_selection", return_value="glm-5"), \
-             patch("hermes_cli.auth.deactivate_provider"), \
-             patch("builtins.input", return_value="nano ~/.hermes/.env"):
-            _model_flow_api_key_provider(load_config(), "zai", "old-model")
-
-        # The garbage value should NOT have been saved
-        saved = get_env_value("GLM_BASE_URL") or ""
-        assert not saved or saved.startswith(("http://", "https://")), \
-            f"Non-URL value was saved as GLM_BASE_URL: {saved}"
-        captured = capsys.readouterr()
-        assert "Invalid URL" in captured.out
-
-    def test_valid_base_url_accepted(self, config_home, monkeypatch):
-        """A proper URL should be saved normally."""
-        from hermes_cli.auth import PROVIDER_REGISTRY
-
-        pconfig = PROVIDER_REGISTRY.get("zai")
-        if not pconfig:
-            pytest.skip("zai not in PROVIDER_REGISTRY")
-
-        monkeypatch.setenv("GLM_API_KEY", "test-key")
-
-        from hermes_cli.main import _model_flow_api_key_provider
-        from hermes_cli.config import load_config, get_env_value
-
-        with patch("hermes_cli.auth._prompt_model_selection", return_value="glm-5"), \
-             patch("hermes_cli.auth.deactivate_provider"), \
-             patch("builtins.input", return_value="https://custom.z.ai/api/paas/v4"):
-            _model_flow_api_key_provider(load_config(), "zai", "old-model")
-
-        saved = get_env_value("GLM_BASE_URL") or ""
-        assert saved == "https://custom.z.ai/api/paas/v4"
-
-    def test_empty_base_url_keeps_default(self, config_home, monkeypatch):
-        """Pressing Enter (empty) should not change the base URL."""
-        from hermes_cli.auth import PROVIDER_REGISTRY
-
-        pconfig = PROVIDER_REGISTRY.get("zai")
-        if not pconfig:
-            pytest.skip("zai not in PROVIDER_REGISTRY")
-
-        monkeypatch.setenv("GLM_API_KEY", "test-key")
-        monkeypatch.delenv("GLM_BASE_URL", raising=False)
-
-        from hermes_cli.main import _model_flow_api_key_provider
-        from hermes_cli.config import load_config, get_env_value
-
-        with patch("hermes_cli.auth._prompt_model_selection", return_value="glm-5"), \
-             patch("hermes_cli.auth.deactivate_provider"), \
-             patch("builtins.input", return_value=""):
-            _model_flow_api_key_provider(load_config(), "zai", "old-model")
-
-        saved = get_env_value("GLM_BASE_URL") or ""
-        assert saved == "", "Empty input should not save a base URL"
@@ -1,84 +0,0 @@
-"""Tests for the Nous-Hermes-3/4 non-agentic warning detector.
-
-Prior to this check, the warning fired on any model whose name contained
-``"hermes"`` anywhere (case-insensitive). That false-positived on unrelated
-local Modelfiles such as ``hermes-brain:qwen3-14b-ctx16k`` — a tool-capable
-Qwen3 wrapper that happens to live under the "hermes" tag namespace.
-
-``is_nous_hermes_non_agentic`` should only match the actual Nous Research
-Hermes-3 / Hermes-4 chat family.
-"""
-
-from __future__ import annotations
-
-import pytest
-
-from hermes_cli.model_switch import (
-    _HERMES_MODEL_WARNING,
-    _check_hermes_model_warning,
-    is_nous_hermes_non_agentic,
-)
-
-
-@pytest.mark.parametrize(
-    "model_name",
-    [
-        "NousResearch/Hermes-3-Llama-3.1-70B",
-        "NousResearch/Hermes-3-Llama-3.1-405B",
-        "hermes-3",
-        "Hermes-3",
-        "hermes-4",
-        "hermes-4-405b",
-        "hermes_4_70b",
-        "openrouter/hermes3:70b",
-        "openrouter/nousresearch/hermes-4-405b",
-        "NousResearch/Hermes3",
-        "hermes-3.1",
-    ],
-)
-def test_matches_real_nous_hermes_chat_models(model_name: str) -> None:
-    assert is_nous_hermes_non_agentic(model_name), (
-        f"expected {model_name!r} to be flagged as Nous Hermes 3/4"
-    )
-    assert _check_hermes_model_warning(model_name) == _HERMES_MODEL_WARNING
-
-
-@pytest.mark.parametrize(
-    "model_name",
-    [
-        # Kyle's local Modelfile — qwen3:14b under a custom tag
-        "hermes-brain:qwen3-14b-ctx16k",
-        "hermes-brain:qwen3-14b-ctx32k",
-        "hermes-honcho:qwen3-8b-ctx8k",
-        # Plain unrelated models
-        "qwen3:14b",
-        "qwen3-coder:30b",
-        "qwen2.5:14b",
-        "claude-opus-4-6",
-        "anthropic/claude-sonnet-4.5",
-        "gpt-5",
-        "openai/gpt-4o",
-        "google/gemini-2.5-flash",
-        "deepseek-chat",
-        # Non-chat Hermes models we don't warn about
-        "hermes-llm-2",
-        "hermes2-pro",
-        "nous-hermes-2-mistral",
-        # Edge cases
-        "",
-        "hermes",  # bare "hermes" isn't the 3/4 family
-        "hermes-brain",
-        "brain-hermes-3-impostor",  # "3" not preceded by /: boundary
-    ],
-)
-def test_does_not_match_unrelated_models(model_name: str) -> None:
-    assert not is_nous_hermes_non_agentic(model_name), (
-        f"expected {model_name!r} NOT to be flagged as Nous Hermes 3/4"
-    )
-    assert _check_hermes_model_warning(model_name) == ""
-
-
-def test_none_like_inputs_are_safe() -> None:
-    assert is_nous_hermes_non_agentic("") is False
-    # Defensive: the helper shouldn't crash on None-ish falsy input either.
-    assert _check_hermes_model_warning("") == ""
@@ -177,8 +177,7 @@ class TestCreateProfile:
        # No error; optional files just not copied
        assert not (profile_dir / "config.yaml").exists()
        assert not (profile_dir / ".env").exists()
-        # SOUL.md is always seeded with the default even when clone source lacks it
-        assert (profile_dir / "SOUL.md").exists()
+        assert not (profile_dir / "SOUL.md").exists()


 # ===================================================================
@@ -1,4 +1,5 @@
-"""Tests for setup.py configuration flows."""
+"""Tests for setup_model_provider — verifies the delegation to
+select_provider_and_model() and config dict sync."""
 import json
 import sys
 import types
@@ -7,7 +8,6 @@ import pytest

 from hermes_cli.auth import get_active_provider
 from hermes_cli.config import load_config, save_config
-from hermes_cli import setup as setup_mod
 from hermes_cli.setup import setup_model_provider


@@ -144,85 +144,6 @@ def test_setup_custom_providers_synced(tmp_path, monkeypatch):
    assert reloaded.get("custom_providers") == [{"name": "Local", "base_url": "http://localhost:8080/v1"}]


-def test_setup_gateway_skips_service_install_when_systemctl_missing(monkeypatch, capsys):
-    env = {
-        "TELEGRAM_BOT_TOKEN": "",
-        "TELEGRAM_HOME_CHANNEL": "",
-        "DISCORD_BOT_TOKEN": "",
-        "DISCORD_HOME_CHANNEL": "",
-        "SLACK_BOT_TOKEN": "",
-        "SLACK_HOME_CHANNEL": "",
-        "MATRIX_HOMESERVER": "https://matrix.example.com",
-        "MATRIX_USER_ID": "@alice:example.com",
-        "MATRIX_PASSWORD": "",
-        "MATRIX_ACCESS_TOKEN": "token",
-        "BLUEBUBBLES_SERVER_URL": "",
-        "BLUEBUBBLES_HOME_CHANNEL": "",
-        "WHATSAPP_ENABLED": "",
-        "WEBHOOK_ENABLED": "",
-    }
-
-    monkeypatch.setattr(setup_mod, "get_env_value", lambda key: env.get(key, ""))
-    monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("platform.system", lambda: "Linux")
-
-    import hermes_cli.gateway as gateway_mod
-
-    monkeypatch.setattr(gateway_mod, "supports_systemd_services", lambda: False)
-    monkeypatch.setattr(gateway_mod, "is_macos", lambda: False)
-    monkeypatch.setattr(gateway_mod, "_is_service_installed", lambda: False)
-    monkeypatch.setattr(gateway_mod, "_is_service_running", lambda: False)
-
-    setup_mod.setup_gateway({})
-
-    out = capsys.readouterr().out
-    assert "Messaging platforms configured!" in out
-    assert "Start the gateway to bring your bots online:" in out
-    assert "hermes gateway" in out
-
-
-def test_setup_gateway_in_container_shows_docker_guidance(monkeypatch, capsys):
-    """setup_gateway() in a Docker container shows Docker-specific restart instructions."""
-    env = {
-        "TELEGRAM_BOT_TOKEN": "",
-        "TELEGRAM_HOME_CHANNEL": "",
-        "DISCORD_BOT_TOKEN": "",
-        "DISCORD_HOME_CHANNEL": "",
-        "SLACK_BOT_TOKEN": "",
-        "SLACK_HOME_CHANNEL": "",
-        "MATRIX_HOMESERVER": "https://matrix.example.com",
-        "MATRIX_USER_ID": "@alice:example.com",
-        "MATRIX_PASSWORD": "",
-        "MATRIX_ACCESS_TOKEN": "token",
-        "BLUEBUBBLES_SERVER_URL": "",
-        "BLUEBUBBLES_HOME_CHANNEL": "",
-        "WHATSAPP_ENABLED": "",
-        "WEBHOOK_ENABLED": "",
-    }
-
-    monkeypatch.setattr(setup_mod, "get_env_value", lambda key: env.get(key, ""))
-    monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *args, **kwargs: False)
-    monkeypatch.setattr("platform.system", lambda: "Linux")
-
-    import hermes_cli.gateway as gateway_mod
-
-    monkeypatch.setattr(gateway_mod, "supports_systemd_services", lambda: False)
-    monkeypatch.setattr(gateway_mod, "is_macos", lambda: False)
-    monkeypatch.setattr(gateway_mod, "_is_service_installed", lambda: False)
-    monkeypatch.setattr(gateway_mod, "_is_service_running", lambda: False)
-
-    # Patch is_container at the import location in setup.py
-    import hermes_constants
-    monkeypatch.setattr(hermes_constants, "is_container", lambda: True)
-
-    setup_mod.setup_gateway({})
-
-    out = capsys.readouterr().out
-    assert "Messaging platforms configured!" in out
-    assert "docker" in out.lower() or "Docker" in out
-    assert "restart" in out.lower()
-
-
 def test_setup_syncs_custom_provider_removal_from_disk(tmp_path, monkeypatch):
    """Removing the last custom provider in model setup should persist."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -798,120 +798,3 @@ class TestFindGatewayPidsExclude:
        pids = gateway_cli.find_gateway_pids()

        assert pids == [100]
-
-
-# ---------------------------------------------------------------------------
-# Gateway mode writes exit code before restart (#8300)
-# ---------------------------------------------------------------------------
-
-
-class TestGatewayModeWritesExitCodeEarly:
-    """When running as ``hermes update --gateway``, the exit code marker must be
-    written *before* the gateway restart attempt.  Without this, systemd's
-    ``KillMode=mixed`` kills the update process (and its wrapping shell) during
-    the cgroup teardown, so the shell epilogue that normally writes the exit
-    code never executes.  The new gateway's update watcher then polls for 30
-    minutes and sends a spurious timeout message.
-    """
-
-    @patch("shutil.which", return_value=None)
-    @patch("subprocess.run")
-    def test_exit_code_written_in_gateway_mode(
-        self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
-    ):
-        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
-        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-
-        # Point HERMES_HOME at a temp dir so the marker file lands there
-        hermes_home = tmp_path / ".hermes"
-        hermes_home.mkdir()
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-        import hermes_cli.config as _cfg
-        monkeypatch.setattr(_cfg, "get_hermes_home", lambda: hermes_home)
-        # Also patch the module-level ref used by cmd_update
-        import hermes_cli.main as _main_mod
-        monkeypatch.setattr(_main_mod, "get_hermes_home", lambda: hermes_home)
-
-        mock_run.side_effect = _make_run_side_effect(commit_count="1")
-
-        args = SimpleNamespace(gateway=True)
-
-        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
-            cmd_update(args)
-
-        exit_code_path = hermes_home / ".update_exit_code"
-        assert exit_code_path.exists(), ".update_exit_code not written in gateway mode"
-        assert exit_code_path.read_text() == "0"
-
-    @patch("shutil.which", return_value=None)
-    @patch("subprocess.run")
-    def test_exit_code_not_written_in_normal_mode(
-        self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
-    ):
-        """Non-gateway mode should NOT write the exit code (the shell does it)."""
-        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
-        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: False)
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-
-        hermes_home = tmp_path / ".hermes"
-        hermes_home.mkdir()
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-        import hermes_cli.config as _cfg
-        monkeypatch.setattr(_cfg, "get_hermes_home", lambda: hermes_home)
-        import hermes_cli.main as _main_mod
-        monkeypatch.setattr(_main_mod, "get_hermes_home", lambda: hermes_home)
-
-        mock_run.side_effect = _make_run_side_effect(commit_count="1")
-
-        args = SimpleNamespace(gateway=False)
-
-        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
-            cmd_update(args)
-
-        exit_code_path = hermes_home / ".update_exit_code"
-        assert not exit_code_path.exists(), ".update_exit_code should not be written outside gateway mode"
-
-    @patch("shutil.which", return_value=None)
-    @patch("subprocess.run")
-    def test_exit_code_written_before_restart_call(
-        self, mock_run, _mock_which, capsys, tmp_path, monkeypatch,
-    ):
-        """Exit code must exist BEFORE systemctl restart is called."""
-        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
-        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
-        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
-
-        hermes_home = tmp_path / ".hermes"
-        hermes_home.mkdir()
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-        import hermes_cli.config as _cfg
-        monkeypatch.setattr(_cfg, "get_hermes_home", lambda: hermes_home)
-        import hermes_cli.main as _main_mod
-        monkeypatch.setattr(_main_mod, "get_hermes_home", lambda: hermes_home)
-
-        exit_code_path = hermes_home / ".update_exit_code"
-
-        # Track whether exit code exists when systemctl restart is called
-        exit_code_existed_at_restart = []
-
-        original_side_effect = _make_run_side_effect(
-            commit_count="1", systemd_active=True,
-        )
-
-        def tracking_side_effect(cmd, **kwargs):
-            joined = " ".join(str(c) for c in cmd)
-            if "systemctl" in joined and "restart" in joined:
-                exit_code_existed_at_restart.append(exit_code_path.exists())
-            return original_side_effect(cmd, **kwargs)
-
-        mock_run.side_effect = tracking_side_effect
-
-        args = SimpleNamespace(gateway=True)
-
-        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
-            cmd_update(args)
-
-        assert exit_code_existed_at_restart, "systemctl restart was never called"
-        assert exit_code_existed_at_restart[0] is True, \
-            ".update_exit_code must exist BEFORE systemctl restart (cgroup kill race)"
@@ -1,675 +0,0 @@
-"""Tests for hermes_cli.web_server and related config utilities."""
-
-import os
-import json
-import tempfile
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-import pytest
-
-from hermes_cli.config import (
-    DEFAULT_CONFIG,
-    reload_env,
-    redact_key,
-    _EXTRA_ENV_KEYS,
-    OPTIONAL_ENV_VARS,
-)
-
-
-# ---------------------------------------------------------------------------
-# reload_env tests
-# ---------------------------------------------------------------------------
-
-
-class TestReloadEnv:
-    """Tests for reload_env() — re-reads .env into os.environ."""
-
-    def test_adds_new_vars(self, tmp_path):
-        """reload_env() adds vars from .env that are not in os.environ."""
-        env_file = tmp_path / ".env"
-        env_file.write_text("TEST_RELOAD_VAR=hello123\n")
-        with patch("hermes_cli.config.get_env_path", return_value=env_file):
-            os.environ.pop("TEST_RELOAD_VAR", None)
-            count = reload_env()
-            assert count >= 1
-            assert os.environ.get("TEST_RELOAD_VAR") == "hello123"
-        os.environ.pop("TEST_RELOAD_VAR", None)
-
-    def test_updates_changed_vars(self, tmp_path):
-        """reload_env() updates vars whose value changed on disk."""
-        env_file = tmp_path / ".env"
-        env_file.write_text("TEST_RELOAD_VAR=old_value\n")
-        with patch("hermes_cli.config.get_env_path", return_value=env_file):
-            os.environ["TEST_RELOAD_VAR"] = "old_value"
-            # Now change the file
-            env_file.write_text("TEST_RELOAD_VAR=new_value\n")
-            count = reload_env()
-            assert count >= 1
-            assert os.environ.get("TEST_RELOAD_VAR") == "new_value"
-        os.environ.pop("TEST_RELOAD_VAR", None)
-
-    def test_removes_deleted_known_vars(self, tmp_path):
-        """reload_env() removes known Hermes vars not present in .env."""
-        env_file = tmp_path / ".env"
-        env_file.write_text("")  # empty .env
-        # Pick a known key from OPTIONAL_ENV_VARS
-        known_key = next(iter(OPTIONAL_ENV_VARS.keys()))
-        with patch("hermes_cli.config.get_env_path", return_value=env_file):
-            os.environ[known_key] = "stale_value"
-            count = reload_env()
-            assert known_key not in os.environ
-            assert count >= 1
-
-    def test_does_not_remove_unknown_vars(self, tmp_path):
-        """reload_env() preserves non-Hermes env vars even when absent from .env."""
-        env_file = tmp_path / ".env"
-        env_file.write_text("")
-        with patch("hermes_cli.config.get_env_path", return_value=env_file):
-            os.environ["MY_CUSTOM_UNRELATED_VAR"] = "keep_me"
-            reload_env()
-            assert os.environ.get("MY_CUSTOM_UNRELATED_VAR") == "keep_me"
-        os.environ.pop("MY_CUSTOM_UNRELATED_VAR", None)
-
-
-# ---------------------------------------------------------------------------
-# redact_key tests
-# ---------------------------------------------------------------------------
-
-
-class TestRedactKey:
-    def test_long_key_shows_prefix_suffix(self):
-        result = redact_key("sk-1234567890abcdef")
-        assert result.startswith("sk-1")
-        assert result.endswith("cdef")
-        assert "..." in result
-
-    def test_short_key_fully_masked(self):
-        assert redact_key("short") == "***"
-
-    def test_empty_key(self):
-        result = redact_key("")
-        assert "not set" in result.lower() or result == "***" or "\x1b" in result
-
-
-# ---------------------------------------------------------------------------
-# web_server tests (FastAPI endpoints)
-# ---------------------------------------------------------------------------
-
-
-class TestWebServerEndpoints:
-    """Test the FastAPI REST endpoints using Starlette TestClient."""
-
-    @pytest.fixture(autouse=True)
-    def _setup_test_client(self):
-        """Create a TestClient — import is deferred to avoid requiring fastapi."""
-        try:
-            from starlette.testclient import TestClient
-        except ImportError:
-            pytest.skip("fastapi/starlette not installed")
-
-        from hermes_cli.web_server import app
-        self.client = TestClient(app)
-
-    def test_get_status(self):
-        resp = self.client.get("/api/status")
-        assert resp.status_code == 200
-        data = resp.json()
-        assert "version" in data
-        assert "hermes_home" in data
-        assert "active_sessions" in data
-
-    def test_get_status_filters_unconfigured_gateway_platforms(self, monkeypatch):
-        import gateway.config as gateway_config
-        import hermes_cli.web_server as web_server
-
-        class _Platform:
-            def __init__(self, value):
-                self.value = value
-
-        class _GatewayConfig:
-            def get_connected_platforms(self):
-                return [_Platform("telegram")]
-
-        monkeypatch.setattr(web_server, "get_running_pid", lambda: 1234)
-        monkeypatch.setattr(
-            web_server,
-            "read_runtime_status",
-            lambda: {
-                "gateway_state": "running",
-                "updated_at": "2026-04-12T00:00:00+00:00",
-                "platforms": {
-                    "telegram": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
-                    "whatsapp": {"state": "retrying", "updated_at": "2026-04-12T00:00:00+00:00"},
-                    "feishu": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
-                },
-            },
-        )
-        monkeypatch.setattr(web_server, "check_config_version", lambda: (1, 1))
-        monkeypatch.setattr(gateway_config, "load_gateway_config", lambda: _GatewayConfig())
-
-        resp = self.client.get("/api/status")
-
-        assert resp.status_code == 200
-        assert resp.json()["gateway_platforms"] == {
-            "telegram": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
-        }
-
-    def test_get_status_hides_stale_platforms_when_gateway_not_running(self, monkeypatch):
-        import gateway.config as gateway_config
-        import hermes_cli.web_server as web_server
-
-        class _GatewayConfig:
-            def get_connected_platforms(self):
-                return []
-
-        monkeypatch.setattr(web_server, "get_running_pid", lambda: None)
-        monkeypatch.setattr(
-            web_server,
-            "read_runtime_status",
-            lambda: {
-                "gateway_state": "startup_failed",
-                "updated_at": "2026-04-12T00:00:00+00:00",
-                "platforms": {
-                    "whatsapp": {"state": "retrying", "updated_at": "2026-04-12T00:00:00+00:00"},
-                    "feishu": {"state": "connected", "updated_at": "2026-04-12T00:00:00+00:00"},
-                },
-            },
-        )
-        monkeypatch.setattr(web_server, "check_config_version", lambda: (1, 1))
-        monkeypatch.setattr(gateway_config, "load_gateway_config", lambda: _GatewayConfig())
-
-        resp = self.client.get("/api/status")
-
-        assert resp.status_code == 200
-        assert resp.json()["gateway_state"] == "startup_failed"
-        assert resp.json()["gateway_platforms"] == {}
-
-    def test_get_config_schema(self):
-        resp = self.client.get("/api/config/schema")
-        assert resp.status_code == 200
-        data = resp.json()
-        assert "fields" in data
-        assert "category_order" in data
-        schema = data["fields"]
-        assert len(schema) > 100  # Should have 150+ fields
-        assert "model" in schema
-        # Verify category_order is a non-empty list
-        assert isinstance(data["category_order"], list)
-        assert len(data["category_order"]) > 0
-        assert "general" in data["category_order"]
-
-    def test_get_config_defaults(self):
-        resp = self.client.get("/api/config/defaults")
-        assert resp.status_code == 200
-        defaults = resp.json()
-        assert "model" in defaults
-
-    def test_get_env_vars(self):
-        resp = self.client.get("/api/env")
-        assert resp.status_code == 200
-        data = resp.json()
-        # Should contain known env var names
-        assert any(k.endswith("_API_KEY") or k.endswith("_TOKEN") for k in data.keys())
-
-    def test_reveal_env_var(self, tmp_path):
-        """POST /api/env/reveal should return the real unredacted value."""
-        from hermes_cli.config import save_env_value
-        from hermes_cli.web_server import _SESSION_TOKEN
-        save_env_value("TEST_REVEAL_KEY", "super-secret-value-12345")
-        resp = self.client.post(
-            "/api/env/reveal",
-            json={"key": "TEST_REVEAL_KEY"},
-            headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
-        )
-        assert resp.status_code == 200
-        data = resp.json()
-        assert data["key"] == "TEST_REVEAL_KEY"
-        assert data["value"] == "super-secret-value-12345"
-
-    def test_reveal_env_var_not_found(self):
-        """POST /api/env/reveal should 404 for unknown keys."""
-        from hermes_cli.web_server import _SESSION_TOKEN
-        resp = self.client.post(
-            "/api/env/reveal",
-            json={"key": "NONEXISTENT_KEY_XYZ"},
-            headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
-        )
-        assert resp.status_code == 404
-
-    def test_reveal_env_var_no_token(self, tmp_path):
-        """POST /api/env/reveal without token should return 401."""
-        from hermes_cli.config import save_env_value
-        save_env_value("TEST_REVEAL_NOAUTH", "secret-value")
-        resp = self.client.post(
-            "/api/env/reveal",
-            json={"key": "TEST_REVEAL_NOAUTH"},
-        )
-        assert resp.status_code == 401
-
-    def test_reveal_env_var_bad_token(self, tmp_path):
-        """POST /api/env/reveal with wrong token should return 401."""
-        from hermes_cli.config import save_env_value
-        save_env_value("TEST_REVEAL_BADAUTH", "secret-value")
-        resp = self.client.post(
-            "/api/env/reveal",
-            json={"key": "TEST_REVEAL_BADAUTH"},
-            headers={"Authorization": "Bearer wrong-token-here"},
-        )
-        assert resp.status_code == 401
-
-    def test_session_token_endpoint(self):
-        """GET /api/auth/session-token should return a token."""
-        from hermes_cli.web_server import _SESSION_TOKEN
-        resp = self.client.get("/api/auth/session-token")
-        assert resp.status_code == 200
-        assert resp.json()["token"] == _SESSION_TOKEN
-
-    def test_path_traversal_blocked(self):
-        """Verify URL-encoded path traversal is blocked."""
-        # %2e%2e = ..
-        resp = self.client.get("/%2e%2e/%2e%2e/etc/passwd")
-        # Should return 200 with index.html (SPA fallback), not the actual file
-        assert resp.status_code in (200, 404)
-        if resp.status_code == 200:
-            # Should be the SPA fallback, not the system file
-            assert "root:" not in resp.text
-
-    def test_path_traversal_dotdot_blocked(self):
-        """Direct .. path traversal via encoded sequences."""
-        resp = self.client.get("/%2e%2e/hermes_cli/web_server.py")
-        assert resp.status_code in (200, 404)
-        if resp.status_code == 200:
-            assert "FastAPI" not in resp.text  # Should not serve the actual source
-
-
-# ---------------------------------------------------------------------------
-# _build_schema_from_config tests
-# ---------------------------------------------------------------------------
-
-
-class TestBuildSchemaFromConfig:
-    def test_produces_expected_field_count(self):
-        from hermes_cli.web_server import CONFIG_SCHEMA
-        # DEFAULT_CONFIG has ~150+ leaf fields
-        assert len(CONFIG_SCHEMA) > 100
-
-    def test_schema_entries_have_required_fields(self):
-        from hermes_cli.web_server import CONFIG_SCHEMA
-        for key, entry in list(CONFIG_SCHEMA.items())[:10]:
-            assert "type" in entry, f"Missing type for {key}"
-            assert "category" in entry, f"Missing category for {key}"
-
-    def test_overrides_applied(self):
-        from hermes_cli.web_server import CONFIG_SCHEMA
-        # terminal.backend should be a select with options
-        if "terminal.backend" in CONFIG_SCHEMA:
-            entry = CONFIG_SCHEMA["terminal.backend"]
-            assert entry["type"] == "select"
-            assert "options" in entry
-            assert "local" in entry["options"]
-
-    def test_empty_prefix_produces_correct_keys(self):
-        from hermes_cli.web_server import _build_schema_from_config
-        test_config = {"model": "test", "nested": {"key": "val"}}
-        schema = _build_schema_from_config(test_config)
-        assert "model" in schema
-        assert "nested.key" in schema
-
-    def test_top_level_scalars_get_general_category(self):
-        """Top-level scalar fields should be in 'general' category."""
-        from hermes_cli.web_server import CONFIG_SCHEMA
-        assert CONFIG_SCHEMA["model"]["category"] == "general"
-
-    def test_nested_keys_get_parent_category(self):
-        """Nested fields should use the top-level parent as their category."""
-        from hermes_cli.web_server import CONFIG_SCHEMA
-        if "agent.max_turns" in CONFIG_SCHEMA:
-            assert CONFIG_SCHEMA["agent.max_turns"]["category"] == "agent"
-
-    def test_category_merge_applied(self):
-        """Small categories should be merged into larger ones."""
-        from hermes_cli.web_server import CONFIG_SCHEMA
-        categories = {e["category"] for e in CONFIG_SCHEMA.values()}
-        # These should be merged away
-        assert "privacy" not in categories  # merged into security
-        assert "context" not in categories  # merged into agent
-
-    def test_no_single_field_categories(self):
-        """After merging, no category should have just 1 field."""
-        from hermes_cli.web_server import CONFIG_SCHEMA
-        from collections import Counter
-        cats = Counter(e["category"] for e in CONFIG_SCHEMA.values())
-        for cat, count in cats.items():
-            assert count >= 2, f"Category '{cat}' has only {count} field(s) — should be merged"
-
-
-# ---------------------------------------------------------------------------
-# Config round-trip tests
-# ---------------------------------------------------------------------------
-
-
-class TestConfigRoundTrip:
-    """Verify config survives GET → edit → PUT without data loss."""
-
-    @pytest.fixture(autouse=True)
-    def _setup(self):
-        try:
-            from starlette.testclient import TestClient
-        except ImportError:
-            pytest.skip("fastapi/starlette not installed")
-        from hermes_cli.web_server import app
-        self.client = TestClient(app)
-
-    def test_get_config_no_internal_keys(self):
-        """GET /api/config should not expose _config_version or _model_meta."""
-        config = self.client.get("/api/config").json()
-        internal = [k for k in config if k.startswith("_")]
-        assert not internal, f"Internal keys leaked to frontend: {internal}"
-
-    def test_get_config_model_is_string(self):
-        """GET /api/config should normalize model dict to a string."""
-        config = self.client.get("/api/config").json()
-        assert isinstance(config.get("model"), str), \
-            f"model should be string, got {type(config.get('model'))}"
-
-    def test_round_trip_preserves_model_subkeys(self):
-        """Save and reload should not lose model.provider, model.base_url, etc."""
-        from hermes_cli.config import load_config, save_config
-
-        # Set up a config with model as a dict (the common user config form)
-        save_config({
-            "model": {
-                "default": "anthropic/claude-sonnet-4",
-                "provider": "openrouter",
-                "base_url": "https://openrouter.ai/api/v1",
-                "api_mode": "openai",
-            }
-        })
-
-        before = load_config()
-        assert isinstance(before.get("model"), dict)
-        original_keys = set(before["model"].keys())
-
-        # GET → PUT unchanged
-        web_config = self.client.get("/api/config").json()
-        assert isinstance(web_config.get("model"), str), "GET should normalize model to string"
-
-        self.client.put("/api/config", json={"config": web_config})
-
-        after = load_config()
-        assert isinstance(after.get("model"), dict), "model should still be a dict after save"
-        assert set(after["model"].keys()) >= original_keys, \
-            f"Lost model subkeys: {original_keys - set(after['model'].keys())}"
-
-    def test_edit_model_name_preserved(self):
-        """Changing the model string should update model.default on disk."""
-        from hermes_cli.config import load_config
-
-        web_config = self.client.get("/api/config").json()
-        original_model = web_config["model"]
-
-        # Change model
-        web_config["model"] = "test/editing-model"
-        self.client.put("/api/config", json={"config": web_config})
-
-        after = load_config()
-        if isinstance(after.get("model"), dict):
-            assert after["model"]["default"] == "test/editing-model"
-        else:
-            assert after["model"] == "test/editing-model"
-
-        # Restore
-        web_config["model"] = original_model
-        self.client.put("/api/config", json={"config": web_config})
-
-    def test_edit_nested_value(self):
-        """Editing a nested config value should persist correctly."""
-        from hermes_cli.config import load_config
-
-        web_config = self.client.get("/api/config").json()
-        original_turns = web_config.get("agent", {}).get("max_turns")
-
-        # Change max_turns
-        if "agent" not in web_config:
-            web_config["agent"] = {}
-        web_config["agent"]["max_turns"] = 42
-
-        self.client.put("/api/config", json={"config": web_config})
-
-        after = load_config()
-        assert after.get("agent", {}).get("max_turns") == 42
-
-        # Restore
-        web_config["agent"]["max_turns"] = original_turns
-        self.client.put("/api/config", json={"config": web_config})
-
-    def test_schema_types_match_config_values(self):
-        """Every schema field should have a matching-type value in the config."""
-        config = self.client.get("/api/config").json()
-        schema_resp = self.client.get("/api/config/schema").json()
-        schema = schema_resp["fields"]
-
-        def get_nested(obj, path):
-            parts = path.split(".")
-            cur = obj
-            for p in parts:
-                if cur is None or not isinstance(cur, dict):
-                    return None
-                cur = cur.get(p)
-            return cur
-
-        mismatches = []
-        for key, entry in schema.items():
-            val = get_nested(config, key)
-            if val is None:
-                continue  # not set in user config — fine
-            expected = entry["type"]
-            if expected in ("string", "select") and not isinstance(val, str):
-                mismatches.append(f"{key}: expected str, got {type(val).__name__}")
-            elif expected == "number" and not isinstance(val, (int, float)):
-                mismatches.append(f"{key}: expected number, got {type(val).__name__}")
-            elif expected == "boolean" and not isinstance(val, bool):
-                mismatches.append(f"{key}: expected bool, got {type(val).__name__}")
-            elif expected == "list" and not isinstance(val, list):
-                mismatches.append(f"{key}: expected list, got {type(val).__name__}")
-        assert not mismatches, f"Type mismatches:\n" + "\n".join(mismatches)
-
-
-# ---------------------------------------------------------------------------
-# New feature endpoint tests
-# ---------------------------------------------------------------------------
-
-
-class TestNewEndpoints:
-    """Tests for session detail, logs, cron, skills, tools, raw config, analytics."""
-
-    @pytest.fixture(autouse=True)
-    def _setup(self):
-        try:
-            from starlette.testclient import TestClient
-        except ImportError:
-            pytest.skip("fastapi/starlette not installed")
-        from hermes_cli.web_server import app
-        self.client = TestClient(app)
-
-    def test_get_logs_default(self):
-        resp = self.client.get("/api/logs")
-        assert resp.status_code == 200
-        data = resp.json()
-        assert "file" in data
-        assert "lines" in data
-        assert isinstance(data["lines"], list)
-
-    def test_get_logs_invalid_file(self):
-        resp = self.client.get("/api/logs?file=nonexistent")
-        assert resp.status_code == 400
-
-    def test_cron_list(self):
-        resp = self.client.get("/api/cron/jobs")
-        assert resp.status_code == 200
-        assert isinstance(resp.json(), list)
-
-    def test_cron_job_not_found(self):
-        resp = self.client.get("/api/cron/jobs/nonexistent-id")
-        assert resp.status_code == 404
-
-    def test_skills_list(self):
-        resp = self.client.get("/api/skills")
-        assert resp.status_code == 200
-        skills = resp.json()
-        assert isinstance(skills, list)
-        if skills:
-            assert "name" in skills[0]
-            assert "enabled" in skills[0]
-
-    def test_skills_list_includes_disabled_skills(self, monkeypatch):
-        import tools.skills_tool as skills_tool
-        import hermes_cli.skills_config as skills_config
-        import hermes_cli.web_server as web_server
-
-        def _fake_find_all_skills(*, skip_disabled=False):
-            if skip_disabled:
-                return [
-                    {"name": "active-skill", "description": "active", "category": "demo"},
-                    {"name": "disabled-skill", "description": "disabled", "category": "demo"},
-                ]
-            return [
-                {"name": "active-skill", "description": "active", "category": "demo"},
-            ]
-
-        monkeypatch.setattr(skills_tool, "_find_all_skills", _fake_find_all_skills)
-        monkeypatch.setattr(skills_config, "get_disabled_skills", lambda config: {"disabled-skill"})
-        monkeypatch.setattr(web_server, "load_config", lambda: {"skills": {"disabled": ["disabled-skill"]}})
-
-        resp = self.client.get("/api/skills")
-
-        assert resp.status_code == 200
-        assert resp.json() == [
-            {
-                "name": "active-skill",
-                "description": "active",
-                "category": "demo",
-                "enabled": True,
-            },
-            {
-                "name": "disabled-skill",
-                "description": "disabled",
-                "category": "demo",
-                "enabled": False,
-            },
-        ]
-
-    def test_toolsets_list(self):
-        resp = self.client.get("/api/tools/toolsets")
-        assert resp.status_code == 200
-        toolsets = resp.json()
-        assert isinstance(toolsets, list)
-        if toolsets:
-            assert "name" in toolsets[0]
-            assert "label" in toolsets[0]
-            assert "enabled" in toolsets[0]
-
-    def test_toolsets_list_matches_cli_enabled_state(self, monkeypatch):
-        import hermes_cli.tools_config as tools_config
-        import toolsets as toolsets_module
-        import hermes_cli.web_server as web_server
-
-        monkeypatch.setattr(
-            tools_config,
-            "_get_effective_configurable_toolsets",
-            lambda: [
-                ("web", "🔍 Web Search & Scraping", "web_search, web_extract"),
-                ("skills", "📚 Skills", "list, view, manage"),
-                ("memory", "💾 Memory", "persistent memory across sessions"),
-            ],
-        )
-        monkeypatch.setattr(
-            tools_config,
-            "_get_platform_tools",
-            lambda config, platform, include_default_mcp_servers=False: {"web", "skills"},
-        )
-        monkeypatch.setattr(
-            tools_config,
-            "_toolset_has_keys",
-            lambda ts_key, config=None: ts_key != "web",
-        )
-        monkeypatch.setattr(
-            toolsets_module,
-            "resolve_toolset",
-            lambda name: {
-                "web": ["web_search", "web_extract"],
-                "skills": ["skills_list", "skill_view"],
-                "memory": ["memory_read"],
-            }[name],
-        )
-        monkeypatch.setattr(web_server, "load_config", lambda: {"platform_toolsets": {"cli": ["web", "skills"]}})
-
-        resp = self.client.get("/api/tools/toolsets")
-
-        assert resp.status_code == 200
-        assert resp.json() == [
-            {
-                "name": "web",
-                "label": "🔍 Web Search & Scraping",
-                "description": "web_search, web_extract",
-                "enabled": True,
-                "available": True,
-                "configured": False,
-                "tools": ["web_extract", "web_search"],
-            },
-            {
-                "name": "skills",
-                "label": "📚 Skills",
-                "description": "list, view, manage",
-                "enabled": True,
-                "available": True,
-                "configured": True,
-                "tools": ["skill_view", "skills_list"],
-            },
-            {
-                "name": "memory",
-                "label": "💾 Memory",
-                "description": "persistent memory across sessions",
-                "enabled": False,
-                "available": False,
-                "configured": True,
-                "tools": ["memory_read"],
-            },
-        ]
-
-    def test_config_raw_get(self):
-        resp = self.client.get("/api/config/raw")
-        assert resp.status_code == 200
-        assert "yaml" in resp.json()
-
-    def test_config_raw_put_valid(self):
-        resp = self.client.put(
-            "/api/config/raw",
-            json={"yaml_text": "model: test\ntoolsets:\n  - all\n"},
-        )
-        assert resp.status_code == 200
-        assert resp.json()["ok"] is True
-
-    def test_config_raw_put_invalid(self):
-        resp = self.client.put(
-            "/api/config/raw",
-            json={"yaml_text": "- this is a list not a dict"},
-        )
-        assert resp.status_code == 400
-
-    def test_analytics_usage(self):
-        resp = self.client.get("/api/analytics/usage?days=7")
-        assert resp.status_code == 200
-        data = resp.json()
-        assert "daily" in data
-        assert "by_model" in data
-        assert "totals" in data
-        assert isinstance(data["daily"], list)
-        assert "total_sessions" in data["totals"]
-
-    def test_session_token_endpoint(self):
-        from hermes_cli.web_server import _SESSION_TOKEN
-        resp = self.client.get("/api/auth/session-token")
-        assert resp.status_code == 200
-        assert resp.json()["token"] == _SESSION_TOKEN
@@ -26,7 +26,6 @@ def _make_agent(
    agent.provider = "openrouter"
    agent.base_url = "https://openrouter.ai/api/v1"
    agent.api_key = "sk-test"
-    agent.api_mode = "chat_completions"
    agent.quiet_mode = True
    agent.log_prefix = ""
    agent.compression_enabled = compression_enabled
@@ -38,7 +37,6 @@ def _make_agent(
    agent.status_callback = None
    agent.tool_progress_callback = None
    agent._compression_warning = None
-    agent.config = None

    compressor = MagicMock(spec=ContextCompressor)
    compressor.context_length = main_context
@@ -101,94 +99,6 @@ def test_no_warning_when_aux_context_sufficient(mock_get_client, mock_ctx_len):
    assert agent._compression_warning is None


-def test_feasibility_check_passes_live_main_runtime():
-    """Compression feasibility should probe using the live session runtime."""
-    agent = _make_agent(main_context=200_000, threshold_percent=0.50)
-    agent.model = "gpt-5.4"
-    agent.provider = "openai-codex"
-    agent.base_url = "https://chatgpt.com/backend-api/codex"
-    agent.api_key = "codex-token"
-    agent.api_mode = "codex_responses"
-
-    mock_client = MagicMock()
-    mock_client.base_url = "https://chatgpt.com/backend-api/codex"
-    mock_client.api_key = "codex-token"
-
-    with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_client, "gpt-5.4")) as mock_get_client, \
-         patch("agent.model_metadata.get_model_context_length", return_value=200_000):
-        agent._emit_status = lambda msg: None
-        agent._check_compression_model_feasibility()
-
-    mock_get_client.assert_called_once_with(
-        "compression",
-        main_runtime={
-            "model": "gpt-5.4",
-            "provider": "openai-codex",
-            "base_url": "https://chatgpt.com/backend-api/codex",
-            "api_key": "codex-token",
-            "api_mode": "codex_responses",
-        },
-    )
-
-
-@patch("agent.model_metadata.get_model_context_length", return_value=1_000_000)
-@patch("agent.auxiliary_client.get_text_auxiliary_client")
-def test_feasibility_check_passes_config_context_length(mock_get_client, mock_ctx_len):
-    """auxiliary.compression.context_length from config is forwarded to
-    get_model_context_length so custom endpoints that lack /models still
-    report the correct context window (fixes #8499)."""
-    agent = _make_agent(main_context=200_000, threshold_percent=0.85)
-    agent.config = {
-        "auxiliary": {
-            "compression": {
-                "context_length": 1_000_000,
-            },
-        },
-    }
-    mock_client = MagicMock()
-    mock_client.base_url = "http://custom-endpoint:8080/v1"
-    mock_client.api_key = "sk-custom"
-    mock_get_client.return_value = (mock_client, "custom/big-model")
-
-    agent._emit_status = lambda msg: None
-    agent._check_compression_model_feasibility()
-
-    mock_ctx_len.assert_called_once_with(
-        "custom/big-model",
-        base_url="http://custom-endpoint:8080/v1",
-        api_key="sk-custom",
-        config_context_length=1_000_000,
-    )
-
-
-@patch("agent.model_metadata.get_model_context_length", return_value=128_000)
-@patch("agent.auxiliary_client.get_text_auxiliary_client")
-def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_ctx_len):
-    """Non-integer context_length in config is silently ignored."""
-    agent = _make_agent(main_context=200_000, threshold_percent=0.50)
-    agent.config = {
-        "auxiliary": {
-            "compression": {
-                "context_length": "not-a-number",
-            },
-        },
-    }
-    mock_client = MagicMock()
-    mock_client.base_url = "http://custom:8080/v1"
-    mock_client.api_key = "sk-test"
-    mock_get_client.return_value = (mock_client, "custom/model")
-
-    agent._emit_status = lambda msg: None
-    agent._check_compression_model_feasibility()
-
-    mock_ctx_len.assert_called_once_with(
-        "custom/model",
-        base_url="http://custom:8080/v1",
-        api_key="sk-test",
-        config_context_length=None,
-    )
-
-
@patch("agent.auxiliary_client.get_text_auxiliary_client")
 def test_warns_when_no_auxiliary_provider(mock_get_client):
    """Warning emitted when no auxiliary provider is configured."""
@@ -302,17 +302,6 @@ class TestStripThinkBlocks:
        assert "<think>" not in result
        assert "visible" in result

-    def test_thought_block_removed(self, agent):
-        """Gemma 4 uses <thought> tags for inline reasoning."""
-        result = agent._strip_think_blocks("<thought>internal reasoning</thought> answer")
-        assert "internal reasoning" not in result
-        assert "<thought>" not in result
-        assert "answer" in result
-
-    def test_orphaned_thought_tag(self, agent):
-        result = agent._strip_think_blocks("<thought>orphaned reasoning without close")
-        assert "<thought>" not in result
-

 class TestExtractReasoning:
    def test_reasoning_field(self, agent):
@@ -1741,9 +1730,9 @@ class TestRunConversation:
            {"role": "assistant", "content": "old answer"},
        ]

-        # 6 responses: original + 2 prefill + 3 retries after prefill exhaustion
+        # 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill)
        with (
-            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6),
+            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]),
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
@@ -1754,18 +1743,18 @@ class TestRunConversation:
        mock_compress.assert_not_called()  # no compression triggered
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"
-        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries
+        assert result["api_calls"] == 3  # 1 original + 2 prefill continuations

    def test_reasoning_only_response_prefill_then_empty(self, agent):
-        """Structured reasoning-only triggers prefill (2), then retries (3), then (empty)."""
+        """Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty)."""
        self._setup_agent(agent)
        empty_resp = _mock_response(
            content=None,
            finish_reason="stop",
            reasoning_content="structured reasoning answer",
        )
-        # 6 responses: 1 original + 2 prefill + 3 retries after prefill exhaustion
-        agent.client.chat.completions.create.side_effect = [empty_resp] * 6
+        # 3 responses: original + 2 prefill continuations, all reasoning-only
+        agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
@@ -1774,7 +1763,7 @@ class TestRunConversation:
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"
-        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries
+        assert result["api_calls"] == 3  # 1 original + 2 prefill continuations

    def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
        """When prefill continuation produces content, it becomes the final response."""
@@ -1949,88 +1938,6 @@ class TestRunConversation:
        failure_msgs = [m for m in status_messages if "no content" in m.lower() or "no fallback" in m.lower()]
        assert len(failure_msgs) >= 1, f"Expected at least 1 failure status, got: {status_messages}"

-    def test_partial_stream_recovery_uses_streamed_content(self, agent):
-        """When streaming fails after partial delivery, recovered partial content becomes final response."""
-        self._setup_agent(agent)
-        # Simulate a partial-stream-stub response: content recovered from streaming
-        partial_resp = _mock_response(
-            content="Here is the partial answer that was stream",
-            finish_reason="stop",
-        )
-        agent.client.chat.completions.create.return_value = partial_resp
-        # Simulate that streaming had already delivered this text
-        agent._current_streamed_assistant_text = "Here is the partial answer that was stream"
-        with (
-            patch.object(agent, "_persist_session"),
-            patch.object(agent, "_save_trajectory"),
-            patch.object(agent, "_cleanup_task_resources"),
-        ):
-            result = agent.run_conversation("explain something")
-        # The partial content should be used as-is (not empty, not retried)
-        assert result["completed"] is True
-        assert result["final_response"] == "Here is the partial answer that was stream"
-        assert result["api_calls"] == 1  # No retries
-
-    def test_partial_stream_recovery_on_empty_stub(self, agent):
-        """When stub response has no content but text was streamed, use streamed text."""
-        self._setup_agent(agent)
-        # Stub response with no content (old behavior before fix)
-        empty_stub = _mock_response(content=None, finish_reason="stop")
-
-        def _fake_api_call(api_kwargs):
-            # Simulate what streaming does: accumulate text before returning
-            # a stub with no content (connection died mid-stream)
-            agent._current_streamed_assistant_text = "The answer to your question is that"
-            return empty_stub
-
-        status_messages = []
-
-        def _capture_status(msg):
-            status_messages.append(msg)
-
-        with (
-            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
-            patch.object(agent, "_persist_session"),
-            patch.object(agent, "_save_trajectory"),
-            patch.object(agent, "_cleanup_task_resources"),
-            patch.object(agent, "_emit_status", side_effect=_capture_status),
-        ):
-            result = agent.run_conversation("ask me")
-        # Should recover partial streamed content, not fall through to (empty)
-        assert result["completed"] is True
-        assert result["final_response"] == "The answer to your question is that"
-        assert result["api_calls"] == 1  # No wasted retries
-        # Should emit the stream-interrupted status, NOT the empty-retry status
-        recovery_msgs = [m for m in status_messages if "stream interrupted" in m.lower()]
-        assert len(recovery_msgs) >= 1, f"Expected stream recovery status, got: {status_messages}"
-        # Should NOT have retry statuses
-        retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
-        assert len(retry_msgs) == 0, f"Should not retry when stream content exists: {status_messages}"
-
-    def test_partial_stream_recovery_preempts_prior_turn_fallback(self, agent):
-        """Partial streamed content takes priority over _last_content_with_tools fallback."""
-        self._setup_agent(agent)
-        # Set up the prior-turn fallback content (from a previous turn with tool calls)
-        agent._last_content_with_tools = "Old content from prior turn with tools"
-        # Stub response with no content
-        empty_stub = _mock_response(content=None, finish_reason="stop")
-
-        def _fake_api_call(api_kwargs):
-            # Simulate partial streaming before connection death
-            agent._current_streamed_assistant_text = "Fresh partial content from this turn"
-            return empty_stub
-
-        with (
-            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
-            patch.object(agent, "_persist_session"),
-            patch.object(agent, "_save_trajectory"),
-            patch.object(agent, "_cleanup_task_resources"),
-        ):
-            result = agent.run_conversation("question")
-        # Should use the streamed content, not the old prior-turn fallback
-        assert result["final_response"] == "Fresh partial content from this turn"
-        assert result["api_calls"] == 1
-
    def test_nous_401_refreshes_after_remint_and_retries(self, agent):
        self._setup_agent(agent)
        agent.provider = "nous"
@@ -1,120 +0,0 @@
-"""Tests for empty model fallback — when provider is configured but model is missing."""
-
-from unittest.mock import MagicMock, patch
-import pytest
-
-
-class TestGetDefaultModelForProvider:
-    """Unit tests for hermes_cli.models.get_default_model_for_provider."""
-
-    def test_known_provider_returns_first_model(self):
-        from hermes_cli.models import get_default_model_for_provider
-        result = get_default_model_for_provider("openai-codex")
-        # Should return first model from _PROVIDER_MODELS["openai-codex"]
-        assert result
-        assert isinstance(result, str)
-
-    def test_openrouter_returns_empty(self):
-        """OpenRouter uses dynamic model fetch, no static catalog entry."""
-        from hermes_cli.models import get_default_model_for_provider
-        # OpenRouter is not in _PROVIDER_MODELS — it uses live fetching
-        result = get_default_model_for_provider("openrouter")
-        assert result == ""
-
-    def test_unknown_provider_returns_empty(self):
-        from hermes_cli.models import get_default_model_for_provider
-        assert get_default_model_for_provider("nonexistent-provider") == ""
-
-    def test_custom_provider_returns_empty(self):
-        """Custom provider has no model catalog — should return empty."""
-        from hermes_cli.models import get_default_model_for_provider
-        # Custom providers don't have entries in _PROVIDER_MODELS
-        assert get_default_model_for_provider("some-random-custom") == ""
-
-
-class TestGatewayEmptyModelFallback:
-    """Test that _resolve_session_agent_runtime fills in empty model from provider catalog."""
-
-    def test_empty_model_filled_from_provider(self):
-        """When config has no model but provider is openai-codex, use first codex model."""
-        from gateway.run import GatewayRunner
-
-        runner = object.__new__(GatewayRunner)
-        runner._session_model_overrides = {}
-
-        # Mock _resolve_gateway_model to return empty string
-        # Mock _resolve_runtime_agent_kwargs to return openai-codex provider
-        with patch("gateway.run._resolve_gateway_model", return_value=""), \
-             patch("gateway.run._resolve_runtime_agent_kwargs", return_value={
-                 "provider": "openai-codex",
-                 "api_key": "test-key",
-                 "base_url": "https://chatgpt.com/backend-api/codex",
-                 "api_mode": "codex_responses",
-             }):
-            model, kwargs = runner._resolve_session_agent_runtime()
-
-        # Model should have been filled in from provider catalog
-        assert model, "Model should not be empty when provider is known"
-        assert isinstance(model, str)
-        assert kwargs["provider"] == "openai-codex"
-
-    def test_nonempty_model_not_overridden(self):
-        """When config has a model set, don't override it."""
-        from gateway.run import GatewayRunner
-
-        runner = object.__new__(GatewayRunner)
-        runner._session_model_overrides = {}
-
-        with patch("gateway.run._resolve_gateway_model", return_value="gpt-5.4"), \
-             patch("gateway.run._resolve_runtime_agent_kwargs", return_value={
-                 "provider": "openai-codex",
-                 "api_key": "test-key",
-                 "base_url": "https://chatgpt.com/backend-api/codex",
-                 "api_mode": "codex_responses",
-             }):
-            model, kwargs = runner._resolve_session_agent_runtime()
-
-        assert model == "gpt-5.4", "Explicit model should not be overridden"
-
-    def test_empty_model_no_provider_stays_empty(self):
-        """When both model and provider are empty, model stays empty."""
-        from gateway.run import GatewayRunner
-
-        runner = object.__new__(GatewayRunner)
-        runner._session_model_overrides = {}
-
-        with patch("gateway.run._resolve_gateway_model", return_value=""), \
-             patch("gateway.run._resolve_runtime_agent_kwargs", return_value={
-                 "provider": "",
-                 "api_key": "test-key",
-                 "base_url": "https://example.com",
-                 "api_mode": "chat_completions",
-             }):
-            model, kwargs = runner._resolve_session_agent_runtime()
-
-        # Can't fill in a default without knowing the provider
-        assert model == ""
-
-
-class TestResolveGatewayModel:
-    """Test _resolve_gateway_model reads model from config correctly."""
-
-    def test_returns_default_key(self):
-        from gateway.run import _resolve_gateway_model
-        assert _resolve_gateway_model({"model": {"default": "gpt-5.4"}}) == "gpt-5.4"
-
-    def test_returns_model_key_fallback(self):
-        from gateway.run import _resolve_gateway_model
-        assert _resolve_gateway_model({"model": {"model": "gpt-5.4"}}) == "gpt-5.4"
-
-    def test_returns_empty_when_missing(self):
-        from gateway.run import _resolve_gateway_model
-        assert _resolve_gateway_model({"model": {}}) == ""
-
-    def test_returns_empty_when_no_model_section(self):
-        from gateway.run import _resolve_gateway_model
-        assert _resolve_gateway_model({}) == ""
-
-    def test_string_model_config(self):
-        from gateway.run import _resolve_gateway_model
-        assert _resolve_gateway_model({"model": "my-model"}) == "my-model"
@@ -6,8 +6,7 @@ from unittest.mock import patch

 import pytest

-import hermes_constants
-from hermes_constants import get_default_hermes_root, is_container
+from hermes_constants import get_default_hermes_root


 class TestGetDefaultHermesRoot:
@@ -61,53 +60,3 @@ class TestGetDefaultHermesRoot:
        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        monkeypatch.setenv("HERMES_HOME", str(profile))
        assert get_default_hermes_root() == docker_root
-
-
-class TestIsContainer:
-    """Tests for is_container() — Docker/Podman detection."""
-
-    def _reset_cache(self, monkeypatch):
-        """Reset the cached detection result before each test."""
-        monkeypatch.setattr(hermes_constants, "_container_detected", None)
-
-    def test_detects_dockerenv(self, monkeypatch, tmp_path):
-        """/.dockerenv triggers container detection."""
-        self._reset_cache(monkeypatch)
-        monkeypatch.setattr(os.path, "exists", lambda p: p == "/.dockerenv")
-        assert is_container() is True
-
-    def test_detects_containerenv(self, monkeypatch, tmp_path):
-        """/run/.containerenv triggers container detection (Podman)."""
-        self._reset_cache(monkeypatch)
-        monkeypatch.setattr(os.path, "exists", lambda p: p == "/run/.containerenv")
-        assert is_container() is True
-
-    def test_detects_cgroup_docker(self, monkeypatch, tmp_path):
-        """/proc/1/cgroup containing 'docker' triggers detection."""
-        import builtins
-        self._reset_cache(monkeypatch)
-        monkeypatch.setattr(os.path, "exists", lambda p: False)
-        cgroup_file = tmp_path / "cgroup"
-        cgroup_file.write_text("12:memory:/docker/abc123\n")
-        _real_open = builtins.open
-        monkeypatch.setattr("builtins.open", lambda p, *a, **kw: _real_open(str(cgroup_file), *a, **kw) if p == "/proc/1/cgroup" else _real_open(p, *a, **kw))
-        assert is_container() is True
-
-    def test_negative_case(self, monkeypatch, tmp_path):
-        """Returns False on a regular Linux host."""
-        import builtins
-        self._reset_cache(monkeypatch)
-        monkeypatch.setattr(os.path, "exists", lambda p: False)
-        cgroup_file = tmp_path / "cgroup"
-        cgroup_file.write_text("12:memory:/\n")
-        _real_open = builtins.open
-        monkeypatch.setattr("builtins.open", lambda p, *a, **kw: _real_open(str(cgroup_file), *a, **kw) if p == "/proc/1/cgroup" else _real_open(p, *a, **kw))
-        assert is_container() is False
-
-    def test_caches_result(self, monkeypatch):
-        """Second call uses cached value without re-probing."""
-        monkeypatch.setattr(hermes_constants, "_container_detected", True)
-        assert is_container() is True
-        # Even if we make os.path.exists return False, cached value wins
-        monkeypatch.setattr(os.path, "exists", lambda p: False)
-        assert is_container() is True
@@ -935,7 +935,7 @@ class TestSchemaInit:
    def test_schema_version(self, db):
        cursor = db._conn.execute("SELECT version FROM schema_version")
        version = cursor.fetchone()[0]
-        assert version == 7
+        assert version == 6

    def test_title_column_exists(self, db):
        """Verify the title column was created in the sessions table."""
@@ -996,7 +996,7 @@ class TestSchemaInit:

        # Verify migration
        cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
-        assert cursor.fetchone()[0] == 7
+        assert cursor.fetchone()[0] == 6

        # Verify title column exists and is NULL for existing sessions
        session = migrated_db.get_session("existing")
@@ -18,7 +18,6 @@ from tools.homeassistant_tool import (
    _handle_call_service,
    _BLOCKED_DOMAINS,
    _ENTITY_ID_RE,
-    _SERVICE_NAME_RE,
 )


@@ -304,93 +303,6 @@ class TestEntityIdValidation:
            assert "Invalid entity_id" not in result["error"]


-# ---------------------------------------------------------------------------
-# Security: domain/service name format validation
-# ---------------------------------------------------------------------------
-
-
-class TestServiceNameValidation:
-    """Verify domain/service format validation prevents path traversal in URL.
-
-    The domain and service parameters are interpolated into
-    /api/services/{domain}/{service}, so allowing arbitrary strings would
-    enable SSRF via path traversal or blocked-domain bypass.
-    """
-
-    def test_valid_domain_names(self):
-        assert _SERVICE_NAME_RE.match("light")
-        assert _SERVICE_NAME_RE.match("switch")
-        assert _SERVICE_NAME_RE.match("climate")
-        assert _SERVICE_NAME_RE.match("shell_command")
-        assert _SERVICE_NAME_RE.match("media_player")
-
-    def test_valid_service_names(self):
-        assert _SERVICE_NAME_RE.match("turn_on")
-        assert _SERVICE_NAME_RE.match("turn_off")
-        assert _SERVICE_NAME_RE.match("set_temperature")
-        assert _SERVICE_NAME_RE.match("toggle")
-
-    def test_path_traversal_in_domain_rejected(self):
-        assert _SERVICE_NAME_RE.match("../../api/config") is None
-        assert _SERVICE_NAME_RE.match("light/../../../etc") is None
-        assert _SERVICE_NAME_RE.match("../config") is None
-
-    def test_path_traversal_in_service_rejected(self):
-        assert _SERVICE_NAME_RE.match("../../api/config") is None
-        assert _SERVICE_NAME_RE.match("turn_on/../../config") is None
-
-    def test_blocked_domain_bypass_via_traversal_rejected(self):
-        """Ensure shell_command/../light is rejected, not just checked against blocklist."""
-        assert _SERVICE_NAME_RE.match("shell_command/../light") is None
-        assert _SERVICE_NAME_RE.match("python_script/../scene") is None
-        assert _SERVICE_NAME_RE.match("hassio/../automation") is None
-
-    def test_slashes_rejected(self):
-        assert _SERVICE_NAME_RE.match("light/turn_on") is None
-        assert _SERVICE_NAME_RE.match("a/b/c") is None
-
-    def test_dots_rejected(self):
-        assert _SERVICE_NAME_RE.match("light.turn_on") is None
-        assert _SERVICE_NAME_RE.match("..") is None
-
-    def test_uppercase_rejected(self):
-        assert _SERVICE_NAME_RE.match("LIGHT") is None
-        assert _SERVICE_NAME_RE.match("Turn_On") is None
-
-    def test_special_chars_rejected(self):
-        assert _SERVICE_NAME_RE.match("light;rm") is None
-        assert _SERVICE_NAME_RE.match("light&cmd") is None
-        assert _SERVICE_NAME_RE.match("light cmd") is None
-
-    def test_handler_rejects_traversal_domain(self):
-        """_handle_call_service must reject domain with path traversal."""
-        result = json.loads(_handle_call_service({
-            "domain": "../../api/config",
-            "service": "turn_on",
-        }))
-        assert "error" in result
-        assert "Invalid domain" in result["error"]
-
-    def test_handler_rejects_traversal_service(self):
-        """_handle_call_service must reject service with path traversal."""
-        result = json.loads(_handle_call_service({
-            "domain": "light",
-            "service": "../../api/config",
-        }))
-        assert "error" in result
-        assert "Invalid service" in result["error"]
-
-    def test_handler_rejects_blocklist_bypass_traversal(self):
-        """Blocklist bypass via shell_command/../light must be caught by format validation."""
-        result = json.loads(_handle_call_service({
-            "domain": "shell_command/../light",
-            "service": "turn_on",
-        }))
-        assert "error" in result
-        # Must be rejected as "Invalid domain", not slip through the blocklist
-        assert "Invalid domain" in result["error"]
-
-
 # ---------------------------------------------------------------------------
 # Availability check
 # ---------------------------------------------------------------------------
--- a/Show More
+++ b/Show More