feat(cli): add /exit --delete flag to remove session on quit

Port from google-gemini/gemini-cli#19332.

Users can now exit with '/exit --delete' (or '/quit --delete', '/exit -d') to permanently remove the current session's SQLite history plus on-disk transcripts (*.json / *.jsonl / request_dump_*) in one shot. Useful for privacy-sensitive workflows and one-off interactions where leaving a session recording behind is undesirable.

Implementation:
- New HermesCLI._delete_session_on_exit one-shot flag (defaults False).
- process_command() parses --delete / -d after /exit or /quit and arms the flag. Unknown args print a hint and keep the CLI running (prevents typos like '/exit -delete' from accidentally exiting).
- Shutdown path calls SessionDB.delete_session(session_id, sessions_dir=...) right after end_session() when the flag is set. That API already existed for 'hermes sessions delete' and handles both SQLite removal (orphaning child sessions so FK constraints hold) and on-disk file cleanup.
- /quit CommandDef now advertises '[--delete]' in args_hint so /help and CLI autocomplete surface it.

Tests: tests/cli/test_exit_delete_session.py (12 cases covering both aliases, case insensitivity, whitespace, short form, unknown-arg rejection, and registry metadata).

E2E-verified with isolated HERMES_HOME: session row deleted, all three transcript/request-dump files removed, second delete_session call correctly returns False.
fix(tui): word-wrap composer input (#17651)
2026-04-29 17:05:23 -07:00 · 2026-04-29 16:55:49 -07:00 · 2026-04-29 16:55:27 -07:00 · 2026-04-29 15:15:37 -07:00 · 2026-04-29 17:05:51 -05:00 · 2026-04-29 14:58:17 -07:00
206 changed files with 22259 additions and 1197 deletions
@@ -20,7 +20,7 @@ from pathlib import Path

 from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
-from utils import normalize_proxy_env_vars
+from utils import base_url_host_matches, normalize_proxy_env_vars

 # NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
 # ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
@@ -257,11 +257,10 @@ _OAUTH_ONLY_BETAS = [
    "oauth-2025-04-20",
 ]

-# Claude Code version — sent on OAuth token-exchange / refresh requests
-# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
-# OAuth flow validates the UA and may reject requests with a version that's
-# too old, so detecting dynamically keeps users on a current Claude Code
-# install from hitting stale-version errors during login/refresh.
+# Claude Code identity — required for OAuth requests to be routed correctly.
+# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
+# The version must stay reasonably current — Anthropic rejects OAuth requests
+# when the spoofed user-agent version is too far behind the actual release.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
 _claude_code_version_cache: Optional[str] = None

@@ -269,9 +268,9 @@ _claude_code_version_cache: Optional[str] = None
 def _detect_claude_code_version() -> str:
    """Detect the installed Claude Code version, fall back to a static constant.

-    Used only by the OAuth token-exchange / refresh flow
-    (``platform.claude.com/v1/oauth/token``). The Messages API client no
-    longer sends a claude-cli user-agent.
+    Anthropic's OAuth infrastructure validates the user-agent version and may
+    reject requests with a version that's too old.  Detecting dynamically means
+    users who keep Claude Code updated never hit stale-version 400s.
    """
    import subprocess as _sp

@@ -291,13 +290,12 @@ def _detect_claude_code_version() -> str:
    return _CLAUDE_CODE_VERSION_FALLBACK


-def _get_claude_code_version() -> str:
-    """Lazily detect the installed Claude Code version for OAuth flow headers.
+_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
+_MCP_TOOL_PREFIX = "mcp_"

-    Used only on the OAuth token-exchange and refresh endpoints
-    (``platform.claude.com/v1/oauth/token``). The Messages API client does
-    not send a claude-cli user-agent.
-    """
+
+def _get_claude_code_version() -> str:
+    """Lazily detect the installed Claude Code version when OAuth headers need it."""
    global _claude_code_version_cache
    if _claude_code_version_cache is None:
        _claude_code_version_cache = _detect_claude_code_version()
@@ -367,6 +365,88 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")


+# Model-name prefixes that identify the Kimi / Moonshot family.  Covers
+# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
+# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
+# Matched case-insensitively against the post-``normalize_model_name`` form,
+# so a caller's ``provider/vendor/model`` slug is handled the same as a
+# bare name.
+_KIMI_FAMILY_MODEL_PREFIXES = (
+    "kimi-", "kimi_",
+    "moonshot-", "moonshot_",
+    "k1.", "k1-",
+    "k2.", "k2-",
+    "k25", "k2.5",
+)
+
+
+def _model_name_is_kimi_family(model: str | None) -> bool:
+    if not isinstance(model, str):
+        return False
+    m = model.strip().lower()
+    if not m:
+        return False
+    # Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
+    if "/" in m:
+        m = m.rsplit("/", 1)[-1]
+    return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
+
+
+def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
+    """Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.
+
+    Broader than ``_is_kimi_coding_endpoint`` — matches:
+
+    - Kimi's official ``/coding`` URL (legacy check, preserved)
+    - Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
+    - Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
+      family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …).  Users with
+      ``api_mode: anthropic_messages`` on a private gateway fronting Kimi
+      fall into this branch — the upstream still enforces Kimi's thinking
+      semantics (reasoning_content required on every replayed tool-call
+      message) regardless of the gateway's hostname.
+
+    Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
+    preserve unsigned reasoning_content-derived thinking blocks on replay.
+    See hermes-agent#13848, #17057.
+    """
+    if _is_kimi_coding_endpoint(base_url):
+        return True
+    for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
+        if base_url_host_matches(base_url or "", _domain):
+            return True
+    if _model_name_is_kimi_family(model):
+        return True
+    return False
+
+
+def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
+    """Return True for DeepSeek's Anthropic-compatible endpoint.
+
+    DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
+    but, when thinking mode is enabled, requires the ``thinking`` blocks
+    from prior assistant turns to round-trip on subsequent requests — the
+    generic third-party path strips them and triggers HTTP 400::
+
+        The content[].thinking in the thinking mode must be passed back
+        to the API.
+
+    Per DeepSeek's published compatibility matrix the blocks are unsigned
+    (no Anthropic-proprietary signature, no ``redacted_thinking`` support),
+    so this endpoint is handled with the same strip-signed / keep-unsigned
+    policy used for Kimi's ``/coding`` endpoint.  The match is pinned to
+    the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
+    base URL (which never reaches this adapter) is not misclassified.
+    See hermes-agent#16748.
+    """
+    if not base_url_host_matches(base_url or "", "api.deepseek.com"):
+        return False
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return "/anthropic" in normalized.rstrip("/").lower()
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -467,21 +547,15 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + OAuth-only betas.
-        # The OAuth-specific beta headers are still required by Anthropic's
-        # OAuth-gated Messages API path; the Claude Code user-agent / x-app
-        # spoofing is deliberately NOT sent — Hermes identifies as itself.
-        #
-        # ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
-        # OAuth requests that carry it with
-        #   "This authentication style is incompatible with the long
-        #    context beta header."
-        # Subscription-gated OAuth traffic gets the 200K default window.
-        oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
-        all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
+        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
+        # Anthropic routes OAuth requests based on user-agent and headers;
+        # without Claude Code's fingerprint, requests get intermittent 500s.
+        all_betas = common_betas + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
+            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
@@ -825,45 +899,17 @@ def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

    Priority:
-      1. Hermes credential pool (``~/.hermes/auth.json`` →
-         ``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
-         own PKCE login flow. Entries are auto-refreshed when near
-         expiry. Env-sourced pool entries (``source="env:..."``) are
-         skipped here so the env-var priority logic below still runs.
-      2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
-      3. CLAUDE_CODE_OAUTH_TOKEN env var
-      4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
+      1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
+      2. CLAUDE_CODE_OAUTH_TOKEN env var
+      3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
         — with automatic refresh if expired and a refresh token is available
-      5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
+      4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)

    Returns the token string or None.
    """
-    # 1. Hermes credential pool — the live source of truth for tokens
-    #    minted via ``hermes login anthropic`` / the dashboard PKCE flow.
-    #    ``select()`` picks the best available entry and refreshes it if
-    #    it's near expiry, so callers always get a fresh token.
-    #
-    #    Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
-    #    those are passthroughs of the env var, and the env-var branches
-    #    below have richer priority logic (``_prefer_refreshable_claude_code_token``)
-    #    that can upgrade a static env OAuth token to a refreshed
-    #    Claude Code token. Letting the pool win here would short-circuit
-    #    that upgrade.
-    try:
-        from agent.credential_pool import load_pool
-        pool = load_pool("anthropic")
-        entry = pool.select()
-        if entry and entry.access_token and not entry.source.startswith("env:"):
-            return entry.access_token
-    except Exception as exc:
-        # Pool lookup is best-effort — fall through to env/file sources
-        # if anything goes wrong (e.g. auth.json corruption during a
-        # concurrent write).
-        logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
-
    creds = read_claude_code_credentials()

-    # 2. Hermes-managed OAuth/setup token env var
+    # 1. Hermes-managed OAuth/setup token env var
    token = os.getenv("ANTHROPIC_TOKEN", "").strip()
    if token:
        preferred = _prefer_refreshable_claude_code_token(token, creds)
@@ -871,7 +917,7 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return token

-    # 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
+    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
    cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
    if cc_token:
        preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
@@ -879,12 +925,12 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return cc_token

-    # 4. Claude Code credential file
+    # 3. Claude Code credential file
    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
    if resolved_claude_token:
        return resolved_claude_token

-    # 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
+    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if api_key:
@@ -1112,9 +1158,12 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
        # These must not be converted to hyphens.  See issue #12295.
        if _is_bedrock_model_id(model):
            return model
-        # OpenRouter uses dots for version separators (claude-opus-4.6),
-        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
-        model = model.replace(".", "-")
+        # Only convert dots to hyphens for Anthropic/Claude models.
+        # Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
+        # as part of their canonical names.  See issue #17171.
+        _lower = model.lower()
+        if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
+            model = model.replace(".", "-")
    return model


@@ -1301,6 +1350,7 @@ def _convert_content_to_anthropic(content: Any) -> Any:
 def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
+    model: str | None = None,
 ) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

@@ -1312,6 +1362,12 @@ def convert_messages_to_anthropic(
    endpoint, all thinking block signatures are stripped.  Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When *model* is provided and matches the Kimi / Moonshot family (or
+    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
+    synthesised from ``reasoning_content`` are preserved on replayed
+    assistant tool-call messages — Kimi requires the field to exist, even
+    if empty.
    """
    system = None
    result = []
@@ -1540,7 +1596,16 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    _is_kimi = _is_kimi_coding_endpoint(base_url)
+    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
+    # Anthropic Messages protocol upstream but require that thinking blocks
+    # synthesised from reasoning_content round-trip on subsequent turns when
+    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
+    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
+    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
+    _preserve_unsigned_thinking = (
+        _is_kimi_family_endpoint(base_url, model)
+        or _is_deepseek_anthropic_endpoint(base_url)
+    )

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1552,22 +1617,22 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_kimi:
-            # Kimi's /coding endpoint enables thinking server-side and
-            # requires unsigned thinking blocks on replayed assistant
-            # tool-call messages.  Strip signed Anthropic blocks (Kimi
-            # can't validate signatures) but preserve the unsigned ones
-            # we synthesised from reasoning_content above.
+        if _preserve_unsigned_thinking:
+            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
+            # thinking server-side and require unsigned thinking blocks on
+            # replayed assistant tool-call messages.  Strip signed Anthropic
+            # blocks (neither upstream can validate Anthropic signatures) but
+            # preserve the unsigned ones we synthesised from reasoning_content.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — Kimi can't validate, strip
+                    # Anthropic-signed block — upstream can't validate, strip
                    continue
                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: Kimi needs it for message-history validation.
+                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
@@ -1649,10 +1714,8 @@ def build_anthropic_kwargs(
    "max_tokens too large given prompt" errors and retry with a smaller cap
    (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).

-    When *is_oauth* is True, enables the OAuth-only beta headers required by
-    Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
-    the default headers are set by build_anthropic_client). No system-prompt
-    or tool-name rewriting is performed — Hermes identifies as itself.
+    When *is_oauth* is True, applies Claude Code compatibility transforms:
+    system prompt prefix, tool name prefixing, and prompt sanitization.

    When *preserve_dots* is True, model name dots are not converted to hyphens
    (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
@@ -1665,7 +1728,9 @@ def build_anthropic_kwargs(
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
-    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
+    system, anthropic_messages = convert_messages_to_anthropic(
+        messages, base_url=base_url, model=model
+    )
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
@@ -1685,11 +1750,45 @@ def build_anthropic_kwargs(
    if context_length and effective_max_tokens > context_length:
        effective_max_tokens = max(context_length - 1, 1)

-    # OAuth requests go through Anthropic's subscription-gated Messages
-    # endpoint but otherwise send the real Hermes system prompt and real
-    # Hermes tool names — the only OAuth-specific wire differences are
-    # Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
-    # build_anthropic_client and the fast-mode branch below).
+    # ── OAuth: Claude Code identity ──────────────────────────────────
+    if is_oauth:
+        # 1. Prepend Claude Code system prompt identity
+        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
+        if isinstance(system, list):
+            system = [cc_block] + system
+        elif isinstance(system, str) and system:
+            system = [cc_block, {"type": "text", "text": system}]
+        else:
+            system = [cc_block]
+
+        # 2. Sanitize system prompt — replace product name references
+        #    to avoid Anthropic's server-side content filters.
+        for block in system:
+            if isinstance(block, dict) and block.get("type") == "text":
+                text = block.get("text", "")
+                text = text.replace("Hermes Agent", "Claude Code")
+                text = text.replace("Hermes agent", "Claude Code")
+                text = text.replace("hermes-agent", "claude-code")
+                text = text.replace("Nous Research", "Anthropic")
+                block["text"] = text
+
+        # 3. Prefix tool names with mcp_ (Claude Code convention)
+        if anthropic_tools:
+            for tool in anthropic_tools:
+                if "name" in tool:
+                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
+
+        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
+        for msg in anthropic_messages:
+            content = msg.get("content")
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "tool_use" and "name" in block:
+                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
+                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
+                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
+                            pass  # tool_result uses ID, not name

    kwargs: Dict[str, Any] = {
        "model": model,
@@ -1737,7 +1836,7 @@ def build_anthropic_kwargs(
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    _is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
@@ -1780,9 +1879,6 @@ def build_anthropic_kwargs(
        # extra_headers override the client-level anthropic-beta header).
        betas = list(_common_betas_for_base_url(base_url))
        if is_oauth:
-            # Strip context-1m — incompatible with OAuth auth. See matching
-            # comment in build_anthropic_client().
-            betas = [b for b in betas if b != _CONTEXT_1M_BETA]
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
@@ -101,6 +101,14 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
 logger = logging.getLogger(__name__)


+def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
+    """Return False instead of raising when a patched symbol is not a type."""
+    try:
+        return isinstance(obj, maybe_type)
+    except TypeError:
+        return False
+
+
 def _extract_url_query_params(url: str):
    """Extract query params from URL, return (clean_url, default_query dict or None)."""
    parsed = urlparse(url)
@@ -210,6 +218,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "gmi": "google/gemini-3.1-flash-lite-preview",
    "minimax": "MiniMax-M2.7",
+    "minimax-oauth": "MiniMax-M2.7-highspeed",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
    "ai-gateway": "google/gemini-3-flash",
@@ -229,6 +238,21 @@ _PROVIDER_VISION_MODELS: Dict[str, str] = {
    "zai": "glm-5v-turbo",
 }

+# Providers whose endpoint does not accept image input, even though the
+# provider's broader ecosystem has vision models available elsewhere.  When
+# `auxiliary.vision.provider: auto` sees one of these as the main provider,
+# it must skip straight to the aggregator chain instead of returning a client
+# that will 404 on every vision request.
+#
+# kimi-coding / kimi-coding-cn: the Kimi Coding Plan routes through
+# api.kimi.com/coding (Anthropic Messages wire) which Kimi's own docs
+# describe as having no image_in capability. Vision lives on the separate
+# Kimi Platform (api.moonshot.ai, OpenAI-wire, pay-as-you-go).  See #17076.
+_PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
+    "kimi-coding",
+    "kimi-coding-cn",
+})
+
 # OpenRouter app attribution headers
 _OR_HEADERS = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
@@ -713,7 +737,9 @@ class _AnthropicCompletionsAdapter:

        response = self._client.messages.create(**anthropic_kwargs)
        _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(response)
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )

        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
        # .function.arguments) via properties, so no wrapping needed.
@@ -843,20 +869,20 @@ def _maybe_wrap_anthropic(
    - The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
    """
    # Already wrapped — don't double-wrap.
-    if isinstance(client_obj, AnthropicAuxiliaryClient):
+    if _safe_isinstance(client_obj, AnthropicAuxiliaryClient):
        return client_obj
    # Other specialized adapters we should never re-dispatch.
-    if isinstance(client_obj, CodexAuxiliaryClient):
+    if _safe_isinstance(client_obj, CodexAuxiliaryClient):
        return client_obj
    try:
        from agent.gemini_native_adapter import GeminiNativeClient
-        if isinstance(client_obj, GeminiNativeClient):
+        if _safe_isinstance(client_obj, GeminiNativeClient):
            return client_obj
    except ImportError:
        pass
    try:
        from agent.copilot_acp_client import CopilotACPClient
-        if isinstance(client_obj, CopilotACPClient):
+        if _safe_isinstance(client_obj, CopilotACPClient):
            return client_obj
    except ImportError:
        pass
@@ -2532,6 +2558,19 @@ def resolve_vision_provider_client(
                        main_provider, default_model or resolved_model or main_model,
                    )
                    return _finalize(main_provider, sync_client, default_model)
+            elif main_provider in _PROVIDERS_WITHOUT_VISION:
+                # Kimi Coding Plan's /coding endpoint (Anthropic Messages wire)
+                # does not accept image input — Kimi's own docs say "Current
+                # model does not support image input, switch to a model with
+                # image_in capability" and vision lives on the separate Kimi
+                # Platform (api.moonshot.ai). Skip the main provider and fall
+                # through to the aggregator chain instead of returning a
+                # client that will 404 on every vision request (#17076).
+                logger.debug(
+                    "Vision auto-detect: skipping main provider %s (no "
+                    "vision support) — falling through to aggregator chain",
+                    main_provider,
+                )
            else:
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
@@ -3013,7 +3052,7 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:

 # Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
 # Their image content blocks must use Anthropic format, not OpenAI format.
-_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})


 def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
@@ -1299,6 +1299,48 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

+    elif provider == "minimax-oauth":
+        # MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
+        # Seed the pool so `/auth list` reflects the logged-in state and the
+        # standard `hermes auth remove minimax-oauth <N>` flow works.
+        # Read the raw auth state here instead of calling
+        # resolve_minimax_oauth_runtime_credentials, which always refreshes on
+        # expiry — this avoids surprise network calls during provider discovery.
+        try:
+            from hermes_cli.auth import get_provider_auth_state
+            state = get_provider_auth_state("minimax-oauth")
+            if state and state.get("access_token"):
+                source_name = "oauth"
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    expires_at_ms = None
+                    try:
+                        from datetime import datetime as _dt
+                        raw = state.get("expires_at", "")
+                        if raw:
+                            expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
+                    except Exception:
+                        expires_at_ms = None
+                    base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_OAUTH,
+                            "access_token": state["access_token"],
+                            "refresh_token": state.get("refresh_token"),
+                            "expires_at_ms": expires_at_ms,
+                            "base_url": base_url,
+                            "label": state.get("label", "") or label_from_token(
+                                state.get("access_token", ""), source_name
+                            ),
+                        },
+                    )
+        except Exception as exc:
+            logger.debug("MiniMax OAuth token seed failed: %s", exc)
+
    elif provider == "openai-codex":
        # Respect user suppression — `hermes auth remove openai-codex` marks
        # the device_code source as suppressed so it won't be re-seeded from
@@ -252,6 +252,19 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
    return result


+def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
+    """MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
+
+    Same pattern as Nous: single-source OAuth state with refresh tokens.
+    Suppression of the `oauth` source ensures the pool reseed path
+    (_seed_from_singletons) doesn't instantly undo the removal.
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    return result
+
+
 def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.

@@ -389,6 +402,11 @@ def _register_all_sources() -> None:
        remove_fn=_remove_qwen_cli,
        description="~/.qwen/oauth_creds.json",
    ))
+    register(RemovalStep(
+        provider="minimax-oauth", source_id="oauth",
+        remove_fn=_remove_minimax_oauth,
+        description="auth.json providers.minimax-oauth",
+    ))
    register(RemovalStep(
        provider="*", source_id="config:",
        match_fn=lambda src: src.startswith("config:") or src == "model_config",
@@ -0,0 +1,869 @@
+"""Curator — background skill maintenance orchestrator.
+
+The curator is an auxiliary-model task that periodically reviews agent-created
+skills and maintains the collection. It runs inactivity-triggered (no cron
+daemon): when the agent is idle and the last curator run was longer than
+``interval_hours`` ago, ``maybe_run_curator()`` spawns a forked AIAgent to do
+the review.
+
+Responsibilities:
+  - Auto-transition lifecycle states based on last_used_at timestamps
+  - Spawn a background review agent that can pin / archive / consolidate /
+    patch agent-created skills via skill_manage
+  - Persist curator state (last_run_at, paused, etc.) in .curator_state
+
+Strict invariants:
+  - Only touches agent-created skills (see tools/skill_usage.is_agent_created)
+  - Never auto-deletes — only archives. Archive is recoverable.
+  - Pinned skills bypass all auto-transitions
+  - Uses the auxiliary client; never touches the main session's prompt cache
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import tempfile
+import threading
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Set
+
+from hermes_constants import get_hermes_home
+from tools import skill_usage
+
+logger = logging.getLogger(__name__)
+
+
+# Fallbacks used by the get_*() accessors in this module when the "curator"
+# config section has no value for the setting or the value cannot be converted.
+DEFAULT_INTERVAL_HOURS = 24 * 7  # 7 days
+DEFAULT_MIN_IDLE_HOURS = 2
+DEFAULT_STALE_AFTER_DAYS = 30
+DEFAULT_ARCHIVE_AFTER_DAYS = 90
+
+
+# ---------------------------------------------------------------------------
+# .curator_state — persistent scheduler + status
+# ---------------------------------------------------------------------------
+
+def _state_file() -> Path:
+    """Return the path of the curator state file under the Hermes home."""
+    skills_dir = get_hermes_home() / "skills"
+    return skills_dir / ".curator_state"
+
+
+def _default_state() -> Dict[str, Any]:
+    """Return a fresh dict holding every persisted curator-state key.
+
+    ``load_state()`` only keeps on-disk keys that appear in this dict (or
+    that start with ``_``), so every key the curator writes must be listed.
+    ``last_report_path`` is written by ``run_curator_review``; without an
+    entry here it would be dropped the next time the state is loaded.
+    """
+    return {
+        "last_run_at": None,
+        "last_run_duration_seconds": None,
+        "last_run_summary": None,
+        "last_report_path": None,
+        "paused": False,
+        "run_count": 0,
+    }
+
+
+def load_state() -> Dict[str, Any]:
+    """Load the curator state, falling back to defaults on any read problem.
+
+    Returns the default state when the file is missing, unreadable, not
+    valid UTF-8, not valid JSON, or does not contain a JSON object.
+    Otherwise returns the defaults overlaid with the stored values, keeping
+    only keys that exist in the defaults or start with ``_``.
+    """
+    path = _state_file()
+    if not path.exists():
+        return _default_state()
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and also the
+        # UnicodeDecodeError raised by read_text() for a file that is not
+        # valid UTF-8, so a corrupted state file never escapes as an error.
+        logger.debug("Failed to read curator state: %s", e)
+        return _default_state()
+    if not isinstance(data, dict):
+        return _default_state()
+    base = _default_state()
+    base.update({k: v for k, v in data.items() if k in base or k.startswith("_")})
+    return base
+
+
+def save_state(data: Dict[str, Any]) -> None:
+    """Write *data* as JSON to the curator state file, best-effort.
+
+    The JSON is written to a temporary file created in the state file's own
+    directory, flushed and fsynced, and then moved over the state file with
+    ``os.replace``. ``Exception`` failures are logged at debug level and
+    not raised.
+    """
+    path = _state_file()
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fd, tmp = tempfile.mkstemp(dir=str(path.parent), prefix=".curator_state_", suffix=".tmp")
+        try:
+            with os.fdopen(fd, "w", encoding="utf-8") as f:
+                json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp, path)
+        except BaseException:
+            # Remove the temp file on any failure (interrupts included),
+            # then let the original error continue to the outer handler.
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+    except Exception as e:
+        logger.debug("Failed to save curator state: %s", e, exc_info=True)
+
+
+def set_paused(paused: bool) -> None:
+    """Persist the curator's ``paused`` flag, coerced to ``bool``."""
+    save_state({**load_state(), "paused": bool(paused)})
+
+
+def is_paused() -> bool:
+    """Return True when the persisted state has a truthy ``paused`` value."""
+    flag = load_state().get("paused")
+    return bool(flag)
+
+
+# ---------------------------------------------------------------------------
+# Config access
+# ---------------------------------------------------------------------------
+
+def _load_config() -> Dict[str, Any]:
+    """Return the ``curator`` section of the Hermes config, or ``{}``.
+
+    An empty dict is returned when the config cannot be loaded, is not a
+    dict, or its ``curator`` entry is missing, falsy, or not a dict.
+    """
+    try:
+        from hermes_cli.config import load_config
+        full_cfg = load_config()
+    except Exception as e:
+        logger.debug("Failed to load config for curator: %s", e)
+        return {}
+    if isinstance(full_cfg, dict):
+        section = full_cfg.get("curator") or {}
+        if isinstance(section, dict):
+            return section
+    return {}
+
+
+def is_enabled() -> bool:
+    """Return the truthiness of ``curator.enabled``; True when the key is absent."""
+    enabled = _load_config().get("enabled", True)
+    return bool(enabled)
+
+
+def get_interval_hours() -> int:
+    """Return ``curator.interval_hours`` as an int, or the default if it can't be converted."""
+    raw = _load_config().get("interval_hours", DEFAULT_INTERVAL_HOURS)
+    try:
+        return int(raw)
+    except (TypeError, ValueError):
+        return DEFAULT_INTERVAL_HOURS
+
+
+def get_min_idle_hours() -> float:
+    """Return ``curator.min_idle_hours`` as a float, or the default if it can't be converted."""
+    raw = _load_config().get("min_idle_hours", DEFAULT_MIN_IDLE_HOURS)
+    try:
+        return float(raw)
+    except (TypeError, ValueError):
+        return DEFAULT_MIN_IDLE_HOURS
+
+
+def get_stale_after_days() -> int:
+    """Return ``curator.stale_after_days`` as an int, or the default if it can't be converted."""
+    raw = _load_config().get("stale_after_days", DEFAULT_STALE_AFTER_DAYS)
+    try:
+        return int(raw)
+    except (TypeError, ValueError):
+        return DEFAULT_STALE_AFTER_DAYS
+
+
+def get_archive_after_days() -> int:
+    """Return ``curator.archive_after_days`` as an int, or the default if it can't be converted."""
+    raw = _load_config().get("archive_after_days", DEFAULT_ARCHIVE_AFTER_DAYS)
+    try:
+        return int(raw)
+    except (TypeError, ValueError):
+        return DEFAULT_ARCHIVE_AFTER_DAYS
+
+
+# ---------------------------------------------------------------------------
+# Idle / interval check
+# ---------------------------------------------------------------------------
+
+def _parse_iso(ts: Optional[str]) -> Optional[datetime]:
+    """Parse an ISO-format timestamp; return None when *ts* is empty or unparsable."""
+    parsed: Optional[datetime] = None
+    if ts:
+        try:
+            parsed = datetime.fromisoformat(ts)
+        except (TypeError, ValueError):
+            parsed = None
+    return parsed
+
+
+def should_run_now(now: Optional[datetime] = None) -> bool:
+    """Return True if the curator should run immediately.
+
+    Gates:
+      - curator.enabled == True
+      - not paused
+      - last_run_at missing or unparsable, OR at least interval_hours old
+
+    The idle check (min_idle_hours) is applied at the call site where we know
+    whether an agent is actively running — here we only enforce the static
+    gates.
+
+    Args:
+        now: Reference time; defaults to the current UTC time. A naive value
+            is interpreted as UTC, exactly as a naive ``last_run_at`` is.
+    """
+    if not is_enabled():
+        return False
+    if is_paused():
+        return False
+
+    state = load_state()
+    last = _parse_iso(state.get("last_run_at"))
+    if last is None:
+        return True
+
+    if now is None:
+        now = datetime.now(timezone.utc)
+    elif now.tzinfo is None:
+        # Subtracting an aware datetime from a naive one raises TypeError,
+        # so give a naive caller-supplied time the same UTC treatment.
+        now = now.replace(tzinfo=timezone.utc)
+    if last.tzinfo is None:
+        last = last.replace(tzinfo=timezone.utc)
+    interval = timedelta(hours=get_interval_hours())
+    return (now - last) >= interval
+
+
+# ---------------------------------------------------------------------------
+# Automatic state transitions (pure function, no LLM)
+# ---------------------------------------------------------------------------
+
+def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
+    """Move agent-created skills between lifecycle states based on age.
+
+    For every row from ``skill_usage.agent_created_report()`` the anchor time
+    is ``last_used_at``, else ``created_at``, else *now*; naive anchors are
+    treated as UTC. Pinned rows are counted as checked but never changed.
+
+      - anchor at/before the archive cutoff and not archived -> archive
+      - anchor at/before the stale cutoff and currently active -> mark stale
+      - anchor after the stale cutoff and currently stale -> reactivate
+
+    Args:
+        now: Reference time; defaults to the current UTC time. A naive value
+            is interpreted as UTC so the cutoffs can be compared with the
+            timezone-aware anchors.
+
+    Returns:
+        Counter dict with keys ``checked``, ``marked_stale``, ``archived``
+        and ``reactivated``.
+    """
+    from tools import skill_usage as _u
+
+    if now is None:
+        now = datetime.now(timezone.utc)
+    elif now.tzinfo is None:
+        # Anchors are always made aware below; comparing them with cutoffs
+        # derived from a naive `now` would raise TypeError.
+        now = now.replace(tzinfo=timezone.utc)
+    stale_cutoff = now - timedelta(days=get_stale_after_days())
+    archive_cutoff = now - timedelta(days=get_archive_after_days())
+
+    counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0}
+
+    for row in _u.agent_created_report():
+        counts["checked"] += 1
+        name = row["name"]
+        if row.get("pinned"):
+            continue
+
+        last_used = _parse_iso(row.get("last_used_at"))
+        # A never-used skill is aged from created_at so that a brand-new
+        # skill doesn't immediately archive itself.
+        anchor = last_used or _parse_iso(row.get("created_at")) or now
+        if anchor.tzinfo is None:
+            anchor = anchor.replace(tzinfo=timezone.utc)
+
+        current = row.get("state", _u.STATE_ACTIVE)
+
+        if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
+            ok, _msg = _u.archive_skill(name)
+            if ok:
+                counts["archived"] += 1
+        elif anchor <= stale_cutoff and current == _u.STATE_ACTIVE:
+            _u.set_state(name, _u.STATE_STALE)
+            counts["marked_stale"] += 1
+        elif anchor > stale_cutoff and current == _u.STATE_STALE:
+            # Skill got used again after being marked stale — reactivate.
+            _u.set_state(name, _u.STATE_ACTIVE)
+            counts["reactivated"] += 1
+
+    return counts
+
+
+# ---------------------------------------------------------------------------
+# Review prompt for the forked agent
+# ---------------------------------------------------------------------------
+
+# Instruction text for the forked review agent. run_curator_review() appends
+# the rendered candidate list to this text before handing it to the agent.
+CURATOR_REVIEW_PROMPT = (
+    "You are running as Hermes' background skill CURATOR. This is an "
+    "UMBRELLA-BUILDING consolidation pass, not a passive audit and not a "
+    "duplicate-finder.\n\n"
+    "The goal of the skill collection is a LIBRARY OF CLASS-LEVEL "
+    "INSTRUCTIONS AND EXPERIENTIAL KNOWLEDGE. A collection of hundreds of "
+    "narrow skills where each one captures one session's specific bug is "
+    "a FAILURE of the library — not a feature. An agent searching skills "
+    "matches on descriptions, not on exact names; one broad umbrella "
+    "skill with labeled subsections beats five narrow siblings for "
+    "discoverability, not the other way around.\n\n"
+    "The right target shape is CLASS-LEVEL skills with rich SKILL.md "
+    "bodies + `references/`, `templates/`, and `scripts/` subfiles for "
+    "session-specific detail — not one-session-one-skill micro-entries.\n\n"
+    "Hard rules — do not violate:\n"
+    "1. DO NOT touch bundled or hub-installed skills. The candidate list "
+    "below is already filtered to agent-created skills only.\n"
+    "2. DO NOT delete any skill. Archiving (moving the skill's directory "
+    "into ~/.hermes/skills/.archive/) is the maximum destructive action. "
+    "Archives are recoverable; deletion is not.\n"
+    "3. DO NOT touch skills shown as pinned=yes. Skip them entirely.\n"
+    "4. DO NOT use usage counters as a reason to skip consolidation. The "
+    "counters are new and often mostly zero. Judge overlap on CONTENT, "
+    "not on use_count. 'use=0' is not evidence a skill is valuable; it's "
+    "absence of evidence either way.\n"
+    "5. DO NOT reject consolidation on the grounds that 'each skill has "
+    "a distinct trigger'. Pairwise distinctness is the wrong bar. The "
+    "right bar is: 'would a human maintainer write this as N separate "
+    "skills, or as one skill with N labeled subsections?' When the "
+    "answer is the latter, merge.\n\n"
+    "How to work — not optional:\n"
+    "1. Scan the full candidate list. Identify PREFIX CLUSTERS (skills "
+    "sharing a first word or domain keyword). Examples you are likely "
+    "to find: hermes-config-*, hermes-dashboard-*, gateway-*, codex-*, "
+    "ollama-*, anthropic-*, gemini-*, mcp-*, salvage-*, pr-*, "
+    "competitor-*, python-*, security-*, etc. Expect 10-25 clusters.\n"
+    "2. For each cluster with 2+ members, do NOT ask 'are these pairs "
+    "overlapping?' — ask 'what is the UMBRELLA CLASS these skills all "
+    "serve? Would a maintainer name that class and write one skill for "
+    "it?' If yes, pick (or create) the umbrella and absorb the siblings "
+    "into it.\n"
+    "3. Three ways to consolidate — use the right one per cluster:\n"
+    "   a. MERGE INTO EXISTING UMBRELLA — one skill in the cluster is "
+    "already broad enough to be the umbrella (example: `pr-triage-"
+    "salvage` for the PR review cluster). Patch it to add a labeled "
+    "section for each sibling's unique insight, then archive the "
+    "siblings.\n"
+    "   b. CREATE A NEW UMBRELLA SKILL.md — no existing member is broad "
+    "enough. Use skill_manage action=create to write a new class-level "
+    "skill whose SKILL.md covers the shared workflow and has short "
+    "labeled subsections. Archive the now-absorbed narrow siblings.\n"
+    "   c. DEMOTE TO REFERENCES/TEMPLATES/SCRIPTS — a sibling has "
+    "narrow-but-valuable session-specific content. Move it into the "
+    "umbrella's appropriate support directory:\n"
+    "      • `references/<topic>.md` for session-specific detail OR "
+    "condensed knowledge banks (quoted research, API docs excerpts, "
+    "domain notes, provider quirks, reproduction recipes)\n"
+    "      • `templates/<name>.<ext>` for starter files meant to be "
+    "copied and modified\n"
+    "      • `scripts/<name>.<ext>` for statically re-runnable actions "
+    "(verification scripts, fixture generators, probes)\n"
+    "      Then archive the old sibling. Use `terminal` with `mkdir -p "
+    "~/.hermes/skills/<umbrella>/references/ && mv ... <umbrella>/"
+    "references/<topic>.md` (or templates/ / scripts/).\n"
+    "4. Also flag skills whose NAME is too narrow (contains a PR number, "
+    "a feature codename, a specific error string, an 'audit' / "
+    "'diagnosis' / 'salvage' session artifact). These almost always "
+    "belong as a subsection or support file under a class-level umbrella.\n"
+    "5. Iterate. After one consolidation round, scan the remaining set "
+    "and look for the NEXT umbrella opportunity. Don't stop after 3 "
+    "merges.\n\n"
+    "Your toolset:\n"
+    "  - skills_list, skill_view        — read the current landscape\n"
+    "  - skill_manage action=patch      — add sections to the umbrella\n"
+    "  - skill_manage action=create     — create a new umbrella SKILL.md\n"
+    "  - skill_manage action=write_file — add a references/, templates/, "
+    "or scripts/ file under an existing skill (the skill must already "
+    "exist)\n"
+    "  - terminal                       — mv a sibling into the archive "
+    "OR move its content into a support subfile\n\n"
+    "'keep' is a legitimate decision ONLY when the skill is already a "
+    "class-level umbrella and none of the proposed merges would improve "
+    "discoverability. 'This is narrow but distinct from its siblings' "
+    "is NOT a reason to keep — it's a reason to move it under an "
+    "umbrella as a subsection or support file.\n\n"
+    "Expected output: real umbrella-ification. Process every obvious "
+    "cluster. If you end the pass with fewer than 10 archives, you "
+    "stopped too early — go back and look at the clusters you left "
+    "alone.\n\n"
+    "When done, write a summary with: clusters processed, skills "
+    "patched/absorbed, skills demoted to references/templates/scripts, "
+    "skills archived, new umbrellas created, and clusters you "
+    "deliberately left alone with one line each."
+)
+
+
+# ---------------------------------------------------------------------------
+# Per-run reports — {YYYYMMDD-HHMMSS}/run.json + REPORT.md under logs/curator/
+# ---------------------------------------------------------------------------
+
+def _reports_root() -> Path:
+    """Return the directory that holds per-run curator reports.
+
+    This is ``logs/curator`` under the Hermes home (``~/.hermes/logs/curator/``),
+    next to ``agent.log`` and ``gateway.log``, so operational telemetry stays
+    out of the user's authored skill data in ``~/.hermes/skills/``.
+    """
+    logs_dir = get_hermes_home() / "logs"
+    return logs_dir / "curator"
+
+
+def _write_run_report(
+    *,
+    started_at: datetime,
+    elapsed_seconds: float,
+    auto_counts: Dict[str, int],
+    auto_summary: str,
+    before_report: List[Dict[str, Any]],
+    before_names: Set[str],
+    after_report: List[Dict[str, Any]],
+    llm_meta: Dict[str, Any],
+) -> Optional[Path]:
+    """Write run.json and REPORT.md for one curator run.
+
+    The files go into ``logs/curator/{YYYYMMDD-HHMMSS}/``; when that
+    directory already exists a numeric suffix (``-2``, ``-3``, ...) is added.
+
+    Returns the run directory once it has been created (a failure to write
+    either file is only logged), or None when the reports root or the run
+    directory could not be created. ``auto_summary`` is accepted but not
+    used in the report.
+    """
+    reports_dir = _reports_root()
+    try:
+        reports_dir.mkdir(parents=True, exist_ok=True)
+    except Exception as e:
+        logger.debug("Curator report dir create failed: %s", e)
+        return None
+
+    # Pick a directory name that is not taken yet.
+    stamp = started_at.strftime("%Y%m%d-%H%M%S")
+    run_dir = reports_dir / stamp
+    attempt = 1
+    while run_dir.exists():
+        attempt += 1
+        run_dir = reports_dir / f"{stamp}-{attempt}"
+    try:
+        run_dir.mkdir(parents=True, exist_ok=False)
+    except Exception as e:
+        logger.debug("Curator run dir create failed: %s", e)
+        return None
+
+    # Index both snapshots by skill name and diff them.
+    after_by_name = {}
+    for rec in after_report:
+        if isinstance(rec, dict):
+            after_by_name[rec.get("name")] = rec
+    before_by_name = {}
+    for rec in before_report:
+        if isinstance(rec, dict):
+            before_by_name[rec.get("name")] = rec
+    after_names = set(after_by_name.keys())
+    removed = sorted(before_names - after_names)   # gone after the run
+    added = sorted(after_names - before_names)     # new after the run
+
+    # Skills present in both snapshots whose state changed.
+    transitions: List[Dict[str, str]] = []
+    for skill in sorted(after_names & before_names):
+        prev_state = (before_by_name.get(skill) or {}).get("state")
+        next_state = (after_by_name.get(skill) or {}).get("state")
+        if not (prev_state and next_state):
+            continue
+        if prev_state != next_state:
+            transitions.append({"name": skill, "from": prev_state, "to": next_state})
+
+    # Tally the LLM's tool calls by tool name.
+    tc_counts: Dict[str, int] = {}
+    for call in llm_meta.get("tool_calls", []) or []:
+        tool = call.get("name", "unknown")
+        tc_counts[tool] = tc_counts.get(tool, 0) + 1
+
+    n_before = len(before_names)
+    n_after = len(after_names)
+    count_block = {
+        "before": n_before,
+        "after": n_after,
+        "delta": n_after - n_before,
+        "archived_this_run": len(removed),
+        "added_this_run": len(added),
+        "state_transitions": len(transitions),
+        "tool_calls_total": sum(tc_counts.values()),
+    }
+    payload = {
+        "started_at": started_at.isoformat(),
+        "duration_seconds": round(elapsed_seconds, 2),
+        "model": llm_meta.get("model", ""),
+        "provider": llm_meta.get("provider", ""),
+        "auto_transitions": auto_counts,
+        "counts": count_block,
+        "tool_call_counts": tc_counts,
+        "archived": removed,
+        "added": added,
+        "state_transitions": transitions,
+        "llm_final": llm_meta.get("final", ""),
+        "llm_summary": llm_meta.get("summary", ""),
+        "llm_error": llm_meta.get("error"),
+        "tool_calls": llm_meta.get("tool_calls", []),
+    }
+
+    # Machine-readable record.
+    try:
+        json_text = json.dumps(payload, indent=2, ensure_ascii=False) + "\n"
+        (run_dir / "run.json").write_text(json_text, encoding="utf-8")
+    except Exception as e:
+        logger.debug("Curator run.json write failed: %s", e)
+
+    # Human-readable record.
+    try:
+        report_text = _render_report_markdown(payload)
+        (run_dir / "REPORT.md").write_text(report_text, encoding="utf-8")
+    except Exception as e:
+        logger.debug("Curator REPORT.md write failed: %s", e)
+
+    return run_dir
+
+
+def _render_report_markdown(p: Dict[str, Any]) -> str:
+    """Build the REPORT.md text for one curator run from the run payload *p*.
+
+    Sections, in order: title and overview line, optional LLM error note,
+    auto-transition counts, LLM pass counts, optional lists of archived and
+    new skills and of state transitions, the LLM's final response (or its
+    short summary when there is no final response and no error), and a
+    recovery footer. Missing payload keys fall back to empty/zero values.
+    """
+    out: List[str] = []
+    emit = out.append
+
+    counts = p.get("counts") or {}
+    error = p.get("llm_error")
+
+    # Title and one-line overview.
+    total_seconds = int(p.get("duration_seconds", 0) or 0)
+    mins, secs = divmod(total_seconds, 60)
+    if mins:
+        dur_label = f"{mins}m {secs}s"
+    else:
+        dur_label = f"{secs}s"
+    started = p.get("started_at", "")
+    emit(f"# Curator run — {started}\n")
+    model = p.get("model") or "(not resolved)"
+    prov = p.get("provider") or "(not resolved)"
+    emit(
+        f"Model: `{model}` via `{prov}`  ·  Duration: {dur_label}  ·  "
+        f"Agent-created skills: {counts.get('before', 0)} → {counts.get('after', 0)} "
+        f"({counts.get('delta', 0):+d})\n"
+    )
+
+    if error:
+        emit(f"> ⚠ LLM pass error: `{error}`\n")
+
+    # Counts from the automatic (non-LLM) transitions.
+    auto = p.get("auto_transitions") or {}
+    emit("## Auto-transitions (pure, no LLM)\n")
+    emit(f"- checked: {auto.get('checked', 0)}")
+    emit(f"- marked stale: {auto.get('marked_stale', 0)}")
+    emit(f"- archived: {auto.get('archived', 0)}")
+    emit(f"- reactivated: {auto.get('reactivated', 0)}")
+    emit("")
+
+    # Counts from the LLM pass.
+    tc_counts = p.get("tool_call_counts") or {}
+    by_name = ", ".join(f"{k}={v}" for k, v in sorted(tc_counts.items())) or "none"
+    emit("## LLM consolidation pass\n")
+    emit(f"- tool calls: **{counts.get('tool_calls_total', 0)}** "
+         f"(by name: {by_name})")
+    emit(f"- archived this run: **{counts.get('archived_this_run', 0)}**")
+    emit(f"- new skills this run: **{counts.get('added_this_run', 0)}**")
+    emit(f"- state transitions (active ↔ stale ↔ archived): "
+         f"**{counts.get('state_transitions', 0)}**")
+    emit("")
+
+    # Archived skills: at most 50 are listed inline.
+    archived = p.get("archived") or []
+    if archived:
+        inline_limit = 50
+        emit(f"### Skills archived ({len(archived)})\n")
+        emit("_Archived skills are at `~/.hermes/skills/.archive/`. "
+             "Restore any via `hermes curator restore <name>`._\n")
+        out.extend(f"- `{n}`" for n in archived[:inline_limit])
+        overflow = len(archived) - inline_limit
+        if overflow > 0:
+            emit(f"- … and {overflow} more (see `run.json` for the full list)")
+        emit("")
+
+    # Newly added skills.
+    added = p.get("added") or []
+    if added:
+        emit(f"### New skills this run ({len(added)})\n")
+        emit("_Usually these are new class-level umbrellas created via `skill_manage action=create`._\n")
+        out.extend(f"- `{n}`" for n in added)
+        emit("")
+
+    # State changes between the two snapshots.
+    trans = p.get("state_transitions") or []
+    if trans:
+        emit(f"### State transitions ({len(trans)})\n")
+        out.extend(
+            f"- `{t.get('name')}`: {t.get('from')} → {t.get('to')}" for t in trans
+        )
+        emit("")
+
+    # The LLM's own words: full final response, else the short summary.
+    final = (p.get("llm_final") or "").strip()
+    if final:
+        emit("## LLM final summary\n")
+        emit(final)
+        emit("")
+    elif not error:
+        llm_sum = p.get("llm_summary") or ""
+        if llm_sum:
+            emit("## LLM summary\n")
+            emit(llm_sum)
+            emit("")
+
+    # Recovery footer.
+    emit("## Recovery\n")
+    emit("- Restore an archived skill: `hermes curator restore <name>`")
+    emit("- All archives live under `~/.hermes/skills/.archive/` and are recoverable by `mv`")
+    emit("- See `run.json` in this directory for the full machine-readable record.")
+    emit("")
+
+    return "\n".join(out)
+
+
+# ---------------------------------------------------------------------------
+# Orchestrator — spawn a forked AIAgent for the LLM review pass
+# ---------------------------------------------------------------------------
+
+def _render_candidate_list() -> str:
+    """Render the agent-created skills and their usage stats as text.
+
+    Returns a fixed "nothing to review" sentence when the usage report is
+    empty; otherwise a header line followed by one line per skill.
+    """
+    rows = skill_usage.agent_created_report()
+    if not rows:
+        return "No agent-created skills to review."
+
+    def _describe(r) -> str:
+        # Fields are separated by two spaces.
+        fields = [
+            f"- {r['name']}",
+            f"state={r['state']}",
+            f"pinned={'yes' if r.get('pinned') else 'no'}",
+            f"use={r.get('use_count', 0)}",
+            f"view={r.get('view_count', 0)}",
+            f"patches={r.get('patch_count', 0)}",
+            f"last_used={r.get('last_used_at') or 'never'}",
+        ]
+        return "  ".join(fields)
+
+    header = f"Agent-created skills ({len(rows)}):\n"
+    return "\n".join([header, *(_describe(r) for r in rows)])
+
+
+def run_curator_review(
+    on_summary: Optional[Callable[[str], None]] = None,
+    synchronous: bool = False,
+) -> Dict[str, Any]:
+    """Execute a single curator review pass.
+
+    Steps:
+      1. Apply automatic state transitions (pure, no LLM).
+      2. Record last_run_at, run_count and an interim summary in
+         .curator_state before the LLM pass starts.
+      3. Run the LLM review prompt against the current candidate list
+         (skipped when there are no agent-created skills), write the
+         per-run report, and store the final summary, duration and report
+         path in .curator_state.
+      4. Invoke *on_summary* with a user-visible description.
+
+    If *synchronous* is True, step 3 onwards runs in the calling thread; the
+    default is to spawn a daemon thread so the caller returns immediately.
+
+    Args:
+        on_summary: Optional callback given ``"curator: <summary>"`` once the
+            pass has finished. Errors it raises are logged, not propagated.
+        synchronous: Run the LLM pass inline instead of in a daemon thread.
+
+    Returns:
+        Dict with ``started_at`` (ISO string), ``auto_transitions`` (the
+        counter dict from step 1) and ``summary_so_far`` (text summary of
+        step 1 only).
+    """
+    start = datetime.now(timezone.utc)
+    counts = apply_automatic_transitions(now=start)
+
+    auto_summary_parts = []
+    if counts["marked_stale"]:
+        auto_summary_parts.append(f"{counts['marked_stale']} marked stale")
+    if counts["archived"]:
+        auto_summary_parts.append(f"{counts['archived']} archived")
+    if counts["reactivated"]:
+        auto_summary_parts.append(f"{counts['reactivated']} reactivated")
+    auto_summary = ", ".join(auto_summary_parts) if auto_summary_parts else "no changes"
+
+    # Persist state before the LLM pass so a crash mid-review still records
+    # the run and doesn't immediately re-trigger.
+    state = load_state()
+    state["last_run_at"] = start.isoformat()
+    state["run_count"] = int(state.get("run_count", 0)) + 1
+    state["last_run_summary"] = f"auto: {auto_summary}"
+    save_state(state)
+
+    def _llm_pass():
+        # Snapshot skill state BEFORE the LLM pass so the report can diff.
+        try:
+            before_report = skill_usage.agent_created_report()
+        except Exception as e:
+            logger.debug("Curator pre-pass snapshot failed: %s", e, exc_info=True)
+            before_report = []
+        before_names = {r.get("name") for r in before_report if isinstance(r, dict)}
+
+        llm_meta: Dict[str, Any] = {}
+        try:
+            candidate_list = _render_candidate_list()
+            if "No agent-created skills" in candidate_list:
+                final_summary = f"auto: {auto_summary}; llm: skipped (no candidates)"
+                llm_meta = {
+                    "final": "",
+                    "summary": "skipped (no candidates)",
+                    "model": "",
+                    "provider": "",
+                    "tool_calls": [],
+                    "error": None,
+                }
+            else:
+                prompt = f"{CURATOR_REVIEW_PROMPT}\n\n{candidate_list}"
+                llm_meta = _run_llm_review(prompt)
+                final_summary = (
+                    f"auto: {auto_summary}; llm: {llm_meta.get('summary', 'no change')}"
+                )
+        except Exception as e:
+            logger.debug("Curator LLM pass failed: %s", e, exc_info=True)
+            final_summary = f"auto: {auto_summary}; llm: error ({e})"
+            llm_meta = {
+                "final": "",
+                "summary": f"error ({e})",
+                "model": "",
+                "provider": "",
+                "tool_calls": [],
+                "error": str(e),
+            }
+
+        elapsed = (datetime.now(timezone.utc) - start).total_seconds()
+        state2 = load_state()
+        state2["last_run_duration_seconds"] = elapsed
+        state2["last_run_summary"] = final_summary
+
+        # Write the per-run report. Runs in a best-effort try so a
+        # reporting bug never breaks the curator itself. Report path is
+        # recorded in state so `hermes curator status` can point at it.
+        try:
+            after_report = skill_usage.agent_created_report()
+        except Exception as e:
+            logger.debug("Curator post-pass snapshot failed: %s", e, exc_info=True)
+            after_report = []
+        try:
+            report_path = _write_run_report(
+                started_at=start,
+                elapsed_seconds=elapsed,
+                auto_counts=counts,
+                auto_summary=auto_summary,
+                before_report=before_report,
+                before_names=before_names,
+                after_report=after_report,
+                llm_meta=llm_meta,
+            )
+            if report_path is not None:
+                state2["last_report_path"] = str(report_path)
+        except Exception as e:
+            logger.debug("Curator report write failed: %s", e, exc_info=True)
+
+        save_state(state2)
+
+        if on_summary:
+            try:
+                on_summary(f"curator: {final_summary}")
+            except Exception as e:
+                # The callback is caller-supplied; it must not break the
+                # pass, but its failure should still be traceable.
+                logger.debug("Curator on_summary callback failed: %s", e, exc_info=True)
+
+    if synchronous:
+        _llm_pass()
+    else:
+        t = threading.Thread(target=_llm_pass, daemon=True, name="curator-review")
+        t.start()
+
+    return {
+        "started_at": start.isoformat(),
+        "auto_transitions": counts,
+        "summary_so_far": auto_summary,
+    }
+
+
+def _run_llm_review(prompt: str) -> Dict[str, Any]:
+    """Spawn an AIAgent fork to run the curator review prompt.
+
+    Returns a dict with:
+      - final: full (untruncated) final response from the reviewer
+      - summary: short summary suitable for state file (240-char cap)
+      - model, provider: what the fork actually ran on
+      - tool_calls: list of {name, arguments} for every tool call made during
+        the pass (arguments may be truncated for readability)
+      - error: set if the pass failed mid-run; final/summary may still be empty
+
+    Never raises; callers get a structured failure instead.
+    """
+    import contextlib
+    result_meta: Dict[str, Any] = {
+        "final": "",
+        "summary": "",
+        "model": "",
+        "provider": "",
+        "tool_calls": [],
+        "error": None,
+    }
+    try:
+        from run_agent import AIAgent
+    except Exception as e:
+        result_meta["error"] = f"AIAgent import failed: {e}"
+        result_meta["summary"] = result_meta["error"]
+        return result_meta
+
+    # Resolve provider + model the same way the CLI does, so the curator
+    # fork inherits the user's active main config rather than falling
+    # through to an empty provider/model pair (which sends HTTP 400
+    # "No models provided"). AIAgent() without explicit provider/model
+    # arguments hits an auto-resolution path that fails for OAuth-only
+    # providers and for pool-backed credentials.
+    _api_key = None
+    _base_url = None
+    _api_mode = None
+    _resolved_provider = None
+    _model_name = ""
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        _cfg = load_config()
+        _m = _cfg.get("model", {}) if isinstance(_cfg.get("model"), dict) else {}
+        _provider = _m.get("provider") or "auto"
+        _model_name = _m.get("default") or _m.get("model") or ""
+        _rp = resolve_runtime_provider(
+            requested=_provider, target_model=_model_name
+        )
+        _api_key = _rp.get("api_key")
+        _base_url = _rp.get("base_url")
+        _api_mode = _rp.get("api_mode")
+        _resolved_provider = _rp.get("provider") or _provider
+    except Exception as e:
+        # Resolution is best-effort: on failure the agent is still created
+        # below with whatever values were assigned before the error.
+        logger.debug("Curator provider resolution failed: %s", e, exc_info=True)
+
+    result_meta["model"] = _model_name
+    result_meta["provider"] = _resolved_provider or ""
+
+    review_agent = None
+    try:
+        review_agent = AIAgent(
+            model=_model_name,
+            provider=_resolved_provider,
+            api_key=_api_key,
+            base_url=_base_url,
+            api_mode=_api_mode,
+            # Umbrella-building over a large skill collection is worth a
+            # high iteration ceiling — the pass typically takes 50-100
+            # API calls against hundreds of candidate skills. The
+            # single-session review path caps itself at a much smaller
+            # number because it's not doing a curation sweep.
+            max_iterations=9999,
+            quiet_mode=True,
+            platform="curator",
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        # Disable recursive nudges — the curator must never spawn its own review.
+        review_agent._memory_nudge_interval = 0
+        review_agent._skill_nudge_interval = 0
+
+        # Redirect the forked agent's stdout/stderr to /dev/null while it
+        # runs so its tool-call chatter doesn't pollute the foreground
+        # terminal. The background-thread runner also hides it; this
+        # belt-and-suspenders path matters when a caller invokes
+        # run_curator_review(synchronous=True) from the CLI.
+        # NOTE(review): contextlib.redirect_stdout/redirect_stderr rebind
+        # sys.stdout/sys.stderr for the whole process, not just this thread.
+        # When this runs on the background "curator-review" thread, output
+        # written through sys.stdout by other threads during the pass would
+        # presumably be discarded as well — confirm this is acceptable.
+        with open(os.devnull, "w") as _devnull, \
+             contextlib.redirect_stdout(_devnull), \
+             contextlib.redirect_stderr(_devnull):
+            conv_result = review_agent.run_conversation(user_message=prompt)
+
+        final = ""
+        if isinstance(conv_result, dict):
+            final = str(conv_result.get("final_response") or "").strip()
+        result_meta["final"] = final
+        # Cap the summary at 240 characters plus an ellipsis; an empty final
+        # response is summarised as "no change".
+        result_meta["summary"] = (final[:240] + "…") if len(final) > 240 else (final or "no change")
+
+        # Collect tool calls for the report. Walk the forked agent's
+        # session messages and extract every tool_call made during the
+        # pass. Truncate argument payloads so a giant skill_manage create
+        # doesn't blow up the report.
+        _calls: List[Dict[str, Any]] = []
+        for msg in getattr(review_agent, "_session_messages", []) or []:
+            if not isinstance(msg, dict):
+                continue
+            tcs = msg.get("tool_calls") or []
+            for tc in tcs:
+                if not isinstance(tc, dict):
+                    continue
+                fn = tc.get("function") or {}
+                name = fn.get("name") or ""
+                args_raw = fn.get("arguments") or ""
+                # Only string payloads are truncated (to 400 characters).
+                if isinstance(args_raw, str) and len(args_raw) > 400:
+                    args_raw = args_raw[:400] + "…"
+                _calls.append({"name": name, "arguments": args_raw})
+        result_meta["tool_calls"] = _calls
+    except Exception as e:
+        result_meta["error"] = f"error: {e}"
+        result_meta["summary"] = result_meta["error"]
+    finally:
+        # Always release the forked agent; a failing close() is ignored.
+        if review_agent is not None:
+            try:
+                review_agent.close()
+            except Exception:
+                pass
+    return result_meta
+
+
+# ---------------------------------------------------------------------------
+# Public entrypoint for the session-start hook
+# ---------------------------------------------------------------------------
+
+def maybe_run_curator(
+    *,
+    idle_for_seconds: Optional[float] = None,
+    on_summary: Optional[Callable[[str], None]] = None,
+) -> Optional[Dict[str, Any]]:
+    """Start a curator pass when every gate allows it; never raises.
+
+    The static gates come from ``should_run_now()``. The idle gate is only
+    applied when *idle_for_seconds* is given: the pass is skipped if that
+    value is below ``min_idle_hours`` converted to seconds.
+
+    Returns the dict from ``run_curator_review`` when a pass was started,
+    otherwise None (also on any error, which is logged at debug level).
+    """
+    try:
+        if not should_run_now():
+            return None
+        idle_known = idle_for_seconds is not None
+        if idle_known and idle_for_seconds < get_min_idle_hours() * 3600.0:
+            return None
+        return run_curator_review(on_summary=on_summary)
+    except Exception as e:
+        logger.debug("maybe_run_curator failed: %s", e, exc_info=True)
+        return None
@@ -402,6 +402,41 @@ class MemoryManager:
                    provider.name, e,
                )

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Tell every registered provider that the session_id has changed.
+
+        Called for ``/resume``, ``/branch``, ``/reset``, ``/new``, and
+        context compression, i.e. whenever ``AIAgent.session_id`` is
+        reassigned while the providers stay alive.
+
+        A falsy ``new_session_id`` is ignored. Providers are notified one
+        by one; an exception from one is logged at debug level and does
+        not stop the others. The contract for ``parent_session_id``,
+        ``reset`` and extra keyword arguments is documented on
+        ``MemoryProvider.on_session_switch``.
+        """
+        if not new_session_id:
+            return
+        # Named options and caller extras travel together to each provider.
+        forwarded = dict(kwargs, parent_session_id=parent_session_id, reset=reset)
+        for memory_provider in self._providers:
+            try:
+                memory_provider.on_session_switch(
+                    new_session_id, **forwarded
+                )
+            except Exception as exc:
+                logger.debug(
+                    "Memory provider '%s' on_session_switch failed: %s",
+                    memory_provider.name, exc,
+                )
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Notify all providers before context compression.

@@ -25,6 +25,7 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
 Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
+  on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
  on_pre_compress(messages) -> str       — extract before context compression
  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
@@ -160,6 +161,45 @@ class MemoryProvider(ABC):
        (CLI exit, /reset, gateway session expiry).
        """

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Called when the agent switches session_id mid-process.
+
+        Fires on ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), the
+        gateway equivalents, and context compression — any path that
+        reassigns ``AIAgent.session_id`` without tearing the provider down.
+
+        Providers that cache per-session state in ``initialize()``
+        (``_session_id``, ``_document_id``, accumulated turn buffers,
+        counters) should update or reset that state here so subsequent
+        writes land in the correct session's record.
+
+        Parameters
+        ----------
+        new_session_id:
+            The session_id the agent just switched to.
+        parent_session_id:
+            The previous session_id, or ``""`` when there is none. Set for
+            ``/branch``, ``/resume``, compression, and CLI ``/reset`` / ``/new``.
+        reset:
+            ``True`` for a genuinely new conversation (``/reset`` /
+            ``/new``): providers should flush accumulated per-session
+            buffers (``_session_turns``, ``_turn_counter``, etc.).
+            ``False`` for ``/resume`` / ``/branch`` / compression, where
+            the logical conversation continues under the new id.
+        **kwargs:
+            Extra caller context; the CLI passes ``reason``
+            (``"new_session"``, ``"resume"`` or ``"branch"``).
+
+        Default is no-op for backward compatibility.
+        """
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Called before context compression discards old messages.

@@ -46,7 +46,7 @@ def _resolve_requests_verify() -> bool | str:
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -149,6 +149,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
+    "minimax-oauth": "minimax",
    "minimax-cn": "minimax-cn",
    "deepseek": "deepseek",
    "alibaba": "alibaba",
@@ -98,17 +98,19 @@ def tool_progress_hint_cli() -> str:
 def openclaw_residue_hint_cli() -> str:
    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.

-    OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
-    otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
-    get carried forward and the agent dutifully reads them). ``hermes claw
-    cleanup`` renames the directory so the agent stops finding it.
+    Points users at ``hermes claw migrate`` (non-destructive port of config,
+    memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
+    follow-up step for users who have already migrated and want to archive
+    the old directory — with a warning that archiving breaks OpenClaw.
    """
    return (
-        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
-        "After migrating, the agent can still get confused and read that "
-        "directory's config/memory instead of Hermes's.\n"
-        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
-        "This tip only shows once; rerun it any time with `hermes claw cleanup`."
+        "A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
+        "To port your config, memory, and skills over to Hermes, run "
+        "`hermes claw migrate`.\n"
+        "If you've already migrated and want to archive the old directory, "
+        "run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
+        "OpenClaw will stop working after this).\n"
+        "This tip only shows once."
    )


@@ -200,6 +200,9 @@ def get_external_skills_dirs() -> List[Path]:
    if not isinstance(raw_dirs, list):
        return []

+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
    local_skills = get_skills_dir().resolve()
    seen: Set[Path] = set()
    result: List[Path] = []
@@ -210,7 +213,12 @@ def get_external_skills_dirs() -> List[Path]:
            continue
        # Expand ~ and environment variables
        expanded = os.path.expanduser(os.path.expandvars(entry))
-        p = Path(expanded).resolve()
+        p = Path(expanded)
+        # Resolve relative paths against HERMES_HOME, not cwd
+        if not p.is_absolute():
+            p = (hermes_home / p).resolve()
+        else:
+            p = p.resolve()
        if p == local_skills:
            continue
        if p in seen:
@@ -85,6 +85,9 @@ class AnthropicTransport(ProviderTransport):
        from agent.anthropic_adapter import _to_plain_data
        from agent.transports.types import ToolCall

+        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
+        _MCP_PREFIX = "mcp_"
+
        text_parts = []
        reasoning_parts = []
        reasoning_details = []
@@ -99,10 +102,13 @@ class AnthropicTransport(ProviderTransport):
                if isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
+                name = block.name
+                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
+                    name = name[len(_MCP_PREFIX):]
                tool_calls.append(
                    ToolCall(
                        id=block.id,
-                        name=block.name,
+                        name=name,
                        arguments=json.dumps(block.input),
                    )
                )
@@ -20,12 +20,7 @@ from agent.transports.types import NormalizedResponse, ToolCall, Usage


 def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
-    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
-
-    Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
-    They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
-    then request thought parts with ``includeThoughts`` enabled.
-    """
+    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
    if reasoning_config is None or not isinstance(reasoning_config, dict):
        return None

@@ -71,6 +66,50 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
    return thinking_config


+def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
+    """Convert Gemini thinking config keys to the OpenAI-compat field names.
+
+    Copies ``includeThoughts`` (bool), ``thinkingLevel`` (non-blank string,
+    stripped and lower-cased) and ``thinkingBudget`` (int or float, converted
+    with ``int()``) under their snake_case names; values of any other type
+    are dropped. Returns None when *config* is not a non-empty dict or when
+    nothing was copied.
+    """
+    if not isinstance(config, dict) or not config:
+        return None
+
+    translated: Dict[str, Any] = {}
+    if isinstance(config.get("includeThoughts"), bool):
+        translated["include_thoughts"] = config["includeThoughts"]
+    if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
+        translated["thinking_level"] = config["thinkingLevel"].strip().lower()
+    if isinstance(config.get("thinkingBudget"), (int, float)):
+        translated["thinking_budget"] = int(config["thinkingBudget"])
+    return translated or None
+
+
+def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
+    """Return True only for Gemini's OpenAI-compatible endpoint.
+
+    The URL must have host ``generativelanguage.googleapis.com`` and a path
+    ending in ``/openai`` (case-insensitive, trailing slashes ignored). The
+    host is parsed rather than substring-matched, so a URL that merely
+    contains the Gemini hostname in its path, userinfo, or as a prefix of
+    another domain is rejected. Unparseable or empty input returns False.
+    """
+    from urllib.parse import urlsplit
+
+    raw = str(base_url or "").strip().lower()
+    try:
+        # "//" makes urlsplit treat a scheme-less value as host[/path].
+        parts = urlsplit(raw if "://" in raw else f"//{raw}")
+    except ValueError:
+        return False
+    if parts.hostname != "generativelanguage.googleapis.com":
+        return False
+    return parts.path.rstrip("/").endswith("/openai")
+
+
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -309,6 +328,7 @@ class ChatCompletionsTransport(ProviderTransport):
        is_nous = params.get("is_nous", False)
        is_github_models = params.get("is_github_models", False)
        provider_name = str(params.get("provider_name") or "").strip().lower()
+        base_url = params.get("base_url")

        provider_prefs = params.get("provider_preferences")
        if provider_prefs and is_openrouter:
@@ -362,7 +382,19 @@ class ChatCompletionsTransport(ProviderTransport):
        if is_qwen:
            extra_body["vl_high_resolution_images"] = True

-        if provider_name in {"gemini", "google-gemini-cli"}:
+        if provider_name == "gemini":
+            raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if _is_gemini_openai_compat_base_url(base_url):
+                thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
+                if thinking_config:
+                    openai_compat_extra = extra_body.get("extra_body", {})
+                    google_extra = openai_compat_extra.get("google", {})
+                    google_extra["thinking_config"] = thinking_config
+                    openai_compat_extra["google"] = google_extra
+                    extra_body["extra_body"] = openai_compat_extra
+            elif raw_thinking_config:
+                extra_body["thinking_config"] = raw_thinking_config
+        elif provider_name == "google-gemini-cli":
            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
            if thinking_config:
                extra_body["thinking_config"] = thinking_config
@@ -359,6 +359,25 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://aws.amazon.com/bedrock/pricing/",
        pricing_version="bedrock-pricing-2026-04",
    ),
+    # MiniMax
+    (
+        "minimax",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
+    (
+        "minimax-cn",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
 }


@@ -400,6 +419,8 @@ def resolve_billing_route(
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name == "openai":
        return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+    if provider_name in {"minimax", "minimax-cn"}:
+        return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name in {"custom", "local"} or (base and "localhost" in base):
        return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
    return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
@@ -180,6 +180,11 @@ terminal:
 #   lifetime_seconds: 300
 #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 #   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
+#   # Optional: run the container as your host user's uid:gid so files written
+#   # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
+#   # caps too since no gosu privilege drop is needed. Leave off if your
+#   # chosen docker_image expects to start as root.
+#   docker_run_as_host_user: true
 #   # Optional: explicitly forward selected env vars into Docker.
 #   # These values come from your current shell first, then ~/.hermes/.env.
 #   # Warning: anything forwarded here is visible to commands run in the container.
@@ -80,6 +80,11 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 from hermes_constants import get_hermes_home, display_hermes_home
+from hermes_cli.browser_connect import (
+    DEFAULT_BROWSER_CDP_URL,
+    manual_chrome_debug_command,
+    try_launch_chrome_debug,
+)
 from hermes_cli.env_loader import load_hermes_dotenv
 from utils import base_url_host_matches

@@ -240,65 +245,6 @@ def _parse_service_tier_config(raw: str) -> str | None:
    logger.warning("Unknown service_tier '%s', ignoring", raw)
    return None

-
-
-def _get_chrome_debug_candidates(system: str) -> list[str]:
-    """Return likely browser executables for local CDP auto-launch."""
-    candidates: list[str] = []
-    seen: set[str] = set()
-
-    def _add_candidate(path: str | None) -> None:
-        if not path:
-            return
-        normalized = os.path.normcase(os.path.normpath(path))
-        if normalized in seen:
-            return
-        if os.path.isfile(path):
-            candidates.append(path)
-            seen.add(normalized)
-
-    def _add_from_path(*names: str) -> None:
-        for name in names:
-            _add_candidate(shutil.which(name))
-
-    if system == "Darwin":
-        for app in (
-            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
-            "/Applications/Chromium.app/Contents/MacOS/Chromium",
-            "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
-            "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
-        ):
-            _add_candidate(app)
-    elif system == "Windows":
-        _add_from_path(
-            "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
-            "chrome", "msedge", "brave", "chromium",
-        )
-
-        for base in (
-            os.environ.get("ProgramFiles"),
-            os.environ.get("ProgramFiles(x86)"),
-            os.environ.get("LOCALAPPDATA"),
-        ):
-            if not base:
-                continue
-            for parts in (
-                ("Google", "Chrome", "Application", "chrome.exe"),
-                ("Chromium", "Application", "chrome.exe"),
-                ("Chromium", "Application", "chromium.exe"),
-                ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
-                ("Microsoft", "Edge", "Application", "msedge.exe"),
-            ):
-                _add_candidate(os.path.join(base, *parts))
-    else:
-        _add_from_path(
-            "google-chrome", "google-chrome-stable", "chromium-browser",
-            "chromium", "brave-browser", "microsoft-edge",
-        )
-
-    return candidates
-
-
 def load_cli_config() -> Dict[str, Any]:
    """
    Load CLI configuration from config files.
@@ -551,18 +497,20 @@ def load_cli_config() -> Dict[str, Any]:
        "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "modal_image": "TERMINAL_MODAL_IMAGE",
        "daytona_image": "TERMINAL_DAYTONA_IMAGE",
+        "vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
        # SSH config
        "ssh_host": "TERMINAL_SSH_HOST",
        "ssh_user": "TERMINAL_SSH_USER",
        "ssh_port": "TERMINAL_SSH_PORT",
        "ssh_key": "TERMINAL_SSH_KEY",
-        # Container resource config (docker, singularity, modal, daytona -- ignored for local/ssh)
+        # Container resource config (docker, singularity, modal, daytona, vercel_sandbox -- ignored for local/ssh)
        "container_cpu": "TERMINAL_CONTAINER_CPU",
        "container_memory": "TERMINAL_CONTAINER_MEMORY",
        "container_disk": "TERMINAL_CONTAINER_DISK",
        "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
        "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
        "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
+        "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
        "sandbox_dir": "TERMINAL_SANDBOX_DIR",
        # Persistent shell (non-local backends)
        "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
@@ -2171,6 +2119,11 @@ class HermesCLI:
        self._pending_input = queue.Queue()
        self._interrupt_queue = queue.Queue()
        self._should_exit = False
+        # /exit --delete: when True, the current session's SQLite history and
+        # on-disk transcripts are deleted during shutdown. Set by
+        # process_command() when the user runs /exit --delete or /quit --delete.
+        # Ported from google-gemini/gemini-cli#19332.
+        self._delete_session_on_exit = False
        self._last_ctrl_c_time = 0
        self._clarify_state = None
        self._clarify_freetext = False
@@ -4862,6 +4815,22 @@ class HermesCLI:
                    )
                except Exception:
                    pass
+            # Notify memory providers that session_id rotated to a fresh
+            # conversation. reset=True signals providers to flush accumulated
+            # per-session state (_session_turns, _turn_counter, _document_id).
+            # Fires BEFORE the plugin on_session_reset hook (shell hooks only
+            # see the new id; Python providers see the transition). See #6672.
+            try:
+                _mm = getattr(self.agent, "_memory_manager", None)
+                if _mm is not None:
+                    _mm.on_session_switch(
+                        self.session_id,
+                        parent_session_id=old_session_id or "",
+                        reset=True,
+                        reason="new_session",
+                    )
+            except Exception:
+                pass
            self._notify_session_boundary("on_session_reset")

        if not silent:
@@ -4914,6 +4883,7 @@ class HermesCLI:
            _cprint("  Already on that session.")
            return

+        old_session_id = self.session_id
        # End current session
        try:
            self._session_db.end_session(self.session_id, "resumed_other")
@@ -4951,6 +4921,22 @@ class HermesCLI:
            if hasattr(self.agent, "_invalidate_system_prompt"):
                self.agent._invalidate_system_prompt()

+            # Notify memory providers that session_id rotated to a resumed
+            # session. reset=False — the provider's accumulated state is
+            # still valid; it just needs to target the new session_id for
+            # subsequent writes. See #6672.
+            try:
+                _mm = getattr(self.agent, "_memory_manager", None)
+                if _mm is not None:
+                    _mm.on_session_switch(
+                        target_id,
+                        parent_session_id=old_session_id or "",
+                        reset=False,
+                        reason="resume",
+                    )
+            except Exception:
+                pass
+
        title_part = f" \"{session_meta['title']}\"" if session_meta.get("title") else ""
        msg_count = len([m for m in self.conversation_history if m.get("role") == "user"])
        if self.conversation_history:
@@ -5071,6 +5057,22 @@ class HermesCLI:
            if hasattr(self.agent, "_invalidate_system_prompt"):
                self.agent._invalidate_system_prompt()

+            # Notify memory providers that session_id forked to a new branch.
+            # reset=False — the branched session carries the transcript
+            # forward, so provider state tracks the lineage. parent_session_id
+            # links the branch back to the original. See #6672.
+            try:
+                _mm = getattr(self.agent, "_memory_manager", None)
+                if _mm is not None:
+                    _mm.on_session_switch(
+                        new_session_id,
+                        parent_session_id=parent_session_id or "",
+                        reset=False,
+                        reason="branch",
+                    )
+            except Exception:
+                pass
+
        msg_count = len([m for m in self.conversation_history if m.get("role") == "user"])
        _cprint(
            f"  ⑂ Branched session \"{branch_title}\""
@@ -5979,7 +5981,29 @@ class HermesCLI:

        print(f"(._.) Unknown cron command: {subcommand}")
        print("  Available: list, add, edit, pause, resume, run, remove")
-    
+
+    def _handle_curator_command(self, cmd: str):
+        """Handle the /curator slash command.
+
+        Delegates to hermes_cli.curator so the CLI and the `hermes curator`
+        subcommand share one handler set. A bare ``/curator`` runs ``status``.
+        Nothing propagates: parse errors and handler failures are printed.
+        """
+        import shlex
+
+        try:
+            # shlex raises ValueError on unbalanced quotes; keep it inside
+            # the try so a typo cannot escape into the interactive loop.
+            tokens = shlex.split(cmd)[1:] if cmd else []
+            from hermes_cli.curator import cli_main
+            cli_main(tokens or ["status"])
+        except SystemExit:
+            # argparse calls sys.exit() on --help or errors; swallow so we
+            # don't kill the interactive session.
+            pass
+        except Exception as exc:
+            print(f"(._.) curator: {exc}")
+
    def _handle_skills_command(self, cmd: str):
        """Handle /skills slash command — delegates to hermes_cli.skills_hub."""
        from hermes_cli.skills_hub import handle_skills_slash
@@ -6064,6 +6088,16 @@ class HermesCLI:
        canonical = _cmd_def.name if _cmd_def else _base_word
        
        if canonical in ("quit", "exit", "q"):
+            # Parse --delete flag: /exit --delete also removes the current
+            # session's transcripts + SQLite history. Ported from
+            # google-gemini/gemini-cli#19332.
+            _rest = cmd_original.split(None, 1)
+            _args = (_rest[1] if len(_rest) > 1 else "").strip().lower()
+            if _args in ("--delete", "-d"):
+                self._delete_session_on_exit = True
+            elif _args:
+                _cprint(f"  {_DIM}✗ Unknown argument: {_escape(_args)}. Use /exit --delete to also remove session history.{_RST}")
+                return True
            return False
        elif canonical == "help":
            self.show_help()
@@ -6223,6 +6257,8 @@ class HermesCLI:
            self.save_conversation()
        elif canonical == "cron":
            self._handle_cron_command(cmd_original)
+        elif canonical == "curator":
+            self._handle_curator_command(cmd_original)
        elif canonical == "skills":
            with self._busy_command(self._slow_command_status(cmd_original)):
                self._handle_skills_command(cmd_original)
@@ -6606,34 +6642,7 @@ class HermesCLI:

        Returns True if a launch command was executed (doesn't guarantee success).
        """
-        import subprocess as _sp
-
-        candidates = _get_chrome_debug_candidates(system)
-
-        if not candidates:
-            return False
-
-        # Dedicated profile dir so debug Chrome won't collide with normal Chrome
-        data_dir = str(_hermes_home / "chrome-debug")
-        os.makedirs(data_dir, exist_ok=True)
-
-        chrome = candidates[0]
-        try:
-            _sp.Popen(
-                [
-                    chrome,
-                    f"--remote-debugging-port={port}",
-                    f"--user-data-dir={data_dir}",
-                    "--no-first-run",
-                    "--no-default-browser-check",
-                ],
-                stdout=_sp.DEVNULL,
-                stderr=_sp.DEVNULL,
-                start_new_session=True,  # detach from terminal
-            )
-            return True
-        except Exception:
-            return False
+        return try_launch_chrome_debug(port, system)

    def _handle_browser_command(self, cmd: str):
        """Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
@@ -6642,13 +6651,44 @@ class HermesCLI:
        parts = cmd.strip().split(None, 1)
        sub = parts[1].lower().strip() if len(parts) > 1 else "status"

-        _DEFAULT_CDP = "http://127.0.0.1:9222"
+        _DEFAULT_CDP = DEFAULT_BROWSER_CDP_URL
        current = os.environ.get("BROWSER_CDP_URL", "").strip()

        if sub.startswith("connect"):
            # Optionally accept a custom CDP URL: /browser connect ws://host:port
            connect_parts = cmd.strip().split(None, 2)  # ["/browser", "connect", "ws://..."]
            cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP
+            parsed_cdp = urlparse(cdp_url if "://" in cdp_url else f"http://{cdp_url}")
+            if parsed_cdp.scheme not in {"http", "https", "ws", "wss"}:
+                print()
+                print(
+                    f"   ⚠ Unsupported browser url scheme: {parsed_cdp.scheme or '(missing)'} "
+                    "(expected one of: http, https, ws, wss)"
+                )
+                print()
+                return
+            try:
+                _port = parsed_cdp.port or (443 if parsed_cdp.scheme in {"https", "wss"} else 80)
+            except ValueError:
+                print()
+                print(f"   ⚠ Invalid port in browser url: {cdp_url}")
+                print()
+                return
+            if not parsed_cdp.hostname:
+                print()
+                print(f"   ⚠ Missing host in browser url: {cdp_url}")
+                print()
+                return
+            _host = parsed_cdp.hostname
+            if parsed_cdp.path.startswith("/devtools/browser/"):
+                cdp_url = parsed_cdp.geturl()
+            else:
+                cdp_url = parsed_cdp._replace(
+                    path="",
+                    params="",
+                    query="",
+                    fragment="",
+                ).geturl()

            # Clear any existing browser sessions so the next tool call uses the new backend
            try:
@@ -6659,20 +6699,13 @@ class HermesCLI:

            print()

-            # Extract port for connectivity checks
-            _port = 9222
-            try:
-                _port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0])
-            except (ValueError, IndexError):
-                pass
-
            # Check if Chrome is already listening on the debug port
            import socket
            _already_open = False
            try:
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                s.settimeout(1)
-                s.connect(("127.0.0.1", _port))
+                s.connect((_host, _port))
                s.close()
                _already_open = True
            except (OSError, socket.timeout):
@@ -6690,7 +6723,7 @@ class HermesCLI:
                        try:
                            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                            s.settimeout(1)
-                            s.connect(("127.0.0.1", _port))
+                            s.connect((_host, _port))
                            s.close()
                            _already_open = True
                            break
@@ -6703,33 +6736,22 @@ class HermesCLI:
                        print("     Try again in a few seconds — the debug instance may still be starting")
                else:
                    print("   ⚠ Could not auto-launch Chrome")
-                    # Show manual instructions as fallback
-                    _data_dir = str(_hermes_home / "chrome-debug")
                    sys_name = _plat.system()
-                    if sys_name == "Darwin":
-                        chrome_cmd = (
-                            'open -a "Google Chrome" --args'
-                            f" --remote-debugging-port=9222"
-                            f' --user-data-dir="{_data_dir}"'
-                            " --no-first-run --no-default-browser-check"
-                        )
-                    elif sys_name == "Windows":
-                        chrome_cmd = (
-                            f'chrome.exe --remote-debugging-port=9222'
-                            f' --user-data-dir="{_data_dir}"'
-                            f" --no-first-run --no-default-browser-check"
-                        )
+                    chrome_cmd = manual_chrome_debug_command(_port, sys_name)
+                    if chrome_cmd:
+                        print(f"     Launch Chrome manually:")
+                        print(f"     {chrome_cmd}")
                    else:
-                        chrome_cmd = (
-                            f"google-chrome --remote-debugging-port=9222"
-                            f' --user-data-dir="{_data_dir}"'
-                            f" --no-first-run --no-default-browser-check"
-                        )
-                    print(f"     Launch Chrome manually:")
-                    print(f"     {chrome_cmd}")
+                        print("     No Chrome/Chromium executable found in this environment")
            else:
                print(f"   ⚠ Port {_port} is not reachable at {cdp_url}")

+            if not _already_open:
+                print()
+                print("Browser not connected — start Chrome with remote debugging and retry /browser connect")
+                print()
+                return
+
            os.environ["BROWSER_CDP_URL"] = cdp_url
            # Eagerly start the CDP supervisor so pending_dialogs + frame_tree
            # show up in the next browser_snapshot.  No-op if already started.
@@ -9344,6 +9366,21 @@ class HermesCLI:
            self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
        except Exception:
            pass  # Tips are non-critical — never break startup
+
+        # Curator — kick off a background skill-maintenance pass on startup
+        # if the schedule says we're due.  Runs in a daemon thread so it
+        # never blocks the interactive loop.  Best-effort; any failure is
+        # swallowed to avoid breaking session startup.
+        try:
+            from agent.curator import maybe_run_curator
+            maybe_run_curator(
+                idle_for_seconds=float("inf"),  # CLI startup = fully idle
+                on_summary=lambda msg: self._console_print(
+                    f"[dim #6b7684]💾 {msg}[/]"
+                ),
+            )
+        except Exception:
+            pass
        if self.preloaded_skills and not self._startup_skills_line_shown:
            skills_label = ", ".join(self.preloaded_skills)
            self._console_print(
@@ -11152,6 +11189,19 @@ class HermesCLI:
                    self._session_db.end_session(self.agent.session_id, "cli_close")
                except (Exception, KeyboardInterrupt) as e:
                    logger.debug("Could not close session in DB: %s", e)
+                # /exit --delete: also remove the current session's transcripts
+                # and SQLite history. Ported from google-gemini/gemini-cli#19332.
+                if getattr(self, '_delete_session_on_exit', False):
+                    try:
+                        from hermes_constants import get_hermes_home as _ghh
+                        _sessions_dir = _ghh() / "sessions"
+                        _sid = self.agent.session_id
+                        if self._session_db.delete_session(_sid, sessions_dir=_sessions_dir):
+                            _cprint(f"  {_DIM}✓ Session {_escape(_sid)} deleted{_RST}")
+                        else:
+                            _cprint(f"  {_DIM}✗ Session {_escape(_sid)} not found for deletion{_RST}")
+                    except (Exception, KeyboardInterrupt) as e:
+                        logger.debug("Could not delete session on exit: %s", e)
            # Plugin hook: on_session_end — safety net for interrupted exits.
            # run_conversation() already fires this per-turn on normal completion,
            # so only fire here if the agent was mid-turn (_agent_running) when
@@ -313,13 +313,21 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
            logger.warning(
-                "Cannot compute next run for cron schedule %r: 'croniter' "
-                "is not installed. Install the 'cron' extra (pip install "
-                "'hermes-agent[cron]') to re-enable recurring cron jobs.",
+                "Cannot compute next run for cron schedule %r: 'croniter' is "
+                "not installed. croniter is a core dependency as of v0.9.x; "
+                "reinstall hermes-agent or run 'pip install croniter' in your "
+                "runtime env.",
                schedule.get("expr"),
            )
            return None
-        cron = croniter(schedule["expr"], now)
+        # Use last_run_at as the croniter base when available, consistent
+        # with interval jobs.  This ensures that after a crash/restart,
+        # the next run is anchored to the actual last execution time
+        # rather than to an arbitrary restart time.
+        base_time = now
+        if last_run_at:
+            base_time = _ensure_aware(datetime.fromisoformat(last_run_at))
+        cron = croniter(schedule["expr"], base_time)
        next_run = cron.get_next(datetime)
        return next_run.isoformat()

@@ -233,12 +233,32 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
    }


+def _normalize_deliver_value(deliver) -> str:
+    """Normalize a stored/submitted ``deliver`` value to its canonical string form.
+
+    The contract is that ``deliver`` is a string (``"local"``, ``"origin"``,
+    ``"telegram"``, ``"telegram:-1001:17"``, or comma-separated combinations).
+    Some callers (MCP clients passing an array, hand-edited ``jobs.json``)
+    store a list/tuple like ``["telegram"]``; ``str()`` of that is the literal
+    ``"['telegram']"``, which is no known platform and fails resolution
+    silently.  Sequences are therefore flattened to a comma-separated string,
+    skipping ``None`` and blank items.  Falsy or blank input, and sequences
+    with no usable items, return ``"local"``; strings come back stripped.
+    """
+    if not deliver or (isinstance(deliver, str) and not deliver.strip()):
+        return "local"
+    if isinstance(deliver, (list, tuple)):
+        parts = [s for s in (str(p).strip() for p in deliver if p is not None) if s]
+        return ",".join(parts) if parts else "local"
+    return str(deliver).strip()
+
+
 def _resolve_delivery_targets(job: dict) -> List[dict]:
    """Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
-    deliver = job.get("deliver", "local")
+    deliver = _normalize_deliver_value(job.get("deliver", "local"))
    if deliver == "local":
        return []
-    parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
+    parts = [p.strip() for p in deliver.split(",") if p.strip()]
    seen = set()
    targets = []
    for part in parts:
@@ -1013,10 +1033,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
-            # .cursorrules from that directory; otherwise preserve the old
-            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
+            # Cron jobs should always inherit the user's SOUL.md identity from
+            # HERMES_HOME. When a workdir is configured, also inject project
+            # context files (AGENTS.md / CLAUDE.md / .cursorrules) from there.
+            # Without a workdir, keep cwd context discovery disabled.
            skip_context_files=not bool(_job_workdir),
+            load_soul_identity=True,
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -1031,7 +1053,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        #
        # Uses the agent's built-in activity tracker (updated by
        # _touch_activity() on every tool call, API call, and stream delta).
-        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
+        _raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        if _raw_cron_timeout:
+            try:
+                _cron_timeout = float(_raw_cron_timeout)
+            except (ValueError, TypeError):
+                logger.warning(
+                    "Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
+                    _raw_cron_timeout,
+                )
+                _cron_timeout = 600.0
+        else:
+            _cron_timeout = 600.0
        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
        _POLL_INTERVAL = 5.0
        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
@@ -7,7 +7,9 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
 - GET  /v1/models                  — lists hermes-agent as an available model
+- GET  /v1/capabilities            — machine-readable API capabilities for external UIs
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
+- GET  /v1/runs/{run_id}           — retrieve current run status
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
 - POST /v1/runs/{run_id}/stop    — interrupt a running agent
 - GET  /health                     — health check
@@ -590,6 +592,8 @@ class APIServerAdapter(BasePlatformAdapter):
        # Active run agent/task references for stop support
        self._active_run_agents: Dict[str, Any] = {}
        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
+        # Pollable run status for dashboards and external control-plane UIs.
+        self._run_statuses: Dict[str, Dict[str, Any]] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -808,6 +812,51 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

+    async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
+        """GET /v1/capabilities — describe what this API server supports.
+
+        Lets external UIs and orchestrators discover the plugin-safe
+        contract at runtime instead of scraping docs or assuming every
+        Hermes version exposes the same endpoints.  When ``_check_auth``
+        returns an error response, that response is sent back as-is.
+        """
+        denied = self._check_auth(request)
+        if denied:
+            return denied
+
+        def _route(method, path):
+            return {"method": method, "path": path}
+
+        always_on = (
+            "chat_completions", "chat_completions_streaming", "responses_api",
+            "responses_streaming", "run_submission", "run_status",
+            "run_events_sse", "run_stop", "tool_progress_events",
+        )
+        features = dict.fromkeys(always_on, True)
+        features["session_continuity_header"] = "X-Hermes-Session-Id"
+        features["cors"] = bool(self._cors_origins)
+
+        endpoints = {
+            "health": _route("GET", "/health"),
+            "health_detailed": _route("GET", "/health/detailed"),
+            "models": _route("GET", "/v1/models"),
+            "chat_completions": _route("POST", "/v1/chat/completions"),
+            "responses": _route("POST", "/v1/responses"),
+            "runs": _route("POST", "/v1/runs"),
+            "run_status": _route("GET", "/v1/runs/{run_id}"),
+            "run_events": _route("GET", "/v1/runs/{run_id}/events"),
+            "run_stop": _route("POST", "/v1/runs/{run_id}/stop"),
+        }
+
+        return web.json_response({
+            "object": "hermes.api_server.capabilities",
+            "platform": "hermes-agent",
+            "model": self._model_name,
+            "auth": {"type": "bearer", "required": bool(self._api_key)},
+            "features": features,
+            "endpoints": endpoints,
+        })
+
    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
@@ -932,39 +981,62 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

-            def _on_tool_progress(event_type, name, preview, args, **kwargs):
-                """Send tool progress as a separate SSE event.
+            # Track which tool_call_ids we've emitted a "running" lifecycle
+            # event for, so a "completed" event without a matching "running"
+            # (e.g. internal/filtered tools) is silently dropped instead of
+            # producing an orphaned event clients can't correlate.
+            _started_tool_call_ids: set[str] = set()

-                Previously, progress markers like ``⏰ list`` were injected
-                directly into ``delta.content``.  OpenAI-compatible frontends
-                (Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
-                the assistant message and send it back on subsequent requests.
-                After enough turns the model learns to *emit* the markers as
-                plain text instead of issuing real tool calls — silently
-                hallucinating tool results.  See #6972.
+            def _on_tool_start(tool_call_id, function_name, function_args):
+                """Emit ``hermes.tool.progress`` with ``status: running``.

-                The fix: push a tagged tuple ``("__tool_progress__", payload)``
-                onto the stream queue.  The SSE writer emits it as a custom
-                ``event: hermes.tool.progress`` line that compliant frontends
-                can render for UX but will *not* persist into conversation
-                history.  Clients that don't understand the custom event type
-                silently ignore it per the SSE specification.
+                Replaces the old ``tool_progress_callback("tool.started",
+                ...)`` emit so SSE consumers receive a single event per
+                tool start, carrying both the legacy ``tool``/``emoji``/
+                ``label`` payload (for #6972 frontends) and the new
+                ``toolCallId``/``status`` correlation fields (#16588).
+
+                Skips tools whose names start with ``_`` so internal
+                events (``_thinking``, …) stay off the wire — matching
+                the prior ``_on_tool_progress`` filter exactly.
                """
-                if event_type != "tool.started":
+                if not tool_call_id or function_name.startswith("_"):
                    return
-                if name.startswith("_"):
-                    return
-                from agent.display import get_tool_emoji
-                emoji = get_tool_emoji(name)
-                label = preview or name
+                _started_tool_call_ids.add(tool_call_id)
+                from agent.display import build_tool_preview, get_tool_emoji
+                label = build_tool_preview(function_name, function_args) or function_name
                _stream_q.put(("__tool_progress__", {
-                    "tool": name,
-                    "emoji": emoji,
+                    "tool": function_name,
+                    "emoji": get_tool_emoji(function_name),
                    "label": label,
+                    "toolCallId": tool_call_id,
+                    "status": "running",
+                }))
+
+            def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
+                """Emit the matching ``status: completed`` event.
+
+                Dropped if the start was filtered (internal tool, missing
+                id, or never seen) so clients never get an orphaned
+                ``completed`` they can't correlate to a prior ``running``.
+                """
+                if not tool_call_id or tool_call_id not in _started_tool_call_ids:
+                    return
+                _started_tool_call_ids.discard(tool_call_id)
+                _stream_q.put(("__tool_progress__", {
+                    "tool": function_name,
+                    "toolCallId": tool_call_id,
+                    "status": "completed",
                }))

            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
+            #
+            # ``tool_progress_callback`` is intentionally not wired here:
+            # it would duplicate every emit because ``run_agent`` fires it
+            # side-by-side with ``tool_start_callback``/``tool_complete_callback``.
+            # The structured callbacks are strictly richer (they carry the
+            # tool_call id), so they own the chat-completions SSE channel.
            agent_ref = [None]
            agent_task = asyncio.ensure_future(self._run_agent(
                user_message=user_message,
@@ -972,7 +1044,8 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
                stream_delta_callback=_on_delta,
-                tool_progress_callback=_on_tool_progress,
+                tool_start_callback=_on_tool_start,
+                tool_complete_callback=_on_tool_complete,
                agent_ref=agent_ref,
            ))

@@ -1087,7 +1160,8 @@ class APIServerAdapter(BasePlatformAdapter):
                Tagged tuples ``("__tool_progress__", payload)`` are sent
                as a custom ``event: hermes.tool.progress`` SSE event so
                frontends can display them without storing the markers in
-                conversation history.  See #6972.
+                conversation history.  See #6972 for the original event,
+                #16588 for the ``toolCallId``/``status`` lifecycle fields.
                """
                if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
                    event_data = json.dumps(item[1])
@@ -2297,10 +2371,31 @@ class APIServerAdapter(BasePlatformAdapter):

    _MAX_CONCURRENT_RUNS = 10  # Prevent unbounded resource allocation
    _RUN_STREAM_TTL = 300  # seconds before orphaned runs are swept
+    _RUN_STATUS_TTL = 3600  # seconds to retain terminal run status for polling
+
+    def _set_run_status(self, run_id: str, status: str, **fields: Any) -> Dict[str, Any]:
+        """Record *status* (plus any extra *fields*) for *run_id* and return the entry.
+
+        ``created_at`` is written only when the entry does not have one yet;
+        ``updated_at`` is refreshed on every call.
+        """
+        stamp = time.time()
+        first_seen = fields.pop("created_at", stamp)
+        entry = self._run_statuses.get(run_id, {})
+        entry.update(object="hermes.run", run_id=run_id, status=status, updated_at=stamp)
+        entry.setdefault("created_at", first_seen)
+        entry.update(fields)
+        self._run_statuses[run_id] = entry
+        return entry

    def _make_run_event_callback(self, run_id: str, loop: "asyncio.AbstractEventLoop"):
        """Return a tool_progress_callback that pushes structured events to the run's SSE queue."""
        def _push(event: Dict[str, Any]) -> None:
+            self._set_run_status(
+                run_id,
+                self._run_statuses.get(run_id, {}).get("status", "running"),
+                last_event=event.get("event"),
+            )
            q = self._run_streams.get(run_id)
            if q is None:
                return
@@ -2365,28 +2460,6 @@ class APIServerAdapter(BasePlatformAdapter):
        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

-        run_id = f"run_{uuid.uuid4().hex}"
-        loop = asyncio.get_running_loop()
-        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
-        self._run_streams[run_id] = q
-        self._run_streams_created[run_id] = time.time()
-
-        event_cb = self._make_run_event_callback(run_id, loop)
-
-        # Also wire stream_delta_callback so message.delta events flow through
-        def _text_cb(delta: Optional[str]) -> None:
-            if delta is None:
-                return
-            try:
-                loop.call_soon_threadsafe(q.put_nowait, {
-                    "event": "message.delta",
-                    "run_id": run_id,
-                    "timestamp": time.time(),
-                    "delta": delta,
-                })
-            except Exception:
-                pass
-
        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")

@@ -2434,11 +2507,42 @@ class APIServerAdapter(BasePlatformAdapter):
                        )
                    conversation_history.append({"role": msg["role"], "content": str(content)})

+        run_id = f"run_{uuid.uuid4().hex}"
        session_id = body.get("session_id") or stored_session_id or run_id
        ephemeral_system_prompt = instructions
+        loop = asyncio.get_running_loop()
+        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
+        created_at = time.time()
+        self._run_streams[run_id] = q
+        self._run_streams_created[run_id] = created_at
+
+        event_cb = self._make_run_event_callback(run_id, loop)
+
+        # Also wire stream_delta_callback so message.delta events flow through.
+        def _text_cb(delta: Optional[str]) -> None:
+            if delta is None:
+                return
+            try:
+                loop.call_soon_threadsafe(q.put_nowait, {
+                    "event": "message.delta",
+                    "run_id": run_id,
+                    "timestamp": time.time(),
+                    "delta": delta,
+                })
+            except Exception:
+                pass
+
+        self._set_run_status(
+            run_id,
+            "queued",
+            created_at=created_at,
+            session_id=session_id,
+            model=body.get("model", self._model_name),
+        )

        async def _run_and_close():
            try:
+                self._set_run_status(run_id, "running")
                agent = self._create_agent(
                    ephemeral_system_prompt=ephemeral_system_prompt,
                    session_id=session_id,
@@ -2468,8 +2572,36 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output": final_response,
                    "usage": usage,
                })
+                self._set_run_status(
+                    run_id,
+                    "completed",
+                    output=final_response,
+                    usage=usage,
+                    last_event="run.completed",
+                )
+            except asyncio.CancelledError:
+                self._set_run_status(
+                    run_id,
+                    "cancelled",
+                    last_event="run.cancelled",
+                )
+                try:
+                    q.put_nowait({
+                        "event": "run.cancelled",
+                        "run_id": run_id,
+                        "timestamp": time.time(),
+                    })
+                except Exception:
+                    pass
+                raise
            except Exception as exc:
                logger.exception("[api_server] run %s failed", run_id)
+                self._set_run_status(
+                    run_id,
+                    "failed",
+                    error=str(exc),
+                    last_event="run.failed",
+                )
                try:
                    q.put_nowait({
                        "event": "run.failed",
@@ -2499,6 +2631,21 @@ class APIServerAdapter(BasePlatformAdapter):

        return web.json_response({"run_id": run_id, "status": "started"}, status=202)

+    async def _handle_get_run(self, request: "web.Request") -> "web.Response":
+        """GET /v1/runs/{run_id} — report the stored status entry for one run."""
+        denied = self._check_auth(request)
+        if denied:
+            return denied
+
+        run_id = request.match_info["run_id"]
+        entry = self._run_statuses.get(run_id)
+        if entry is not None:
+            return web.json_response(entry)
+        # No status entry is stored under this id.
+        return web.json_response(
+            _openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404,
+        )
+
    async def _handle_run_events(self, request: "web.Request") -> "web.StreamResponse":
        """GET /v1/runs/{run_id}/events — SSE stream of structured agent lifecycle events."""
        auth_err = self._check_auth(request)
@@ -2561,6 +2708,8 @@ class APIServerAdapter(BasePlatformAdapter):
        if agent is None and task is None:
            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)

+        self._set_run_status(run_id, "stopping", last_event="run.stopping")
+
        if agent is not None:
            try:
                agent.interrupt("Stop requested via API")
@@ -2603,6 +2752,15 @@ class APIServerAdapter(BasePlatformAdapter):
                self._active_run_agents.pop(run_id, None)
                self._active_run_tasks.pop(run_id, None)

+            stale_statuses = [
+                run_id
+                for run_id, status in list(self._run_statuses.items())
+                if status.get("status") in {"completed", "failed", "cancelled"}
+                and now - float(status.get("updated_at", 0) or 0) > self._RUN_STATUS_TTL
+            ]
+            for run_id in stale_statuses:
+                self._run_statuses.pop(run_id, None)
+
    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
    # ------------------------------------------------------------------
@@ -2621,6 +2779,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
+            self._app.router.add_get("/v1/capabilities", self._handle_capabilities)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
            self._app.router.add_post("/v1/responses", self._handle_responses)
            self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
@@ -2636,6 +2795,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
+            self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
@@ -31,6 +31,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
@@ -162,6 +163,10 @@ class SignalAdapter(BasePlatformAdapter):
    """Signal messenger adapter using signal-cli HTTP daemon."""

    platform = Platform.SIGNAL
+    # Signal has no real edit API for already-sent messages. Mark it explicitly
+    # so streaming suppresses the visible cursor instead of leaving a stale tofu
+    # square behind in chat clients when edit attempts fail.
+    SUPPORTS_MESSAGE_EDITING = False

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.SIGNAL)
@@ -488,6 +493,11 @@ class SignalAdapter(BasePlatformAdapter):
        if text and mentions:
            text = _render_mentions(text, mentions)

+        # Extract quote (reply-to) context from Signal dataMessage
+        quote_data = data_message.get("quote") or {}
+        reply_to_id = str(quote_data.get("id")) if quote_data.get("id") else None
+        reply_to_text = quote_data.get("text")
+
        # Process attachments
        attachments_data = data_message.get("attachments", [])
        media_urls = []
@@ -541,7 +551,9 @@ class SignalAdapter(BasePlatformAdapter):
        else:
            timestamp = datetime.now(tz=timezone.utc)

-        # Build and dispatch event
+        # Build and dispatch event.
+        # Store raw envelope data in raw_message so on_processing_start/complete
+        # can extract targetAuthor + targetTimestamp for sendReaction.
        event = MessageEvent(
            source=source,
            text=text or "",
@@ -549,6 +561,9 @@ class SignalAdapter(BasePlatformAdapter):
            media_urls=media_urls,
            media_types=media_types,
            timestamp=timestamp,
+            raw_message={"sender": sender, "timestamp_ms": ts_ms},
+            reply_to_message_id=reply_to_id,
+            reply_to_text=reply_to_text,
        )

        logger.debug("Signal: message from %s in %s: %s",
@@ -707,6 +722,159 @@ class SignalAdapter(BasePlatformAdapter):
                logger.debug("Signal RPC %s failed: %s", method, e)
            return None

+    # ------------------------------------------------------------------
+    # Formatting — markdown → Signal body ranges
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _markdown_to_signal(text: str) -> tuple:
+        """Convert markdown to plain text + Signal textStyles list.
+
+        Signal doesn't render markdown.  Instead it uses ``bodyRanges``
+        (exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
+        with the format ``start:length:STYLE``.
+
+        Positions are measured in **UTF-16 code units** (not Python code
+        points) because that's what the Signal protocol uses.
+
+        Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
+        (Signal's SPOILER style is not currently mapped — no standard
+        markdown syntax for it; would need ``||spoiler||`` parsing.)
+
+        Fenced code blocks are opaque: heading and inline markers inside
+        them are left untouched so code is delivered verbatim.
+
+        Returns ``(plain_text, styles_list)`` where *styles_list* may be
+        empty if there's nothing to format.
+        """
+        import re
+
+        def _utf16_len(s: str) -> int:
+            """Length of *s* in UTF-16 code units."""
+            return len(s.encode("utf-16-le")) // 2
+
+        def _shift(pos: int, cuts: list) -> int:
+            """Map *pos* across *cuts*: ``(position, length)`` spans removed from the text."""
+            return pos - sum(min(n, pos - at) for at, n in cuts if at < pos)
+
+        def _reanchor(spans: list, cuts: list) -> list:
+            """Shift ``(start, length, style)`` spans past *cuts*, dropping emptied ones."""
+            moved = ((_shift(s, cuts), _shift(s + n, cuts), sty) for s, n, sty in spans)
+            return [(a, b - a, sty) for a, b, sty in moved if b > a]
+
+        # Pre-process: normalize whitespace before any position tracking
+        # so later operations don't invalidate recorded offsets.
+        text = re.sub(r"\n{3,}", "\n\n", text)
+        text = text.strip()
+
+        # --- Phase 1: fenced code blocks  ```...``` → MONOSPACE ---
+        # The info string counts only when a newline follows, so ```x``` keeps "x".
+        code_styles: list = []
+        _CB = re.compile(r"```(?:[a-zA-Z0-9_+-]*[ \t]*\r?\n)?(.*?)```", re.DOTALL)
+        while m := _CB.search(text):
+            inner = m.group(1).rstrip("\n")
+            code_styles.append((m.start(), len(inner), "MONOSPACE"))
+            text = text[: m.start()] + inner + text[m.end() :]
+        code_spans = [(s, s + n) for s, n, _ in code_styles if n]
+
+        # --- Phase 2: heading markers  # Foo → Foo (BOLD) ---
+        # [ \t]+ (not \s+) keeps the marker on its own line; "# ..." lines
+        # inside a code block are comments, not headings, and are skipped.
+        _HEADING = re.compile(r"^#{1,6}[ \t]+", re.MULTILINE)
+        heading_styles: list = []
+        heading_cuts: list = []  # (position, length) of stripped markers
+        new_text = ""
+        last_end = 0
+        for m in _HEADING.finditer(text):
+            if any(cs <= m.start() < ce for cs, ce in code_spans):
+                continue
+            eol = text.find("\n", m.end())
+            if eol == -1:
+                eol = len(text)
+            new_text += text[last_end : m.start()]
+            heading_styles.append((len(new_text), eol - m.end(), "BOLD"))
+            new_text += text[m.end() : eol]
+            heading_cuts.append((m.start(), m.end() - m.start()))
+            last_end = eol
+        new_text += text[last_end:]
+        text = new_text
+        # Stripping markers moved everything after them: re-anchor Phase 1.
+        code_styles = _reanchor(code_styles, heading_cuts)
+        styles = code_styles + heading_styles
+
+        # --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
+        # Collect ALL non-overlapping matches first, then strip every marker
+        # in one pass so positions are computed against the final text.
+        _PATTERNS = [
+            (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
+            (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
+            (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
+            (re.compile(r"`(.+?)`"), "MONOSPACE"),
+            (re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
+            (re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
+        ]
+
+        # Collect all non-overlapping matches (earlier patterns win ties).
+        # Code blocks are pre-claimed so no inline pattern fires inside them.
+        all_matches: list = []  # (start, end, g1_start, g1_end, style)
+        occupied: list = [(s, s + n) for s, n, _ in code_styles]
+        for pat, style in _PATTERNS:
+            for m in pat.finditer(text):
+                ms, me = m.start(), m.end()
+                if not any(ms < hi and me > lo for lo, hi in occupied):
+                    all_matches.append((ms, me, m.start(1), m.end(1), style))
+                    occupied.append((ms, me))
+        all_matches.sort()
+
+        # Each match removes its prefix markers (start..g1_start) and
+        # suffix markers (g1_end..end).
+        removals: list = []  # (position, length)
+        for ms, me, g1s, g1e, _ in all_matches:
+            if g1s > ms:
+                removals.append((ms, g1s - ms))
+            if me > g1e:
+                removals.append((g1e, me - g1e))
+
+        # Adjust Phase 1/2 styles for characters about to be removed.
+        adjusted_prior = _reanchor(styles, removals)
+
+        # Strip all inline markers in one pass → positions are correct.
+        result = ""
+        last_end = 0
+        inline_styles: list = []
+        for ms, me, g1s, g1e, sty in all_matches:
+            result += text[last_end:ms]
+            pos = len(result)
+            inner = text[g1s:g1e]
+            result += inner
+            inline_styles.append((pos, len(inner), sty))
+            last_end = me
+        result += text[last_end:]
+        text = result
+
+        styles = adjusted_prior + inline_styles
+
+        # Convert code-point offsets → UTF-16 code-unit offsets
+        style_strings = []
+        for cp_start, cp_len, stype in sorted(styles):
+            # Safety: skip any out-of-bounds styles
+            if cp_start < 0 or cp_start + cp_len > len(text):
+                continue
+            u16_start = _utf16_len(text[:cp_start])
+            u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
+            style_strings.append(f"{u16_start}:{u16_len}:{stype}")
+
+        return text, style_strings
+
+    def format_message(self, content: str) -> str:
+        """Return *content* unchanged; no markdown is stripped here.
+
+        The actual rich formatting happens in send() via _markdown_to_signal().
+        """
+        # This is only called if someone uses the base-class send path.
+        # Our send() override bypasses this entirely.
+        return content
+
    # ------------------------------------------------------------------
    # Sending
    # ------------------------------------------------------------------
@@ -718,14 +886,22 @@ class SignalAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        """Send a text message."""
+        """Send a text message with native Signal formatting."""
        await self._stop_typing_indicator(chat_id)

+        plain_text, text_styles = self._markdown_to_signal(content)
+
        params: Dict[str, Any] = {
            "account": self.account,
-            "message": content,
+            "message": plain_text,
        }

+        if text_styles:
+            if len(text_styles) == 1:
+                params["textStyle"] = text_styles[0]
+            else:
+                params["textStyles"] = text_styles
+
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
@@ -735,11 +911,10 @@ class SignalAdapter(BasePlatformAdapter):

        if result is not None:
            self._track_sent_timestamp(result)
-            # Use the timestamp from the RPC result as a pseudo message_id.
-            # Signal doesn't have real message IDs, but the stream consumer
-            # needs a truthy value to follow its edit→fallback path correctly.
-            _msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
-            return SendResult(success=True, message_id=_msg_id or None)
+            # Signal has no editable message identifier. Returning None keeps the
+            # stream consumer on the non-edit fallback path instead of pretending
+            # future edits can remove an in-progress cursor from the chat thread.
+            return SendResult(success=True, message_id=None)
        return SendResult(success=False, error="RPC send failed")

    def _track_sent_timestamp(self, rpc_result) -> None:
@@ -963,6 +1138,110 @@ class SignalAdapter(BasePlatformAdapter):
        _keep_typing finally block to clean up platform-level typing tasks."""
        await self._stop_typing_indicator(chat_id)

+    # ------------------------------------------------------------------
+    # Reactions
+    # ------------------------------------------------------------------
+
+    async def send_reaction(
+        self,
+        chat_id: str,
+        emoji: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """Issue a ``sendReaction`` RPC for one message; True unless it returns None.
+
+        Args:
+            chat_id: Target chat; "group:<id>" is sent as a group id, any other
+                value as a single recipient.
+            emoji: Reaction emoji string (e.g. "👀", "✅")
+            target_author: Phone number / UUID of the message author
+            target_timestamp: Signal timestamp (ms) of the message to react to
+        """
+        if chat_id.startswith("group:"):
+            destination: Dict[str, Any] = {"groupId": chat_id[6:]}
+        else:
+            destination = {"recipient": [chat_id]}
+        payload: Dict[str, Any] = {
+            "account": self.account,
+            "emoji": emoji,
+            "targetAuthor": target_author,
+            "targetTimestamp": target_timestamp,
+            **destination,
+        }
+        # A None result is the RPC helper's failure signal.
+        if await self._rpc("sendReaction", payload) is None:
+            logger.debug("Signal: sendReaction failed (chat=%s, emoji=%s)", chat_id[:20], emoji)
+            return False
+        return True
+
+    async def remove_reaction(
+        self,
+        chat_id: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """Clear a reaction via ``sendReaction`` with ``remove`` set and a blank emoji.
+
+        Returns True unless the RPC result is None.
+        """
+        destination = (
+            {"groupId": chat_id[6:]} if chat_id.startswith("group:") else {"recipient": [chat_id]}
+        )
+        payload: Dict[str, Any] = {
+            "account": self.account,
+            "emoji": "",
+            "targetAuthor": target_author,
+            "targetTimestamp": target_timestamp,
+            "remove": True,
+            **destination,
+        }
+        return await self._rpc("sendReaction", payload) is not None
+
+    # ------------------------------------------------------------------
+    # Processing Lifecycle Hooks (reactions as progress indicators)
+    # ------------------------------------------------------------------
+
+    def _extract_reaction_target(self, event: MessageEvent) -> Optional[tuple]:
+        """Pull the reaction target out of ``event.raw_message``.
+
+        Yields ``(sender, timestamp_ms)`` when the raw message is a dict with
+        both values truthy; otherwise None.
+        """
+        envelope = event.raw_message
+        if isinstance(envelope, dict):
+            sender = envelope.get("sender")
+            stamp = envelope.get("timestamp_ms")
+            if sender and stamp:
+                return (sender, stamp)
+        # Not a dict, or a key is missing/falsy: nothing to react to.
+        return None
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """Mark the triggering message with 👀; do nothing without a target."""
+        author_and_ts = self._extract_reaction_target(event)
+        if author_and_ts:  # falsy: no usable reaction target on this event
+            await self.send_reaction(event.source.chat_id, "👀", *author_and_ts)
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: "ProcessingOutcome") -> None:
+        """Replace the 👀 progress reaction with a final ✅ or ❌.
+
+        CANCELLED leaves the 👀 in place (matches Telegram); no target, no-op.
+        """
+        if outcome == ProcessingOutcome.CANCELLED:
+            return
+        author_and_ts = self._extract_reaction_target(event)
+        if author_and_ts:
+            conversation = event.source.chat_id
+            await self.remove_reaction(conversation, *author_and_ts)
+            verdict = None
+            if outcome == ProcessingOutcome.SUCCESS:
+                verdict = "✅"
+            elif outcome == ProcessingOutcome.FAILURE:
+                verdict = "❌"
+            if verdict:
+                await self.send_reaction(conversation, verdict, *author_and_ts)
+
    # ------------------------------------------------------------------
    # Chat Info
    # ------------------------------------------------------------------
@@ -92,6 +92,18 @@ SESSION_EXPIRED_ERRCODE = -14
 RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

+
+def _is_stale_session_ret(
+    ret: "Optional[int]", errcode: "Optional[int]", errmsg: "Optional[str]",
+) -> bool:
+    """Return True when iLink reports ret=-2 / errcode=-2 with 'unknown error'.
+
+    Treated as a stale session (like errcode=-14), not a genuine rate limit."""
+    if ret != RATE_LIMIT_ERRCODE and errcode != RATE_LIMIT_ERRCODE:
+        return False
+    return str(errmsg or "").strip().lower() == "unknown error"  # raw JSON: may be non-str/padded
+
+
 MEDIA_IMAGE = 1
 MEDIA_VIDEO = 2
 MEDIA_FILE = 3
@@ -1210,6 +1222,17 @@ class WeixinAdapter(BasePlatformAdapter):
        self._mark_connected()
        _LIVE_ADAPTERS[self._token] = self
        logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
+        if self._group_policy != "disabled":
+            logger.warning(
+                "[%s] WEIXIN_GROUP_POLICY=%s is set, but QR-login connects an iLink bot "
+                "identity (e.g. ...@im.bot) which typically cannot be invited into ordinary "
+                "WeChat groups. iLink usually does not deliver ordinary-group events for "
+                "these accounts, so group messages may never reach Hermes regardless of this "
+                "policy. If group delivery doesn't work, the limitation is on the iLink side, "
+                "not in Hermes.",
+                self.name,
+                self._group_policy,
+            )
        return True

    async def disconnect(self) -> None:
@@ -1254,7 +1277,8 @@ class WeixinAdapter(BasePlatformAdapter):
                ret = response.get("ret", 0)
                errcode = response.get("errcode", 0)
                if ret not in (0, None) or errcode not in (0, None):
-                    if ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE:
+                    if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
                        logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
                        await asyncio.sleep(600)
                        consecutive_failures = 0
@@ -1519,6 +1543,7 @@ class WeixinAdapter(BasePlatformAdapter):
                        is_session_expired = (
                            ret == SESSION_EXPIRED_ERRCODE
                            or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, resp.get("errmsg"))
                        )
                        # Session expired — strip token and retry once
                        if is_session_expired and not retried_without_token and context_token:
@@ -38,6 +38,7 @@ from typing import Dict, Optional, Any, List
 # gateway is a long-running daemon, so its boot cost matters less than
 # preserving the established test-patch surface.
 from agent.account_usage import fetch_account_usage, render_account_usage_lines
+from hermes_cli.config import cfg_get

 # --- Agent cache tuning ---------------------------------------------------
 # Bounds the per-session AIAgent cache to prevent unbounded growth in
@@ -46,6 +47,7 @@ from agent.account_usage import fetch_account_usage, render_account_usage_lines
 # from _enforce_agent_cache_cap() and _session_expiry_watcher() below.
 _AGENT_CACHE_MAX_SIZE = 128
 _AGENT_CACHE_IDLE_TTL_SECS = 3600.0  # evict agents idle for >1h
+_PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT = 30.0
 # Only auto-continue interrupted gateway turns while the interruption is fresh.
 # Stale tool-tail/resume markers can otherwise revive an unrelated old task
 # after a gateway restart when the user's next message starts new work.
@@ -265,6 +267,7 @@ if _config_path.exists():
                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
                "modal_image": "TERMINAL_MODAL_IMAGE",
                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
+                "vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
                "ssh_host": "TERMINAL_SSH_HOST",
                "ssh_user": "TERMINAL_SSH_USER",
                "ssh_port": "TERMINAL_SSH_PORT",
@@ -274,6 +277,8 @@ if _config_path.exists():
                "container_disk": "TERMINAL_CONTAINER_DISK",
                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
+                "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
+                "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
                "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
            }
@@ -286,6 +291,10 @@ if _config_path.exists():
                    # Only bridge explicit absolute paths from config.yaml.
                    if _cfg_key == "cwd" and str(_val) in (".", "auto", "cwd"):
                        continue
+                    # Expand shell tilde in cwd so subprocess.Popen never
+                    # receives a literal "~/" which the kernel rejects.
+                    if _cfg_key == "cwd" and isinstance(_val, str):
+                        _val = os.path.expanduser(_val)
                    if isinstance(_val, list):
                        os.environ[_env_var] = json.dumps(_val)
                    else:
@@ -1153,6 +1162,33 @@ class GatewayRunner:
                e,
            )

+    def _platform_connect_timeout_secs(self) -> float:
+        """Resolve the connect timeout (seconds) applied to each platform adapter.
+
+        Reads HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT; negatives clamp to 0.0 and
+        unset/blank/unparsable values yield the module default.
+        """
+        env_text = os.getenv("HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT", "").strip()
+        if not env_text:
+            return _PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT
+        try:
+            return max(0.0, float(env_text))
+        except ValueError:
+            logger.warning("Ignoring invalid HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT=%r", env_text)
+            return _PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT
+
+    async def _connect_adapter_with_timeout(self, adapter, platform) -> bool:
+        """Connect ``adapter``; raise TimeoutError if it exceeds the connect timeout."""
+        deadline = self._platform_connect_timeout_secs()
+        if deadline <= 0:
+            # A non-positive timeout disables the bound entirely.
+            return await adapter.connect()
+        try:
+            return await asyncio.wait_for(adapter.connect(), timeout=deadline)
+        except asyncio.TimeoutError as exc:
+            detail = f"{platform.value} connect timed out after {deadline:g}s"
+            raise TimeoutError(detail) from exc
+
    @property
    def should_exit_cleanly(self) -> bool:
        return self._exit_cleanly
@@ -1545,7 +1581,7 @@ class GatewayRunner:
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
-                return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
+                return (cfg_get(cfg, "agent", "system_prompt", default="") or "").strip()
        except Exception:
            pass
        return ""
@@ -1566,7 +1602,7 @@ class GatewayRunner:
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
-                effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
+                effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
        except Exception:
            pass
        result = parse_reasoning_effort(effort)
@@ -1649,7 +1685,7 @@ class GatewayRunner:
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
-                raw = str(cfg.get("agent", {}).get("service_tier", "") or "").strip()
+                raw = str(cfg_get(cfg, "agent", "service_tier", default="") or "").strip()
        except Exception:
            pass

@@ -1670,7 +1706,7 @@ class GatewayRunner:
            if cfg_path.exists():
                with open(cfg_path, encoding="utf-8") as _f:
                    cfg = _y.safe_load(_f) or {}
-                return bool(cfg.get("display", {}).get("show_reasoning", False))
+                return bool(cfg_get(cfg, "display", "show_reasoning", default=False))
        except Exception:
            pass
        return False
@@ -1686,7 +1722,7 @@ class GatewayRunner:
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
-                    mode = str(cfg.get("display", {}).get("busy_input_mode", "") or "").strip().lower()
+                    mode = str(cfg_get(cfg, "display", "busy_input_mode", default="") or "").strip().lower()
            except Exception:
                pass
        if mode == "queue":
@@ -1706,7 +1742,7 @@ class GatewayRunner:
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
-                    raw = str(cfg.get("agent", {}).get("restart_drain_timeout", "") or "").strip()
+                    raw = str(cfg_get(cfg, "agent", "restart_drain_timeout", default="") or "").strip()
            except Exception:
                pass
        value = parse_restart_drain_timeout(raw)
@@ -1739,7 +1775,7 @@ class GatewayRunner:
                if cfg_path.exists():
                    with open(cfg_path, encoding="utf-8") as _f:
                        cfg = _y.safe_load(_f) or {}
-                    raw = cfg.get("display", {}).get("background_process_notifications")
+                    raw = cfg_get(cfg, "display", "background_process_notifications")
                    if raw is False:
                        mode = "off"
                    elif raw not in (None, ""):
@@ -2378,6 +2414,7 @@ class GatewayRunner:

        # Discover and load event hooks
        self.hooks.discover_and_load()
+
        
        # Recover background processes from checkpoint (crash recovery)
        try:
@@ -2454,7 +2491,7 @@ class GatewayRunner:
                error_message=None,
            )
            try:
-                success = await adapter.connect()
+                success = await self._connect_adapter_with_timeout(adapter, platform)
                if success:
                    self.adapters[platform] = adapter
                    self._sync_voice_mode_state_to_adapter(adapter)
@@ -2845,7 +2882,7 @@ class GatewayRunner:
                    adapter.set_session_store(self.session_store)
                    adapter.set_busy_session_handler(self._handle_active_session_busy_message)

-                    success = await adapter.connect()
+                    success = await self._connect_adapter_with_timeout(adapter, platform)
                    if success:
                        self.adapters[platform] = adapter
                        self._sync_voice_mode_state_to_adapter(adapter)
@@ -6444,7 +6481,7 @@ class GatewayRunner:

        try:
            config = _load_gateway_config()
-            personalities = config.get("agent", {}).get("personalities", {}) if config else {}
+            personalities = cfg_get(config, "agent", "personalities", default={})
        except Exception:
            config = {}
            personalities = {}
@@ -7445,7 +7482,7 @@ class GatewayRunner:
        # --- check config gate ------------------------------------------------
        try:
            user_config = _load_gateway_config()
-            gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
+            gate_enabled = cfg_get(user_config, "display", "tool_progress_command", default=False)
        except Exception:
            gate_enabled = False

@@ -7809,6 +7846,13 @@ class GatewayRunner:
            return "Failed to switch session."
        self._clear_session_boundary_security_state(session_key)

+        # Evict any cached agent for this session so the next message
+        # rebuilds with the correct session_id end-to-end — mirrors
+        # /branch and /reset. Without this, the cached AIAgent (and its
+        # memory provider, which cached `_session_id` during initialize())
+        # keeps writing into the wrong session's record. See #6672.
+        self._evict_cached_agent(session_key)
+
        # Get the title for confirmation
        title = self._session_db.get_session_title(target_id) or name

@@ -10064,7 +10108,7 @@ class GatewayRunner:
                            tool_progress_hint_gateway,
                        )
                        _cfg = _load_gateway_config()
-                        gate_on = bool(_cfg.get("display", {}).get("tool_progress_command", False))
+                        gate_on = bool(cfg_get(_cfg, "display", "tool_progress_command", default=False))
                        if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG):
                            long_tool_hint_fired[0] = True
                            progress_queue.put(tool_progress_hint_gateway())
@@ -10221,6 +10265,20 @@ class GatewayRunner:
                        if progress_lines:
                            progress_lines[-1] = f"{base_msg} (×{count + 1})"
                        msg = progress_lines[-1] if progress_lines else base_msg
+                    elif isinstance(raw, tuple) and len(raw) >= 1 and raw[0] == "__reset__":
+                        # Content bubble just landed on the platform — close off
+                        # the current tool-progress bubble so the next tool
+                        # starts a fresh bubble below the content. Without this,
+                        # tool lines keep editing the ORIGINAL progress message
+                        # above the new content, making the chat appear out of
+                        # order. Mirrors GatewayStreamConsumer.on_segment_break
+                        # on the content side. (Issue: tool + content
+                        # linearization regression after PR #7885.)
+                        progress_msg_id = None
+                        progress_lines = []
+                        last_progress_msg[0] = None
+                        repeat_count[0] = 0
+                        continue
                    else:
                        msg = raw
                        progress_lines.append(msg)
@@ -10290,6 +10348,24 @@ class GatewayRunner:
                                _, base_msg, count = raw
                                if progress_lines:
                                    progress_lines[-1] = f"{base_msg} (×{count + 1})"
+                            elif isinstance(raw, tuple) and len(raw) >= 1 and raw[0] == "__reset__":
+                                # Content-bubble marker during drain: close off
+                                # the current progress bubble and start a fresh
+                                # one for any tool lines that arrived after.
+                                if can_edit and progress_lines and progress_msg_id:
+                                    _pending_text = "\n".join(progress_lines)
+                                    try:
+                                        await adapter.edit_message(
+                                            chat_id=source.chat_id,
+                                            message_id=progress_msg_id,
+                                            content=_pending_text,
+                                        )
+                                    except Exception:
+                                        pass
+                                progress_msg_id = None
+                                progress_lines = []
+                                last_progress_msg[0] = None
+                                repeat_count[0] = 0
                            else:
                                progress_lines.append(raw)
                        except Exception:
@@ -10495,6 +10571,11 @@ class GatewayRunner:
                            chat_id=source.chat_id,
                            config=_consumer_cfg,
                            metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
+                            on_new_message=(
+                                (lambda: progress_queue.put(("__reset__",)))
+                                if progress_queue is not None
+                                else None
+                            ),
                        )
                        if _want_stream_deltas:
                            def _stream_delta_cb(text: str) -> None:
@@ -11702,6 +11783,7 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
    IMAGE_CACHE_EVERY = 60   # ticks — once per hour at default 60s interval
    CHANNEL_DIR_EVERY = 5    # ticks — every 5 minutes
    PASTE_SWEEP_EVERY = 60   # ticks — once per hour
+    CURATOR_EVERY = 60       # ticks — poll hourly (inner gate handles the real cadence)

    logger.info("Cron ticker started (interval=%ds)", interval)
    tick_count = 0
@@ -11753,6 +11835,21 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
            except Exception as e:
                logger.debug("Paste sweep error: %s", e)

+        # Curator — piggy-back on the existing cron ticker so long-running
+        # gateways get weekly skill maintenance without needing restarts.
+        # maybe_run_curator() is internally gated by config.interval_hours
+        # (7 days by default), so CURATOR_EVERY is just the poll rate — the
+        # real work only fires once per config interval.
+        if tick_count % CURATOR_EVERY == 0:
+            try:
+                from agent.curator import maybe_run_curator
+                maybe_run_curator(
+                    idle_for_seconds=float("inf"),
+                    on_summary=lambda msg: logger.info("curator: %s", msg),
+                )
+            except Exception as e:
+                logger.debug("Curator tick error: %s", e)
+
        stop_event.wait(timeout=interval)
    logger.info("Cron ticker stopped")

@@ -91,11 +91,20 @@ class GatewayStreamConsumer:
        chat_id: str,
        config: Optional[StreamConsumerConfig] = None,
        metadata: Optional[dict] = None,
+        on_new_message: Optional[callable] = None,
    ):
        self.adapter = adapter
        self.chat_id = chat_id
        self.cfg = config or StreamConsumerConfig()
        self.metadata = metadata
+        # Fired whenever a fresh content bubble is created on the platform
+        # (first-send of a new message, commentary, overflow chunk, or
+        # fallback continuation). The gateway uses this to linearize the
+        # tool-progress bubble: when content resumes after a tool batch,
+        # the next tool.started should open a NEW progress bubble below
+        # the content, not edit the old bubble above it.
+        # Called with no arguments. Exceptions are swallowed.
+        self._on_new_message = on_new_message
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
@@ -146,6 +155,16 @@ class GatewayStreamConsumer:
        if text:
            self._queue.put((_COMMENTARY, text))

+    def _notify_new_message(self) -> None:
+        """Invoke the on_new_message hook, if any, without letting it raise."""
+        hook = self._on_new_message
+        if hook is not None:
+            try:
+                hook()
+            except Exception:
+                # Best-effort notification: log and carry on.
+                logger.debug("on_new_message callback error", exc_info=True)
+
    def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
@@ -529,6 +548,9 @@ class GatewayStreamConsumer:
                self._message_id = str(result.message_id)
                self._already_sent = True
                self._last_sent_text = text
+                # Fresh content bubble — close off any stale tool bubble
+                # above so the next tool starts a new bubble below.
+                self._notify_new_message()
                return str(result.message_id)
            else:
                self._edit_supported = False
@@ -661,6 +683,9 @@ class GatewayStreamConsumer:
            sent_any_chunk = True
            last_successful_chunk = chunk
            last_message_id = result.message_id or last_message_id
+            # Each fallback chunk is a fresh platform message — notify
+            # so any stale tool-progress bubble gets closed off.
+            self._notify_new_message()

        self._message_id = last_message_id
        self._already_sent = True
@@ -744,6 +769,11 @@ class GatewayStreamConsumer:
            # tool..."), not the final response. Setting already_sent would cause
            # the final response to be incorrectly suppressed when there are
            # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
+            if result.success:
+                # Commentary counts as fresh content — close off any
+                # stale tool bubble above it so the next tool starts a
+                # new bubble below.
+                self._notify_new_message()
            return result.success
        except Exception as e:
            logger.error("Commentary send error: %s", e)
@@ -973,6 +1003,11 @@ class GatewayStreamConsumer:
                        # every delta/tool boundary when platforms accept a
                        # message but do not return an editable message id.
                        self._message_id = "__no_edit__"
+                    # Notify the gateway that a fresh content bubble was
+                    # created so any accumulated tool-progress bubble above
+                    # gets closed off — the next tool fires into a new
+                    # bubble below, preserving chronological order.
+                    self._notify_new_message()
                    return True
                else:
                    # Initial send failed — disable streaming for this session
@@ -72,6 +72,14 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"
+MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"
+MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"
+MINIMAX_OAUTH_GLOBAL_BASE = "https://api.minimax.io"
+MINIMAX_OAUTH_CN_BASE = "https://api.minimaxi.com"
+MINIMAX_OAUTH_GLOBAL_INFERENCE = "https://api.minimax.io/anthropic"
+MINIMAX_OAUTH_CN_INFERENCE = "https://api.minimaxi.com/anthropic"
+MINIMAX_OAUTH_REFRESH_SKEW_SECONDS = 60
 DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
@@ -126,7 +134,7 @@ class ProviderConfig:
    """Describes a known inference provider."""
    id: str
    name: str
-    auth_type: str  # "oauth_device_code", "oauth_external", or "api_key"
+    auth_type: str  # "oauth_device_code", "oauth_external", "oauth_minimax", or "api_key"
    portal_base_url: str = ""
    inference_base_url: str = ""
    client_id: str = ""
@@ -255,6 +263,17 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("MINIMAX_API_KEY",),
        base_url_env_var="MINIMAX_BASE_URL",
    ),
+    "minimax-oauth": ProviderConfig(
+        id="minimax-oauth",
+        name="MiniMax (OAuth \u00b7 minimax.io)",
+        auth_type="oauth_minimax",
+        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
+        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
+        client_id=MINIMAX_OAUTH_CLIENT_ID,
+        scope=MINIMAX_OAUTH_SCOPE,
+        extra={"region": "global", "cn_portal_base_url": MINIMAX_OAUTH_CN_BASE,
+               "cn_inference_base_url": MINIMAX_OAUTH_CN_INFERENCE},
+    ),
    "anthropic": ProviderConfig(
        id="anthropic",
        name="Anthropic",
@@ -1153,6 +1172,7 @@ def resolve_provider(
        "arcee-ai": "arcee", "arceeai": "arcee",
        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
+        "minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -4116,6 +4136,326 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


+# ==================== MiniMax Portal OAuth ====================
+
+def _minimax_pkce_pair() -> tuple:
+    """Build the PKCE material for one MiniMax OAuth attempt.
+
+    Returns:
+        A ``(code_verifier, code_challenge, state)`` tuple. The challenge is
+        the unpadded URL-safe base64 encoding of the verifier's SHA-256
+        digest (the "S256" method); ``state`` is an independent random token.
+    """
+    import secrets
+
+    # The verifier is capped at 96 characters.
+    code_verifier = secrets.token_urlsafe(64)[:96]
+    digest = hashlib.sha256(code_verifier.encode()).digest()
+    code_challenge = base64.urlsafe_b64encode(digest).decode().rstrip("=")
+    return code_verifier, code_challenge, secrets.token_urlsafe(16)
+
+
+def _minimax_request_user_code(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    code_challenge: str, state: str,
+) -> Dict[str, Any]:
+    """Start the MiniMax OAuth flow and return the user-code payload.
+
+    POSTs the PKCE challenge and ``state`` to ``{portal_base_url}/oauth/code``.
+
+    Returns:
+        The decoded JSON object. It is guaranteed to contain ``user_code``,
+        ``verification_uri`` and ``expired_in``, and to echo back ``state``.
+
+    Raises:
+        AuthError: On a non-200 response, a body that is not a JSON object,
+            a missing required field, or a ``state`` mismatch.
+    """
+    response = client.post(
+        f"{portal_base_url}/oauth/code",
+        data={
+            "response_type": "code",
+            "client_id": client_id,
+            "scope": MINIMAX_OAUTH_SCOPE,
+            "code_challenge": code_challenge,
+            "code_challenge_method": "S256",
+            "state": state,
+        },
+        headers={
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Accept": "application/json",
+            "x-request-id": str(uuid.uuid4()),
+        },
+    )
+    if response.status_code != 200:
+        raise AuthError(
+            f"MiniMax OAuth authorization failed: {response.text or response.reason_phrase}",
+            provider="minimax-oauth", code="authorization_failed",
+        )
+    # A 200 with an HTML or empty body would otherwise surface as a raw
+    # ValueError, which the CLI login entry point does not catch.
+    try:
+        payload = response.json()
+    except ValueError as exc:
+        raise AuthError(
+            "MiniMax OAuth authorization returned a response that is not valid JSON.",
+            provider="minimax-oauth", code="authorization_failed",
+        ) from exc
+    if not isinstance(payload, dict):
+        raise AuthError(
+            "MiniMax OAuth authorization returned an unexpected response shape.",
+            provider="minimax-oauth", code="authorization_failed",
+        )
+    for field in ("user_code", "verification_uri", "expired_in"):
+        if field not in payload:
+            raise AuthError(
+                f"MiniMax OAuth response missing field: {field}",
+                provider="minimax-oauth", code="authorization_incomplete",
+            )
+    # The server must echo our random state back unchanged.
+    if payload.get("state") != state:
+        raise AuthError(
+            "MiniMax OAuth state mismatch (possible CSRF).",
+            provider="minimax-oauth", code="state_mismatch",
+        )
+    return payload
+
+
+def _minimax_poll_token(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
+) -> Dict[str, Any]:
+    """Poll ``{portal_base_url}/oauth/token`` until approval, failure or timeout.
+
+    Args:
+        expired_in: Either an absolute unix-ms timestamp or a duration in
+            seconds (see the heuristic below).
+        interval_ms: Requested polling interval in milliseconds; falls back
+            to 2000 when missing and is never allowed below two seconds.
+
+    Returns:
+        The success payload, which carries non-empty ``access_token``,
+        ``refresh_token`` and ``expired_in`` entries.
+
+    Raises:
+        AuthError: On a non-200 response, an ``"error"`` status, an incomplete
+            success payload, or when the deadline passes without approval.
+    """
+    import time as _time
+
+    # OpenClaw compares expired_in against Date.now(), i.e. treats it as a
+    # unix-ms timestamp. Defensive parsing: a value too small to be a
+    # plausible timestamp is read as a duration in seconds from now.
+    looks_like_timestamp = expired_in > int(_time.time() * 1000) // 2
+    if looks_like_timestamp:
+        deadline = expired_in / 1000.0
+    else:
+        deadline = _time.time() + max(1, expired_in)
+
+    poll_every = max(2.0, (interval_ms or 2000) / 1000.0)
+    token_url = f"{portal_base_url}/oauth/token"
+    form = {
+        "grant_type": MINIMAX_OAUTH_GRANT_TYPE,
+        "client_id": client_id,
+        "user_code": user_code,
+        "code_verifier": code_verifier,
+    }
+    request_headers = {
+        "Content-Type": "application/x-www-form-urlencoded",
+        "Accept": "application/json",
+    }
+    required = ("access_token", "refresh_token", "expired_in")
+
+    while _time.time() < deadline:
+        response = client.post(token_url, data=form, headers=request_headers)
+
+        # An empty or undecodable body is treated as an empty payload.
+        payload = {}
+        try:
+            if response.text:
+                payload = response.json()
+        except Exception:
+            payload = {}
+
+        if response.status_code != 200:
+            msg = (payload.get("base_resp", {}) or {}).get("status_msg") or response.text
+            raise AuthError(
+                f"MiniMax OAuth error: {msg or 'unknown'}",
+                provider="minimax-oauth", code="token_exchange_failed",
+            )
+
+        status = payload.get("status")
+        if status == "error":
+            raise AuthError(
+                "MiniMax OAuth reported an error. Please try again later.",
+                provider="minimax-oauth", code="authorization_denied",
+            )
+        if status == "success":
+            if any(not payload.get(key) for key in required):
+                raise AuthError(
+                    "MiniMax OAuth success payload missing required token fields.",
+                    provider="minimax-oauth", code="token_incomplete",
+                )
+            return payload
+
+        # "pending" or any other status: wait and ask again.
+        _time.sleep(poll_every)
+
+    raise AuthError(
+        "MiniMax OAuth timed out before authorization completed.",
+        provider="minimax-oauth", code="timeout",
+    )
+
+
+def _minimax_save_auth_state(auth_state: Dict[str, Any]) -> None:
+    """Store *auth_state* under "minimax-oauth" in the Hermes auth store (~/.hermes/auth.json).
+
+    The load / update / save cycle runs while the auth-store lock is held.
+    """
+    provider_key = "minimax-oauth"
+    with _auth_store_lock():
+        store = _load_auth_store()
+        _save_provider_state(store, provider_key, auth_state)
+        _save_auth_store(store)
+
+
+def _minimax_oauth_login(
+    *, region: str = "global", open_browser: bool = True,
+    timeout_seconds: float = 15.0,
+) -> Dict[str, Any]:
+    """Run the interactive MiniMax OAuth flow, persist the tokens, return the state.
+
+    Args:
+        region: ``"cn"`` selects the China portal/inference URLs stored in the
+            provider's ``extra`` mapping; any other value uses the defaults.
+        open_browser: Try to open the verification URL in a browser. Forced
+            off when ``_is_remote_session()`` reports a remote session.
+        timeout_seconds: Timeout applied to the HTTP client.
+
+    Returns:
+        The auth-state dict that was written to the auth store.
+
+    Raises:
+        AuthError: Propagated from the user-code request or the token polling.
+    """
+    pconfig = PROVIDER_REGISTRY["minimax-oauth"]
+    if region == "cn":
+        portal_base_url = pconfig.extra["cn_portal_base_url"]
+        inference_base_url = pconfig.extra["cn_inference_base_url"]
+    else:
+        portal_base_url = pconfig.portal_base_url
+        inference_base_url = pconfig.inference_base_url
+
+    verifier, challenge, state = _minimax_pkce_pair()
+
+    if _is_remote_session():
+        open_browser = False
+
+    print(f"Starting Hermes login via MiniMax ({region}) OAuth...")
+    print(f"Portal: {portal_base_url}")
+
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
+                      headers={"Accept": "application/json"}) as client:
+        code_data = _minimax_request_user_code(
+            client, portal_base_url=portal_base_url,
+            client_id=pconfig.client_id,
+            code_challenge=challenge, state=state,
+        )
+        verification_url = str(code_data["verification_uri"])
+        user_code = str(code_data["user_code"])
+
+        print()
+        print("To continue:")
+        print(f"  1. Open: {verification_url}")
+        print(f"  2. If prompted, enter code: {user_code}")
+        if open_browser:
+            if webbrowser.open(verification_url):
+                print("  (Opened browser for verification)")
+            else:
+                print("  Could not open browser automatically -- use the URL above.")
+
+        interval_raw = code_data.get("interval")
+        interval_ms = int(interval_raw) if interval_raw is not None else None
+        print("Waiting for approval...")
+
+        token_data = _minimax_poll_token(
+            client, portal_base_url=portal_base_url,
+            client_id=pconfig.client_id,
+            user_code=user_code, code_verifier=verifier,
+            expired_in=int(code_data["expired_in"]),
+            interval_ms=interval_ms,
+        )
+
+    now = datetime.now(timezone.utc)
+    now_s = now.timestamp()
+    raw_expiry = int(token_data["expired_in"])
+    # Same defensive parsing the polling step applies to expired_in: a value
+    # large enough to be a unix-ms timestamp is an absolute expiry, anything
+    # smaller is a duration in seconds. Adding a timestamp to "now" would
+    # push the date out of datetime's range.
+    if raw_expiry > int(now_s * 1000) // 2:
+        expires_at = raw_expiry / 1000.0
+        expires_in_s = max(0, int(expires_at - now_s))
+    else:
+        expires_in_s = raw_expiry
+        expires_at = now_s + expires_in_s
+
+    auth_state = {
+        "provider": "minimax-oauth",
+        "region": region,
+        "portal_base_url": portal_base_url,
+        "inference_base_url": inference_base_url,
+        "client_id": pconfig.client_id,
+        "scope": MINIMAX_OAUTH_SCOPE,
+        "token_type": token_data.get("token_type", "Bearer"),
+        "access_token": token_data["access_token"],
+        "refresh_token": token_data["refresh_token"],
+        "resource_url": token_data.get("resource_url"),
+        "obtained_at": now.isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    }
+
+    _minimax_save_auth_state(auth_state)
+    print("\u2713 MiniMax OAuth login successful.")
+    if msg := token_data.get("notification_message"):
+        print(f"Note from MiniMax: {msg}")
+    return auth_state
+
+
+def _refresh_minimax_oauth_state(
+    state: Dict[str, Any], *, timeout_seconds: float = 15.0,
+    force: bool = False,
+) -> Dict[str, Any]:
+    """Refresh the MiniMax OAuth access token if close to expiry (or forced).
+
+    Args:
+        state: Stored auth state; must contain ``refresh_token``,
+            ``portal_base_url`` and ``client_id``.
+        timeout_seconds: Timeout applied to the HTTP client.
+        force: Refresh even when the token is not within the skew window.
+
+    Returns:
+        ``state`` unchanged when no refresh was needed, otherwise a copy with
+        the new token fields, which is also persisted to the auth store.
+
+    Raises:
+        AuthError: When there is no refresh token, the request fails, or the
+            response is malformed or does not report success.
+    """
+    if not state.get("refresh_token"):
+        raise AuthError(
+            "MiniMax OAuth state has no refresh_token; please re-login.",
+            provider="minimax-oauth", code="no_refresh_token", relogin_required=True,
+        )
+    try:
+        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        # Missing or unparseable expiry: treat the token as already expired.
+        expires_at = 0.0
+    now = time.time()
+    if not force and (expires_at - now) > MINIMAX_OAUTH_REFRESH_SKEW_SECONDS:
+        return state
+
+    portal_base_url = state["portal_base_url"]
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client:
+        response = client.post(
+            f"{portal_base_url}/oauth/token",
+            data={
+                "grant_type": "refresh_token",
+                "client_id": state["client_id"],
+                "refresh_token": state["refresh_token"],
+            },
+            headers={
+                "Content-Type": "application/x-www-form-urlencoded",
+                "Accept": "application/json",
+            },
+        )
+    if response.status_code != 200:
+        body = response.text.lower()
+        # These markers mean the refresh token itself is no longer usable.
+        relogin = any(m in body for m in
+                      ("invalid_grant", "refresh_token_reused", "invalid_refresh_token"))
+        raise AuthError(
+            f"MiniMax OAuth refresh failed: {response.text or response.reason_phrase}",
+            provider="minimax-oauth", code="refresh_failed",
+            relogin_required=relogin,
+        )
+    # Surface a malformed body as AuthError rather than a raw decode error.
+    try:
+        payload = response.json()
+    except ValueError as exc:
+        raise AuthError(
+            "MiniMax OAuth refresh returned a response that is not valid JSON.",
+            provider="minimax-oauth", code="refresh_failed",
+        ) from exc
+    if not isinstance(payload, dict) or payload.get("status") != "success":
+        raise AuthError(
+            "MiniMax OAuth refresh did not return success.",
+            provider="minimax-oauth", code="refresh_failed",
+            relogin_required=True,
+        )
+    if not payload.get("access_token") or not payload.get("expired_in"):
+        raise AuthError(
+            "MiniMax OAuth refresh payload missing required token fields.",
+            provider="minimax-oauth", code="refresh_failed",
+            relogin_required=True,
+        )
+
+    now_dt = datetime.now(timezone.utc)
+    now_s = now_dt.timestamp()
+    raw_expiry = int(payload["expired_in"])
+    # Same defensive parsing the login polling applies to expired_in: a value
+    # large enough to be a unix-ms timestamp is an absolute expiry, anything
+    # smaller is a duration in seconds.
+    if raw_expiry > int(now_s * 1000) // 2:
+        new_expires_at = raw_expiry / 1000.0
+        expires_in_s = max(0, int(new_expires_at - now_s))
+    else:
+        expires_in_s = raw_expiry
+        new_expires_at = now_s + expires_in_s
+
+    new_state = dict(state)
+    new_state.update({
+        "access_token": payload["access_token"],
+        # Keep the stored refresh token when the server omits or nulls it.
+        "refresh_token": payload.get("refresh_token") or state["refresh_token"],
+        "obtained_at": now_dt.isoformat(),
+        "expires_at": datetime.fromtimestamp(new_expires_at,
+                                             tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    })
+    _minimax_save_auth_state(new_state)
+    return new_state
+
+
+def resolve_minimax_oauth_runtime_credentials(
+    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
+) -> Dict[str, Any]:
+    """Return {provider, api_key, base_url, source} for minimax-oauth.
+
+    Args:
+        min_token_ttl_seconds: Minimum remaining lifetime the returned access
+            token should have. A token with less time left is refreshed first.
+
+    Raises:
+        AuthError: When no MiniMax OAuth login is stored, or when the refresh
+            step fails.
+    """
+    state = get_provider_auth_state("minimax-oauth")
+    if not state or not state.get("access_token"):
+        raise AuthError(
+            "Not logged into MiniMax OAuth. Run `hermes model` and select "
+            "MiniMax (OAuth).",
+            provider="minimax-oauth", code="not_logged_in", relogin_required=True,
+        )
+    # Honor the caller's TTL floor. The refresh helper only applies its own
+    # fixed skew, so force a refresh when less than the requested lifetime
+    # remains. With the default value this matches the helper's own check.
+    try:
+        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        # Missing or unparseable expiry: treat the token as already expired.
+        expires_at = 0.0
+    needs_refresh = (expires_at - time.time()) <= min_token_ttl_seconds
+    state = _refresh_minimax_oauth_state(state, force=needs_refresh)
+    return {
+        "provider": "minimax-oauth",
+        "api_key": state["access_token"],
+        "base_url": state["inference_base_url"].rstrip("/"),
+        "source": "oauth",
+    }
+
+
+def get_minimax_oauth_auth_status() -> Dict[str, Any]:
+    """Summarize the stored MiniMax OAuth login.
+
+    Returns:
+        ``{"logged_in": False, "provider": ...}`` when no access token is
+        stored. Otherwise a dict that also carries ``region`` and
+        ``expires_at``, with ``logged_in`` reflecting whether the stored
+        expiry is still in the future. If the expiry cannot be parsed, the
+        presence of an access token counts as logged in.
+    """
+    provider = "minimax-oauth"
+    state = get_provider_auth_state(provider)
+    access_token = state.get("access_token") if state else None
+    if not access_token:
+        return {"logged_in": False, "provider": provider}
+
+    try:
+        expiry = datetime.fromisoformat(state.get("expires_at", ""))
+        token_valid = expiry.timestamp() > time.time()
+    except Exception:
+        token_valid = bool(access_token)
+
+    status: Dict[str, Any] = {"logged_in": token_valid, "provider": provider}
+    status["region"] = state.get("region", "global")
+    status["expires_at"] = state.get("expires_at")
+    return status
+
+
+def _login_minimax_oauth(args, pconfig: ProviderConfig) -> None:
+    """CLI entry point for the MiniMax OAuth login.
+
+    Reads ``region``, ``no_browser`` and ``timeout`` from *args* (each is
+    optional) and runs the login flow. An ``AuthError`` is printed in
+    formatted form and the process exits with status 1.
+
+    NOTE(review): ``pconfig`` is not used here; presumably it is kept so the
+    signature matches other login handlers.
+    """
+    login_kwargs = {
+        "region": getattr(args, "region", None) or "global",
+        "open_browser": not getattr(args, "no_browser", False),
+        "timeout_seconds": getattr(args, "timeout", None) or 15.0,
+    }
+    try:
+        _minimax_oauth_login(**login_kwargs)
+    except AuthError as exc:
+        print(format_auth_error(exc))
+        raise SystemExit(1)
+
+
 def _nous_device_code_login(
    *,
    portal_base_url: Optional[str] = None,
@@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL


 # Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}


 def _get_custom_provider_names() -> list:
@@ -170,7 +170,7 @@ def auth_add_command(args) -> None:
        if provider.startswith(CUSTOM_POOL_PREFIX):
            requested_type = AUTH_TYPE_API_KEY
        else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY
+            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY

    pool = load_pool(provider)

@@ -333,6 +333,27 @@ def auth_add_command(args) -> None:
        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
        return

+    if provider == "minimax-oauth":
+        from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
+        creds = resolve_minimax_oauth_runtime_credentials()
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["api_key"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:minimax_oauth",
+            access_token=creds["api_key"],
+            base_url=creds.get("base_url"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")


@@ -0,0 +1,138 @@
+"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
+
+from __future__ import annotations
+
+import os
+import platform
+import shlex
+import shutil
+import subprocess
+
+from hermes_constants import get_hermes_home
+
+
+# Default remote-debugging port and the matching loopback URL.
+DEFAULT_BROWSER_CDP_PORT = 9222
+DEFAULT_BROWSER_CDP_URL = f"http://127.0.0.1:{DEFAULT_BROWSER_CDP_PORT}"
+
+# Absolute browser binaries probed on macOS, in priority order.
+_DARWIN_APPS = (
+    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
+    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+)
+
+# Path components joined onto each install base directory (Program Files,
+# LOCALAPPDATA, or the /mnt/c equivalents) to find a Windows browser binary.
+_WINDOWS_INSTALL_PARTS = (
+    ("Google", "Chrome", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chromium.exe"),
+    ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
+    ("Microsoft", "Edge", "Application", "msedge.exe"),
+)
+
+# Executable names resolved via shutil.which() on systems other than macOS
+# and Windows.
+_LINUX_BIN_NAMES = (
+    "google-chrome", "google-chrome-stable", "chromium-browser",
+    "chromium", "brave-browser", "microsoft-edge",
+)
+
+# Executable names resolved via shutil.which() on Windows.
+_WINDOWS_BIN_NAMES = (
+    "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
+    "chrome", "msedge", "brave", "chromium",
+)
+
+
+def get_chrome_debug_candidates(system: str) -> list[str]:
+    """List existing Chromium-family browser binaries for *system*.
+
+    Args:
+        system: Platform name as returned by ``platform.system()``.
+            ``"Darwin"`` and ``"Windows"`` have dedicated lookups; any other
+            value searches the Linux binary names on PATH and then the
+            Windows install locations under ``/mnt/c``.
+
+    Returns:
+        Paths that exist as files, in probe order, with duplicates (after
+        path normalization) removed.
+    """
+    found: list[str] = []
+    seen_keys: set[str] = set()
+
+    def consider(path: str | None) -> None:
+        # Ignore failed lookups, paths already recorded, and non-files.
+        if path:
+            key = os.path.normcase(os.path.normpath(path))
+            if key not in seen_keys and os.path.isfile(path):
+                found.append(path)
+                seen_keys.add(key)
+
+    def consider_installs(bases: tuple[str | None, ...]) -> None:
+        for base in bases:
+            if not base:
+                continue
+            for parts in _WINDOWS_INSTALL_PARTS:
+                consider(os.path.join(base, *parts))
+
+    if system == "Darwin":
+        for app_path in _DARWIN_APPS:
+            consider(app_path)
+    elif system == "Windows":
+        for exe_name in _WINDOWS_BIN_NAMES:
+            consider(shutil.which(exe_name))
+        consider_installs((
+            os.environ.get("ProgramFiles"),
+            os.environ.get("ProgramFiles(x86)"),
+            os.environ.get("LOCALAPPDATA"),
+        ))
+    else:
+        for bin_name in _LINUX_BIN_NAMES:
+            consider(shutil.which(bin_name))
+        consider_installs(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))
+    return found
+
+
+def chrome_debug_data_dir() -> str:
+    """Return the ``chrome-debug`` directory under the Hermes home, as a string."""
+    profile_dir = get_hermes_home() / "chrome-debug"
+    return str(profile_dir)
+
+
+def _chrome_debug_args(port: int) -> list[str]:
+    """Build the browser flags for a debug launch on *port*.
+
+    The flags set the remote-debugging port, point the browser at the
+    Hermes debug profile directory, and pass the no-first-run and
+    no-default-browser-check switches.
+    """
+    flags = [f"--remote-debugging-port={port}"]
+    flags.append(f"--user-data-dir={chrome_debug_data_dir()}")
+    flags += ["--no-first-run", "--no-default-browser-check"]
+    return flags
+
+
+def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
+    """Return a command line for starting a debug-enabled browser by hand.
+
+    Args:
+        port: Remote-debugging port to put in the command.
+        system: Platform name; defaults to ``platform.system()``.
+
+    Returns:
+        The quoted command for the first detected browser (Windows quoting
+        on Windows, POSIX shell quoting otherwise). With no browser detected,
+        an ``open -a "Google Chrome"`` command on macOS and ``None`` elsewhere.
+    """
+    if not system:
+        system = platform.system()
+    browsers = get_chrome_debug_candidates(system)
+
+    if not browsers:
+        if system != "Darwin":
+            return None
+        profile_dir = chrome_debug_data_dir()
+        return (
+            f'open -a "Google Chrome" --args --remote-debugging-port={port} '
+            f'--user-data-dir="{profile_dir}" --no-first-run --no-default-browser-check'
+        )
+
+    argv = [browsers[0]] + _chrome_debug_args(port)
+    if system == "Windows":
+        return subprocess.list2cmdline(argv)
+    return shlex.join(argv)
+
+
+def _detach_kwargs(system: str) -> dict:
+    """Return the ``subprocess.Popen`` keyword arguments used to detach the child.
+
+    Non-Windows systems get ``start_new_session=True``. On Windows the
+    ``DETACHED_PROCESS`` and ``CREATE_NEW_PROCESS_GROUP`` flags are combined
+    when ``subprocess`` exposes them; if neither is available the result is
+    an empty dict.
+    """
+    if system == "Windows":
+        detached = getattr(subprocess, "DETACHED_PROCESS", 0)
+        new_group = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
+        creation_flags = detached | new_group
+        if creation_flags:
+            return {"creationflags": creation_flags}
+        return {}
+    return {"start_new_session": True}
+
+
+def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
+    """Best-effort launch of a detached, debug-enabled browser.
+
+    Args:
+        port: Remote-debugging port passed to the browser.
+        system: Platform name; defaults to ``platform.system()``.
+
+    Returns:
+        ``True`` when the first detected browser was spawned, ``False`` when
+        no browser was found or spawning it raised.
+    """
+    if not system:
+        system = platform.system()
+    browsers = get_chrome_debug_candidates(system)
+    if browsers:
+        os.makedirs(chrome_debug_data_dir(), exist_ok=True)
+        try:
+            command = [browsers[0], *_chrome_debug_args(port)]
+            subprocess.Popen(
+                command,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                **_detach_kwargs(system),
+            )
+        except Exception:
+            # Deliberately best-effort: any spawn failure is reported as False.
+            return False
+        return True
+    return False
@@ -148,6 +148,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
+               "Tools & Skills", args_hint="[subcommand]",
+               subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
               cli_only=True),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
@@ -180,8 +183,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("debug", "Upload debug report (system info + logs) and get shareable links", "Info"),

    # Exit
-    CommandDef("quit", "Exit the CLI", "Exit",
-               cli_only=True, aliases=("exit",)),
+    CommandDef("quit", "Exit the CLI (use --delete to also remove session history)", "Exit",
+               cli_only=True, aliases=("exit",), args_hint="[--delete]"),
 ]


@@ -499,7 +499,8 @@ DEFAULT_CONFIG = {
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
-        # Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
+        "vercel_runtime": "node24",
+        # Container resource limits (docker, singularity, modal, daytona, vercel_sandbox — ignored for local/ssh)
        "container_cpu": 1,
        "container_memory": 5120,       # MB (default 5GB)
        "container_disk": 51200,        # MB (default 50GB)
@@ -515,6 +516,16 @@ DEFAULT_CONFIG = {
        # Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
        # Default off because passing host directories into a sandbox weakens isolation.
        "docker_mount_cwd_to_workspace": False,
+        # Explicit opt-in: run the Docker container as the host user's uid:gid
+        # (via `--user`).  When enabled, files written into bind-mounted dirs
+        # (docker_volumes, the persistent workspace, or the auto-mounted cwd)
+        # are owned by your host user instead of root, which avoids needing
+        # `sudo chown` after container runs. Default off to preserve behavior
+        # for images whose entrypoints expect to start as root (e.g. the
+        # bundled Hermes image, which drops to the `hermes` user via gosu).
+        # When on, SETUID/SETGID caps are omitted from the container since
+        # no privilege drop is needed.
+        "docker_run_as_host_user": False,
        # Persistent shell — keep a long-lived bash shell across execute() calls
        # so cwd/env vars/shell variables survive between commands.
        # Enabled by default for non-local backends (SSH); local is always opt-in
@@ -915,6 +926,35 @@ DEFAULT_CONFIG = {
        "guard_agent_created": False,
    },

+    # Curator — background skill maintenance.
+    #
+    # Periodically reviews AGENT-CREATED skills (never bundled or
+    # hub-installed) and keeps the collection tidy: marks long-unused skills
+    # as stale, archives genuinely obsolete ones (archive only, never
+    # deletes), and spawns a forked aux-model agent to consolidate overlaps
+    # and patch drift. Runs inactivity-triggered from session start — no
+    # cron daemon.
+    #
+    # See `hermes curator status` for the last run summary.
+    "curator": {
+        "enabled": True,
+        # How long to wait between curator runs (hours).  Default: 7 days.
+        "interval_hours": 24 * 7,
+        # Only run when the agent has been idle at least this long (hours).
+        "min_idle_hours": 2,
+        # Mark a skill as "stale" after this many days without use.
+        "stale_after_days": 30,
+        # Archive a skill (move to skills/.archive/) after this many days
+        # without use. Archived skills are recoverable — no auto-deletion.
+        "archive_after_days": 90,
+        # Optional per-task override for the curator's aux model. Leave null
+        # to use Hermes' main auxiliary client resolution.
+        "auxiliary": {
+            "provider": None,
+            "model": None,
+        },
+    },
+
    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
    # This section is only needed for hermes-specific overrides; everything else
    # (apiKey, workspace, peerName, sessions, enabled) comes from the global config.
@@ -3448,6 +3488,52 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    return config


+def cfg_get(cfg: Optional[Dict[str, Any]], *keys: str, default: Any = None) -> Any:
+    """Look up a nested config value, falling back to ``default`` on any miss.
+
+    Replaces the ``cfg.get("X", {}).get("Y", default)`` idiom and covers the
+    cases that idiom trips over:
+
+      * an intermediate key is absent -> ``default`` (no KeyError);
+      * an intermediate value is not a dict (for example a user wrote a
+        string where a section belongs) -> ``default`` (no AttributeError);
+      * ``cfg`` itself is ``None`` or otherwise not a dict -> ``default``.
+
+    A key that is present is returned as stored, even when the value is
+    ``None`` or falsy: ``default`` only applies to *absent* keys, like
+    ``dict.get(key, default)``.
+
+    The name is ``cfg_get`` rather than ``cfg_path`` so it does not shadow
+    the common ``cfg_path = ... / "config.yaml"`` local variable.
+
+    Examples:
+        >>> cfg_get({"agent": {"reasoning_effort": "high"}}, "agent", "reasoning_effort")
+        'high'
+        >>> cfg_get({}, "agent", "reasoning_effort", default="medium")
+        'medium'
+        >>> cfg_get({"agent": "oops_a_string"}, "agent", "reasoning_effort", default="low")
+        'low'
+        >>> cfg_get(None, "anything", default=42)
+        42
+        >>> cfg_get({"a": {"b": False}}, "a", "b", default=True)
+        False
+    """
+    absent = object()
+    cursor: Any = cfg
+    if not isinstance(cursor, dict):
+        return default
+    for part in keys:
+        # Descend one level; a non-dict node or an absent key ends the walk.
+        found = cursor.get(part, absent) if isinstance(cursor, dict) else absent
+        if found is absent:
+            return default
+        cursor = found
+    return cursor
+
+

 def read_raw_config() -> Dict[str, Any]:
    """Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
@@ -3710,18 +3796,27 @@ def _sanitize_env_lines(lines: list) -> list:

        # Detect concatenated KEY=VALUE pairs on one line.
        # Search for known KEY= patterns at any position in the line.
-        split_positions = []
+        # We collect full needle ranges so we can drop matches that are
+        # fully contained within a longer overlapping needle. Without this,
+        # suffix collisions corrupt the file: e.g. LM_API_KEY= inside
+        # GLM_API_KEY= would otherwise split the line into "G\nLM_API_KEY=...".
+        match_ranges: list[tuple[int, int]] = []
        for key_name in known_keys:
            needle = key_name + "="
            idx = stripped.find(needle)
            while idx >= 0:
-                split_positions.append(idx)
+                match_ranges.append((idx, idx + len(needle)))
                idx = stripped.find(needle, idx + len(needle))

+        split_positions = sorted({
+            s for s, e in match_ranges
+            if not any(
+                s2 <= s and e2 >= e and (s2, e2) != (s, e)
+                for s2, e2 in match_ranges
+            )
+        })
+
        if len(split_positions) > 1:
-            split_positions.sort()
-            # Deduplicate (shouldn't happen, but be safe)
-            split_positions = sorted(set(split_positions))
            for i, pos in enumerate(split_positions):
                end = split_positions[i + 1] if i + 1 < len(split_positions) else len(stripped)
                part = stripped[pos:end].strip()
@@ -4099,6 +4194,9 @@ def show_config():
        print(f"  Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
        daytona_key = get_env_value('DAYTONA_API_KEY')
        print(f"  API key:      {'configured' if daytona_key else '(not set)'}")
+    elif terminal.get('backend') == 'vercel_sandbox':
+        print(f"  Vercel runtime: {terminal.get('vercel_runtime', 'node24')}")
+        print(f"  Vercel auth:    {'configured' if get_env_value('VERCEL_OIDC_TOKEN') or (get_env_value('VERCEL_TOKEN') and get_env_value('VERCEL_PROJECT_ID') and get_env_value('VERCEL_TEAM_ID')) else '(not set)'}")
    elif terminal.get('backend') == 'ssh':
        ssh_host = get_env_value('TERMINAL_SSH_HOST')
        ssh_user = get_env_value('TERMINAL_SSH_USER')
@@ -4291,7 +4389,9 @@ def set_config_value(key: str, value: str):
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
+        "terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
+        "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
        "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
@@ -0,0 +1,235 @@
+"""CLI subcommand: `hermes curator <subcommand>`.
+
+Thin shell around agent/curator.py and tools/skill_usage.py. Renders a status
+table, triggers a run, pauses/resumes, and pins/unpins skills.
+
+This module intentionally has no side effects at import time — main.py wires
+the argparse subparsers on demand.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from datetime import datetime, timezone
+from typing import Optional
+
+
+def _fmt_ts(ts: Optional[str]) -> str:
+    """Render an ISO-8601 timestamp as a short relative age.
+
+    Args:
+        ts: ISO-8601 timestamp string, or a falsy value when nothing has
+            been recorded.
+
+    Returns:
+        ``"never"`` for a falsy *ts*; the input stringified when it cannot
+        be parsed; otherwise ``"<n>s ago"``, ``"<n>m ago"``, ``"<n>h ago"``
+        or ``"<n>d ago"`` measured against the current UTC time.
+    """
+    if not ts:
+        return "never"
+    try:
+        # datetime.fromisoformat() only accepts a trailing "Z" from Python
+        # 3.11 onwards; spell it as an explicit UTC offset so older
+        # interpreters parse it too.
+        iso = f"{ts[:-1]}+00:00" if ts.endswith("Z") else ts
+        dt = datetime.fromisoformat(iso)
+    except (AttributeError, TypeError, ValueError):
+        # Non-string or unparseable input: show it verbatim.
+        return str(ts)
+    if dt.tzinfo is None:
+        # Naive timestamps are interpreted as UTC.
+        dt = dt.replace(tzinfo=timezone.utc)
+    # Clamp at zero so a timestamp slightly in the future (clock skew)
+    # reads "0s ago" rather than a negative age.
+    secs = max(0, int((datetime.now(timezone.utc) - dt).total_seconds()))
+    if secs < 60:
+        return f"{secs}s ago"
+    if secs < 3600:
+        return f"{secs // 60}m ago"
+    if secs < 86400:
+        return f"{secs // 3600}h ago"
+    return f"{secs // 86400}d ago"
+
+
+def _cmd_status(args) -> int:
+    """Print the curator's state followed by a digest of agent-created skills.
+
+    Reads the persisted curator state and configuration getters from
+    ``agent.curator`` and the per-skill rows from
+    ``tools.skill_usage.agent_created_report()``. *args* is accepted for
+    handler-signature uniformity and is not read. Always returns 0.
+    """
+    from agent import curator
+    from tools import skill_usage
+
+    state = curator.load_state()
+    is_enabled = curator.is_enabled()
+    is_paused = state.get("paused", False)
+
+    # A pause takes precedence over the enabled flag in the headline.
+    if is_paused:
+        headline = "PAUSED"
+    elif is_enabled:
+        headline = "ENABLED"
+    else:
+        headline = "DISABLED"
+
+    run_total = state.get("run_count", 0)
+    last_run_at = state.get("last_run_at")
+    last_summary = state.get("last_run_summary") or "(none)"
+
+    print(f"curator: {headline}")
+    print(f"  runs:           {run_total}")
+    print(f"  last run:       {_fmt_ts(last_run_at)}")
+    print(f"  last summary:   {last_summary}")
+
+    report_path = state.get("last_report_path")
+    if report_path:
+        print(f"  last report:    {report_path}")
+
+    # Whole multiples of a day are shown in days, everything else in hours.
+    interval_hours = curator.get_interval_hours()
+    if interval_hours % 24 == 0 and interval_hours >= 24:
+        interval_text = f"{interval_hours // 24}d"
+    else:
+        interval_text = f"{interval_hours}h"
+    print(f"  interval:       every {interval_text}")
+    print(f"  stale after:    {curator.get_stale_after_days()}d unused")
+    print(f"  archive after:  {curator.get_archive_after_days()}d unused")
+
+    rows = skill_usage.agent_created_report()
+    if not rows:
+        print("\nno agent-created skills")
+        return 0
+
+    # Rows without a "state" key are counted as active; unknown states get
+    # their own bucket but are not listed in the summary below.
+    buckets = {"active": [], "stale": [], "archived": []}
+    for row in rows:
+        buckets.setdefault(row.get("state", "active"), []).append(row)
+    pinned_names = [row["name"] for row in rows if row.get("pinned")]
+
+    print(f"\nagent-created skills: {len(rows)} total")
+    for label in ("active", "stale", "archived"):
+        count = len(buckets[label])
+        print(f"  {label:10s} {count}")
+
+    if pinned_names:
+        print(f"\npinned ({len(pinned_names)}): {', '.join(pinned_names)}")
+
+    def _recency(row):
+        # Never-used skills fall back to their creation time, then to "".
+        return row.get("last_used_at") or row.get("created_at") or ""
+
+    # The five active skills with the oldest recency key.
+    stalest = sorted(buckets["active"], key=_recency)[:5]
+    if stalest:
+        print("\nleast recently used (top 5):")
+        for row in stalest:
+            last_used = _fmt_ts(row.get("last_used_at"))
+            print(f"  {row['name']:40s}  use={row.get('use_count', 0):3d}  last_used={last_used}")
+
+    return 0
+
+
+def _cmd_run(args) -> int:
+    """Trigger a curator review pass and print what it reports.
+
+    Reads ``args.synchronous`` to decide whether the review is requested
+    synchronously. Returns 1 when the curator is disabled in config,
+    otherwise 0.
+    """
+    from agent import curator
+
+    if not curator.is_enabled():
+        print("curator: disabled via config; enable with `curator.enabled: true`")
+        return 1
+
+    print("curator: running review pass...")
+
+    wait_for_llm = bool(args.synchronous)
+    outcome = curator.run_curator_review(
+        on_summary=lambda msg: print(msg),
+        synchronous=wait_for_llm,
+    )
+
+    counts = outcome.get("auto_transitions", {})
+    if counts:
+        # (label shown to the user, key in the auto_transitions mapping)
+        columns = (
+            ("checked", "checked"),
+            ("stale", "marked_stale"),
+            ("archived", "archived"),
+            ("reactivated", "reactivated"),
+        )
+        rendered = " ".join(f"{label}={counts.get(key, 0)}" for label, key in columns)
+        print("auto: " + rendered)
+
+    if not wait_for_llm:
+        print("llm pass running in background — check `hermes curator status` later")
+    return 0
+
+
+def _cmd_pause(args) -> int:
+    """Set the curator's paused flag and confirm on stdout; always returns 0."""
+    from agent import curator as _curator
+
+    _curator.set_paused(True)
+    print("curator: paused")
+    return 0
+
+
+def _cmd_resume(args) -> int:
+    """Clear the curator's paused flag and confirm on stdout; always returns 0."""
+    from agent import curator as _curator
+
+    _curator.set_paused(False)
+    print("curator: resumed")
+    return 0
+
+
+def _cmd_pin(args) -> int:
+    """Pin the skill named by ``args.skill``.
+
+    Only skills that ``skill_usage.is_agent_created`` accepts can be pinned.
+    Returns 0 after pinning, 1 when the skill is refused.
+    """
+    from tools import skill_usage
+
+    name = args.skill
+    if skill_usage.is_agent_created(name):
+        skill_usage.set_pinned(name, True)
+        print(f"curator: pinned '{name}' (will bypass auto-transitions)")
+        return 0
+
+    refusal = (
+        f"curator: '{name}' is bundled or hub-installed — cannot pin "
+        "(only agent-created skills participate in curation)"
+    )
+    print(refusal)
+    return 1
+
+
+def _cmd_unpin(args) -> int:
+    """Remove the pin from the skill named by ``args.skill``.
+
+    Only skills that ``skill_usage.is_agent_created`` accepts are handled.
+    Returns 0 after unpinning, 1 when the skill is refused.
+    """
+    from tools import skill_usage
+
+    name = args.skill
+    if skill_usage.is_agent_created(name):
+        skill_usage.set_pinned(name, False)
+        print(f"curator: unpinned '{name}'")
+        return 0
+
+    refusal = (
+        f"curator: '{name}' is bundled or hub-installed — "
+        "there's nothing to unpin (curator only tracks agent-created skills)"
+    )
+    print(refusal)
+    return 1
+
+
+def _cmd_restore(args) -> int:
+    """Ask ``skill_usage.restore_skill`` to restore ``args.skill``.
+
+    Echoes the message it returns. Returns 0 when the restore is reported
+    as successful, otherwise 1.
+    """
+    from tools import skill_usage
+
+    succeeded, message = skill_usage.restore_skill(args.skill)
+    print(f"curator: {message}")
+    if succeeded:
+        return 0
+    return 1
+
+
+# ---------------------------------------------------------------------------
+# argparse wiring (called from hermes_cli.main)
+# ---------------------------------------------------------------------------
+
+def register_cli(parent: argparse.ArgumentParser) -> None:
+    """Wire the curator subcommands onto *parent*.
+
+    main.py passes in the parser it gets back from
+    ``subparsers.add_parser("curator", ...)``. Every subcommand stores its
+    handler under the ``func`` default; when no subcommand is given, the
+    default ``func`` prints *parent*'s help and returns 0.
+    """
+    def _show_help(_ns) -> int:
+        parent.print_help()
+        return 0
+
+    parent.set_defaults(func=_show_help)
+    subs = parent.add_subparsers(dest="curator_command")
+
+    def _add(name: str, help_text: str, handler, *, takes_skill: bool = False) -> None:
+        # One subparser per command; the skill-oriented ones share a single
+        # positional argument.
+        sub = subs.add_parser(name, help=help_text)
+        if takes_skill:
+            sub.add_argument("skill", help="Skill name")
+        sub.set_defaults(func=handler)
+
+    _add("status", "Show curator status and skill stats", _cmd_status)
+
+    # `run` is the only subcommand with an option of its own.
+    run_parser = subs.add_parser("run", help="Trigger a curator review now")
+    run_parser.add_argument(
+        "--sync", "--synchronous", dest="synchronous", action="store_true",
+        help="Wait for the LLM review pass to finish (default: background thread)",
+    )
+    run_parser.set_defaults(func=_cmd_run)
+
+    _add("pause", "Pause the curator until resumed", _cmd_pause)
+    _add("resume", "Resume a paused curator", _cmd_resume)
+    _add(
+        "pin",
+        "Pin a skill so the curator never auto-transitions it",
+        _cmd_pin,
+        takes_skill=True,
+    )
+    _add("unpin", "Unpin a skill", _cmd_unpin, takes_skill=True)
+    _add("restore", "Restore an archived skill", _cmd_restore, takes_skill=True)
+
+
+def cli_main(argv=None) -> int:
+    """Standalone entry point (also usable by hermes_cli.main fallthrough).
+
+    Builds a ``hermes curator`` parser, parses *argv* and dispatches to the
+    handler stored under ``func``. Returns the handler's result coerced to
+    an int (falsy results become 0); prints help and returns 0 when no
+    handler is set.
+    """
+    parser = argparse.ArgumentParser(prog="hermes curator")
+    register_cli(parser)
+    namespace = parser.parse_args(argv)
+
+    handler = getattr(namespace, "func", None)
+    if handler is not None:
+        return int(handler(namespace) or 0)
+
+    parser.print_help()
+    return 0
+
+
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(cli_main())
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+import importlib.util
 from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
@@ -30,6 +31,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
 from hermes_cli.models import _HERMES_USER_AGENT
+from hermes_cli.vercel_auth import describe_vercel_auth
 from hermes_constants import OPENROUTER_MODELS_URL
 from utils import base_url_host_matches

@@ -537,6 +539,7 @@ def run_doctor(args):
            get_nous_auth_status,
            get_codex_auth_status,
            get_gemini_oauth_auth_status,
+            get_minimax_oauth_auth_status,
        )

        nous_status = get_nous_auth_status()
@@ -566,6 +569,13 @@ def run_doctor(args):
            check_ok("Google Gemini OAuth", f"(logged in{suffix})")
        else:
            check_warn("Google Gemini OAuth", "(not logged in)")
+
+        minimax_status = get_minimax_oauth_auth_status()
+        if minimax_status.get("logged_in"):
+            region = minimax_status.get("region", "global")
+            check_ok("MiniMax OAuth", f"(logged in, region={region})")
+        else:
+            check_warn("MiniMax OAuth", "(not logged in)")
    except Exception as e:
        check_warn("Auth provider status", f"(could not check: {e})")

@@ -863,6 +873,50 @@ def run_doctor(args):
            check_fail("daytona SDK not installed", "(pip install daytona)")
            issues.append("Install daytona SDK: pip install daytona")

+    # Vercel Sandbox (if using vercel_sandbox backend)
+    if terminal_env == "vercel_sandbox":
+        runtime = os.getenv("TERMINAL_VERCEL_RUNTIME", "node24").strip() or "node24"
+        from tools.terminal_tool import _SUPPORTED_VERCEL_RUNTIMES
+        if runtime in _SUPPORTED_VERCEL_RUNTIMES:
+            check_ok("Vercel runtime", f"({runtime})")
+        else:
+            supported = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
+            check_fail("Vercel runtime unsupported", f"({runtime}; use {supported})")
+            issues.append(f"Set TERMINAL_VERCEL_RUNTIME to one of: {supported}")
+
+        disk = os.getenv("TERMINAL_CONTAINER_DISK", "51200").strip()
+        if disk in ("", "0", "51200"):
+            check_ok("Vercel disk setting", "(uses platform default)")
+        else:
+            check_fail("Vercel custom disk unsupported", "(reset terminal.container_disk to 51200)")
+            issues.append("Vercel Sandbox does not support custom container_disk; use the shared default 51200")
+
+        if importlib.util.find_spec("vercel") is not None:
+            check_ok("vercel SDK", "(installed)")
+        else:
+            check_fail("vercel SDK not installed", "(pip install 'hermes-agent[vercel]')")
+            issues.append("Install the Vercel optional dependency: pip install 'hermes-agent[vercel]'")
+
+        auth_status = describe_vercel_auth()
+        if auth_status.ok:
+            check_ok("Vercel auth", f"({auth_status.label})")
+        elif auth_status.label.startswith("partial"):
+            check_fail("Vercel auth incomplete", f"({auth_status.label})")
+            issues.append("Set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID together")
+        else:
+            check_fail("Vercel auth not configured", f"({auth_status.label})")
+            issues.append(
+                "Configure Vercel Sandbox auth with VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID"
+            )
+        for line in auth_status.detail_lines:
+            check_info(f"Vercel auth {line}")
+
+        persistent = os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("1", "true", "yes", "on")
+        if persistent:
+            check_info("Vercel persistence: snapshot filesystem only; live processes do not survive sandbox recreation")
+        else:
+            check_info("Vercel persistence: ephemeral filesystem")
+
    # Node.js + agent-browser (for browser automation tools)
    if shutil.which("node"):
        check_ok("Node.js")
@@ -279,9 +279,11 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                ["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
                capture_output=True,
                text=True,
+                encoding="utf-8",
+                errors="ignore",
                timeout=10,
            )
-            if result.returncode != 0:
+            if result.returncode != 0 or result.stdout is None:
                return []
            current_cmd = ""
            for line in result.stdout.split("\n"):
@@ -830,6 +832,22 @@ def _user_dbus_socket_path() -> Path:
    return Path(xdg) / "bus"


+def _user_systemd_private_socket_path() -> Path:
+    """Build the path of the per-user systemd private socket.
+
+    Uses ``$XDG_RUNTIME_DIR`` when it is set and non-empty, otherwise
+    ``/run/user/<uid>``. The path is returned whether or not it exists.
+    """
+    runtime_dir = os.environ.get("XDG_RUNTIME_DIR")
+    if not runtime_dir:
+        runtime_dir = f"/run/user/{os.getuid()}"
+    return Path(runtime_dir).joinpath("systemd", "private")
+
+
+def _user_systemd_socket_ready() -> bool:
+    """Report whether user-scope systemd has a control socket on disk.
+
+    Some distros expose only the per-user systemd private socket even when
+    the D-Bus session bus socket is absent, and ``systemctl --user`` can
+    still work in that configuration. Either socket existing therefore
+    counts as ready; the private socket is only checked when the D-Bus one
+    is missing.
+    """
+    if _user_dbus_socket_path().exists():
+        return True
+    return _user_systemd_private_socket_path().exists()
+
+
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -853,28 +871,29 @@ def _ensure_user_systemd_env() -> None:


 def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
-    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
+    """Poll for the user systemd runtime socket(s), up to ``timeout`` seconds.

-    Linger-enabled user@.service can take a second or two to spawn the socket
-    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
+    Linger-enabled user@.service can take a second or two to spawn its control
+    socket(s) after ``loginctl enable-linger`` runs. Returns True once either
+    the user D-Bus socket or the per-user systemd private socket exists.
    """
    import time

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
-        if _user_dbus_socket_path().exists():
+        if _user_systemd_socket_ready():
            _ensure_user_systemd_env()
            return True
        time.sleep(0.2)
-    return _user_dbus_socket_path().exists()
+    return _user_systemd_socket_ready()


 def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
-    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
+    """Ensure ``systemctl --user`` will reach the user-scope systemd instance.

-    No-op when the bus socket is already there (the common case on desktops
-    and linger-enabled servers).  On fresh SSH sessions where the socket is
-    missing:
+    No-op when the user D-Bus socket or per-user systemd private socket is
+    already there (the common case on desktops and linger-enabled servers). On
+    fresh SSH sessions where both are missing:

    * If linger is already enabled, wait briefly for user@.service to spawn
      the socket.
@@ -888,8 +907,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
    systemd operations and surface the message to the user.
    """
    _ensure_user_systemd_env()
-    bus_path = _user_dbus_socket_path()
-    if bus_path.exists():
+    if _user_systemd_socket_ready():
        return

    import getpass
@@ -903,7 +921,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
        # Linger is on but socket still missing — unusual; fall through to error.
        _raise_user_systemd_unavailable(
            username,
-            reason="User D-Bus socket is missing even though linger is enabled.",
+            reason="User systemd control sockets are missing even though linger is enabled.",
            fix_hint=(
                f"  systemctl start user@{os.getuid()}.service\n"
                "  (may require sudo; try again after the command succeeds)"
@@ -3277,6 +3295,12 @@ def _setup_weixin():
        print_warning("  Direct messages disabled.")

    print()
+    print_info("  Note: QR login connects an iLink bot identity (e.g. ...@im.bot), not a")
+    print_info("  scriptable personal WeChat account. Ordinary WeChat groups typically cannot")
+    print_info("  invite an @im.bot identity, and iLink does not deliver ordinary-group events")
+    print_info("  to most bot accounts. The settings below only apply when iLink actually")
+    print_info("  delivers group events for your account type — otherwise DM remains the only")
+    print_info("  working channel regardless of this choice.")
    group_choices = [
        "Disable group chats (recommended)",
        "Allow all group chats",
@@ -3290,12 +3314,12 @@ def _setup_weixin():
    elif group_idx == 1:
        save_env_value("WEIXIN_GROUP_POLICY", "open")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
-        print_warning("  All group chats enabled.")
+        print_warning("  All group chats enabled (only takes effect if iLink delivers group events).")
    else:
-        allow_groups = prompt("  Allowed group chat IDs (comma-separated)", "", password=False).replace(" ", "")
+        allow_groups = prompt("  Allowed group chat IDs (comma-separated, not member user IDs)", "", password=False).replace(" ", "")
        save_env_value("WEIXIN_GROUP_POLICY", "allowlist")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", allow_groups)
-        print_success("  Group allowlist saved.")
+        print_success("  Group allowlist saved (only takes effect if iLink delivers group events).")

    if user_id:
        print()
@@ -1094,11 +1094,36 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
    return [node, str(root / "dist" / "entry.js")], root


+def _normalize_tui_toolsets(toolsets: object) -> list[str]:
+    """Normalize argparse/Fire-style toolset input for the TUI subprocess.
+
+    Delegates to ``hermes_cli.oneshot._normalize_toolsets`` when it can be
+    imported and called; on ``ImportError`` or ``AttributeError`` an
+    equivalent local normalization is used. Always returns a list, which
+    is empty when nothing usable was supplied.
+    """
+    try:
+        from hermes_cli.oneshot import _normalize_toolsets
+
+        return _normalize_toolsets(toolsets) or []
+    except (AttributeError, ImportError):
+        pass
+
+    # Fallback: local normalization.
+    if not toolsets:
+        return []
+
+    # Lists and tuples are iterated; anything else is one item.
+    if isinstance(toolsets, (list, tuple)):
+        candidates = toolsets
+    else:
+        candidates = [toolsets]
+
+    cleaned: list[str] = []
+    for entry in candidates:
+        # String entries may themselves be comma-separated.
+        pieces = entry.split(",") if isinstance(entry, str) else [str(entry)]
+        cleaned.extend(piece.strip() for piece in pieces)
+
+    return [name for name in cleaned if name]
+
+
 def _launch_tui(
    resume_session_id: Optional[str] = None,
    tui_dev: bool = False,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
 ):
    """Replace current process with the TUI."""
    tui_dir = PROJECT_ROOT / "ui-tui"
@@ -1123,6 +1148,9 @@ def _launch_tui(
    if provider:
        env["HERMES_TUI_PROVIDER"] = provider
        env["HERMES_INFERENCE_PROVIDER"] = provider
+    tui_toolsets = _normalize_tui_toolsets(toolsets)
+    if tui_toolsets:
+        env["HERMES_TUI_TOOLSETS"] = ",".join(tui_toolsets)
    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
    # large transcripts / reasoning blobs. Token-level merge: respect any
@@ -1270,6 +1298,7 @@ def cmd_chat(args):
            tui_dev=getattr(args, "tui_dev", False),
            model=getattr(args, "model", None),
            provider=getattr(args, "provider", None),
+            toolsets=getattr(args, "toolsets", None),
        )

    # Import and run the CLI
@@ -1770,6 +1799,8 @@ def select_provider_and_model(args=None):
        _model_flow_openai_codex(config, current_model)
    elif selected_provider == "qwen-oauth":
        _model_flow_qwen_oauth(config, current_model)
+    elif selected_provider == "minimax-oauth":
+        _model_flow_minimax_oauth(config, current_model, args=args)
    elif selected_provider == "google-gemini-cli":
        _model_flow_google_gemini_cli(config, current_model)
    elif selected_provider == "copilot-acp":
@@ -2658,6 +2689,53 @@ def _model_flow_qwen_oauth(_config, current_model=""):
        print("No change.")


+def _model_flow_minimax_oauth(config, current_model="", args=None):
+    """MiniMax OAuth provider flow: make sure we are logged in, then pick a model.
+
+    When no stored access token is found, an OAuth login is started using
+    ``region`` / ``no_browser`` / ``timeout`` taken from *args* (falling
+    back to ``"global"``, ``False`` and ``15.0``). After credentials
+    resolve, the user chooses from the ``minimax-oauth`` model list; the
+    choice and the provider's base URL are then saved. Returns ``None`` on
+    every path; failures are reported on stdout. *config* is not read here.
+    """
+    from hermes_cli.auth import (
+        AuthError,
+        PROVIDER_REGISTRY,
+        _login_minimax_oauth,
+        _prompt_model_selection,
+        _save_model_choice,
+        _update_config_for_provider,
+        format_auth_error,
+        get_provider_auth_state,
+        resolve_minimax_oauth_runtime_credentials,
+    )
+
+    auth_state = get_provider_auth_state("minimax-oauth")
+    if not (auth_state and auth_state.get("access_token")):
+        print("Not logged into MiniMax. Starting OAuth login...")
+        print()
+        try:
+            # Namespace standing in for the parsed CLI args the login
+            # helper is given here.
+            login_args = argparse.Namespace(
+                region=getattr(args, "region", None) or "global",
+                no_browser=bool(getattr(args, "no_browser", False)),
+                timeout=getattr(args, "timeout", None) or 15.0,
+            )
+            _login_minimax_oauth(login_args, PROVIDER_REGISTRY["minimax-oauth"])
+        except SystemExit:
+            print("Login cancelled or failed.")
+            return
+        except Exception as exc:
+            print(f"Login failed: {exc}")
+            return
+
+    try:
+        creds = resolve_minimax_oauth_runtime_credentials()
+    except AuthError as exc:
+        print(format_auth_error(exc))
+        return
+
+    from hermes_cli.models import _PROVIDER_MODELS
+
+    available = _PROVIDER_MODELS.get("minimax-oauth", [])
+    choice = _prompt_model_selection(available, current_model)
+    if not choice:
+        return
+
+    _save_model_choice(choice)
+    _update_config_for_provider("minimax-oauth", creds["base_url"])
+    print(f"\u2713 Using MiniMax model: {choice}")
+
+
 def _model_flow_google_gemini_cli(_config, current_model=""):
    """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers.

@@ -5274,12 +5352,20 @@ def _warn_stale_dashboard_processes() -> None:

    try:
        if sys.platform == "win32":
+            # wmic may emit text in the system code page (for example cp936
+            # on zh-CN systems), not UTF-8. In text mode, subprocess output
+            # decoding depends on Python's configuration (locale-dependent
+            # by default, or UTF-8 in UTF-8 mode). The important protection
+            # here is errors="ignore": it prevents a reader-thread
+            # UnicodeDecodeError from leaving result.stdout=None and turning
+            # the later .split() into an AttributeError (#17049).
            result = subprocess.run(
                ["wmic", "process", "get", "ProcessId,CommandLine",
                 "/FORMAT:LIST"],
                capture_output=True, text=True, timeout=10,
+                encoding="utf-8", errors="ignore",
            )
-            if result.returncode != 0:
+            if result.returncode != 0 or result.stdout is None:
                return
            current_cmd = ""
            for line in result.stdout.split("\n"):
@@ -7830,6 +7916,12 @@ For more help on a command:
            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
        ),
    )
+    parser.add_argument(
+        "-t",
+        "--toolsets",
+        default=None,
+        help="Comma-separated toolsets to enable for this invocation. Applies to -z/--oneshot and --tui.",
+    )
    parser.add_argument(
        "--resume",
        "-r",
@@ -9230,6 +9322,26 @@ Examples:
    except Exception as _exc:
        logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)

+    # =========================================================================
+    # curator command — background skill maintenance
+    # =========================================================================
+    curator_parser = subparsers.add_parser(
+        "curator",
+        help="Background skill maintenance (curator) — status, run, pause, pin",
+        description=(
+            "The curator is an auxiliary-model background task that "
+            "periodically reviews agent-created skills, prunes stale ones, "
+            "consolidates overlaps, and archives obsolete skills. "
+            "Bundled and hub-installed skills are never touched. "
+            "Archives are recoverable; auto-deletion never happens."
+        ),
+    )
+    try:
+        from hermes_cli.curator import register_cli as _register_curator_cli
+        _register_curator_cli(curator_parser)
+    except Exception as _exc:
+        logging.getLogger(__name__).debug("curator CLI wiring failed: %s", _exc)
+
    # =========================================================================
    # memory command
    # =========================================================================
@@ -10250,6 +10362,7 @@ Examples:
            args.oneshot,
            model=getattr(args, "model", None),
            provider=getattr(args, "provider", None),
+            toolsets=getattr(args, "toolsets", None),
        ))

    # Handle top-level --resume / --continue as shortcut to chat
@@ -16,6 +16,7 @@ import time
 from typing import Any, Dict, List, Optional, Tuple

 from hermes_cli.config import (
+    cfg_get,
    load_config,
    save_config,
    get_env_value,
@@ -716,7 +717,7 @@ def cmd_mcp_configure(args):

    # Update config
    config = load_config()
-    server_entry = config.get("mcp_servers", {}).get(name, {})
+    server_entry = cfg_get(config, "mcp_servers", name, default={})

    if len(chosen) == total:
        # All selected → remove include/exclude (register all)
@@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "kimi-coding",
    "kimi-coding-cn",
    "minimax",
+    "minimax-oauth",
    "minimax-cn",
    "alibaba",
    "qwen-oauth",
@@ -1018,6 +1018,37 @@ def list_authenticated_providers(
    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
    seen_mdev_ids: set = set()  # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
+    # Effective base URLs of every built-in row we emit (normalized lower+rstrip).
+    # Section 4 uses this to hide ``custom_providers`` entries that point at the
+    # same endpoint as a built-in (e.g. a user-defined "my-dashscope" on
+    # https://coding-intl.dashscope.aliyuncs.com/v1 collides with the built-in
+    # alibaba-coding-plan row when DASHSCOPE_API_KEY is present). Fixes #16970.
+    _builtin_endpoints: set = set()
+
+    def _norm_url(url: str) -> str:
+        """Canonicalize a base URL for comparison.
+
+        Falsy input becomes ``""``; otherwise surrounding whitespace and
+        trailing slashes are removed and the text is lower-cased.
+        """
+        text = str(url or "")
+        trimmed = text.strip().rstrip("/")
+        return trimmed.lower()
+
+    def _record_builtin_endpoint(slug: str) -> None:
+        """Remember the effective base URL of the built-in provider *slug*.
+
+        The live env override (e.g. DASHSCOPE_BASE_URL) wins over the static
+        ``inference_base_url`` so the dedup matches what a user typing that
+        URL into custom_providers would actually hit. Nothing is recorded
+        when the registry cannot be imported, the slug is not in it, or no
+        URL can be determined.
+        """
+        try:
+            from hermes_cli.auth import PROVIDER_REGISTRY as _reg
+        except Exception:
+            return
+
+        provider_cfg = _reg.get(slug)
+        if not provider_cfg:
+            return
+
+        effective = ""
+        if getattr(provider_cfg, "base_url_env_var", ""):
+            effective = os.environ.get(provider_cfg.base_url_env_var, "") or ""
+        if not effective:
+            effective = getattr(provider_cfg, "inference_base_url", "") or ""
+
+        canonical = _norm_url(effective)
+        if canonical:
+            _builtin_endpoints.add(canonical)

    data = fetch_models_dev()

@@ -1124,6 +1155,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(slug.lower())
        seen_mdev_ids.add(mdev_id)
+        _record_builtin_endpoint(slug)

    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
    from hermes_cli.providers import HERMES_OVERLAYS
@@ -1238,6 +1270,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(pid.lower())
        seen_slugs.add(hermes_slug.lower())
+        _record_builtin_endpoint(hermes_slug)

    # --- 2b. Cross-check canonical provider list ---
    # Catches providers that are in CANONICAL_PROVIDERS but weren't found
@@ -1317,6 +1350,7 @@ def list_authenticated_providers(
            "source": "canonical",
        })
        seen_slugs.add(_cp.slug.lower())
+        _record_builtin_endpoint(_cp.slug)

    # --- 3. User-defined endpoints from config ---
    # Track (name, base_url) of what section 3 emits so section 4 can skip
@@ -1526,6 +1560,15 @@ def list_authenticated_providers(
            )
            if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs:
                continue
+            # Skip if a built-in row (sections 1/2/2b) already represents this
+            # endpoint. Fixes #16970: a user-defined "my-dashscope" pointing at
+            # https://coding-intl.dashscope.aliyuncs.com/v1 duplicates the
+            # built-in alibaba-coding-plan row whenever DASHSCOPE_API_KEY is
+            # set. The built-in row carries the curated model list, correct
+            # auth wiring, and canonical slug — keep it and hide the shadow.
+            _grp_url_norm = _pair_key[1]
+            if _grp_url_norm and _grp_url_norm in _builtin_endpoints:
+                continue
            results.append({
                "slug": slug,
                "name": grp["name"],
@@ -288,6 +288,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "MiniMax-M2.1",
        "MiniMax-M2",
    ],
+    "minimax-oauth": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
+    ],
    "minimax-cn": [
        "MiniMax-M2.7",
        "MiniMax-M2.5",
@@ -788,6 +792,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
    ProviderEntry("stepfun",        "StepFun Step Plan",       "StepFun Step Plan (agent/coding models via Step Plan API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
+    ProviderEntry("minimax-oauth",  "MiniMax (OAuth)",          "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
@@ -831,6 +836,9 @@ _PROVIDER_ALIASES = {
    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
+    "minimax-portal": "minimax-oauth",
+    "minimax-global": "minimax-oauth",
+    "minimax_oauth": "minimax-oauth",
    "claude": "anthropic",
    "claude-code": "anthropic",
    "deep-seek": "deepseek",
@@ -3,7 +3,8 @@
 Bypasses cli.py entirely.  No banner, no spinner, no session_id line,
 no stderr chatter.  Just the agent's final text to stdout.

-Toolsets = whatever the user has configured for "cli" in `hermes tools`.
+Toolsets = explicit --toolsets when provided, otherwise whatever the user has
+configured for "cli" in `hermes tools`.
 Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
 Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
 Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
@@ -28,10 +29,103 @@ from contextlib import redirect_stderr, redirect_stdout
 from typing import Optional


+def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
+    """Flatten a ``--toolsets`` value into a clean list of toolset names.
+
+    Accepts a comma-separated string, any iterable of such strings (list,
+    tuple, set, generator, ...), or a single scalar.  String entries are
+    split on commas; non-string entries are converted with ``str()``.
+    Every resulting name is whitespace-stripped and empty names are dropped.
+
+    Args:
+        toolsets: The raw value, typically straight from the command line.
+
+    Returns:
+        The names in iteration order, or ``None`` when *toolsets* is falsy
+        or nothing is left after stripping.
+    """
+    if not toolsets:
+        return None
+
+    if isinstance(toolsets, (str, bytes, bytearray)):
+        # Text-like scalars are a single entry; never iterate them
+        # character by character / byte by byte.
+        raw_items = [toolsets]
+    else:
+        try:
+            # Any real iterable (not just list/tuple) contributes its items.
+            raw_items = list(toolsets)
+        except TypeError:
+            # Not iterable at all: treat it as one scalar entry.
+            raw_items = [toolsets]
+
+    normalized: list[str] = []
+    for item in raw_items:
+        if isinstance(item, str):
+            normalized.extend(part.strip() for part in item.split(","))
+        else:
+            normalized.append(str(item).strip())
+
+    return [item for item in normalized if item] or None
+
+
+def _validate_explicit_toolsets(toolsets: object = None) -> tuple[list[str] | None, str | None]:
+    """Validate an explicit ``--toolsets`` value for ``hermes -z``.
+
+    Each requested name is checked with ``toolsets.validate_toolset``.  Names
+    that fail are re-checked after ``discover_plugins()`` has run, and
+    whatever is still unresolved is matched against the ``mcp_servers``
+    mapping of the raw config.  Unknown names and disabled MCP servers are
+    reported on stderr and dropped.
+
+    Args:
+        toolsets: Raw value accepted by ``_normalize_toolsets``.
+
+    Returns:
+        A ``(valid, error)`` pair:
+
+        * ``(None, None)`` -- nothing was requested, or ``all`` / ``*`` was
+          requested.
+        * ``(names, None)`` -- the accepted names, de-duplicated and in the
+          order the user gave them.
+        * ``(None, message)`` -- validation could not run or no requested
+          name was valid; *message* is newline-terminated.
+    """
+    normalized = _normalize_toolsets(toolsets)
+    if normalized is None:
+        return None, None
+
+    # Collapse repeated names (keeping first occurrence) so that neither the
+    # result nor the warnings list the same entry twice.
+    normalized = list(dict.fromkeys(normalized))
+
+    try:
+        from toolsets import validate_toolset
+    except Exception as exc:
+        return None, f"hermes -z: failed to validate --toolsets: {exc}\n"
+
+    built_in = [name for name in normalized if validate_toolset(name)]
+    unresolved = [name for name in normalized if name not in built_in]
+
+    if unresolved:
+        # Best-effort second pass: some names only validate once plugins
+        # have been discovered.  A discovery failure must not abort the run.
+        try:
+            from hermes_cli.plugins import discover_plugins
+
+            discover_plugins()
+            plugin_valid = [name for name in unresolved if validate_toolset(name)]
+        except Exception:
+            plugin_valid = []
+
+        if plugin_valid:
+            built_in.extend(plugin_valid)
+            unresolved = [name for name in unresolved if name not in plugin_valid]
+
+    if any(name in {"all", "*"} for name in built_in):
+        ignored = [name for name in normalized if name not in {"all", "*"}]
+        if ignored:
+            sys.stderr.write(
+                "hermes -z: --toolsets all enables every toolset; "
+                f"ignoring additional entries: {', '.join(ignored)}\n"
+            )
+        return None, None
+
+    mcp_names: set[str] = set()
+    mcp_disabled: set[str] = set()
+    if unresolved:
+        # Best-effort: an unreadable config simply means no MCP names match,
+        # so the leftovers are reported as unknown below.
+        try:
+            from hermes_cli.config import read_raw_config
+            from hermes_cli.tools_config import _parse_enabled_flag
+
+            cfg = read_raw_config()
+            mcp_servers = cfg.get("mcp_servers") if isinstance(cfg.get("mcp_servers"), dict) else {}
+            for name, server_cfg in mcp_servers.items():
+                if not isinstance(server_cfg, dict):
+                    continue
+                if _parse_enabled_flag(server_cfg.get("enabled", True), default=True):
+                    mcp_names.add(str(name))
+                else:
+                    mcp_disabled.add(str(name))
+        except Exception:
+            mcp_names = set()
+            mcp_disabled = set()
+
+    mcp_valid = [name for name in unresolved if name in mcp_names]
+    disabled = [name for name in unresolved if name in mcp_disabled]
+    unknown = [name for name in unresolved if name not in mcp_names and name not in mcp_disabled]
+
+    # Filter the original request instead of concatenating the per-category
+    # lists, so the accepted names keep the order the user typed them in.
+    accepted = set(built_in) | set(mcp_valid)
+    valid = [name for name in normalized if name in accepted]
+
+    if unknown:
+        sys.stderr.write(f"hermes -z: ignoring unknown --toolsets entries: {', '.join(unknown)}\n")
+    if disabled:
+        sys.stderr.write(
+            "hermes -z: ignoring disabled MCP servers (set enabled: true in config.yaml to use): "
+            f"{', '.join(disabled)}\n"
+        )
+
+    if not valid:
+        return None, "hermes -z: --toolsets did not contain any valid toolsets.\n"
+
+    return valid, None
+
+
 def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
 ) -> int:
    """Execute a single prompt and print only the final content block.

@@ -42,6 +136,7 @@ def run_oneshot(
        provider: Optional provider override. Falls back to
            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
            then "auto".
+        toolsets: Optional comma-separated string or iterable of toolsets.

    Returns the exit code.  Caller should sys.exit() with the return.
    """
@@ -65,6 +160,12 @@ def run_oneshot(
        )
        return 2

+    explicit_toolsets, toolsets_error = _validate_explicit_toolsets(toolsets)
+    if toolsets_error:
+        sys.stderr.write(toolsets_error)
+        return 2
+    use_config_toolsets = _normalize_toolsets(toolsets) is None
+
    # Auto-approve any shell / tool approvals.  Non-interactive by
    # definition — a prompt would hang forever.
    os.environ["HERMES_YOLO_MODE"] = "1"
@@ -77,7 +178,13 @@ def run_oneshot(

    try:
        with redirect_stdout(devnull), redirect_stderr(devnull):
-            response = _run_agent(prompt, model=model, provider=provider)
+            response = _run_agent(
+                prompt,
+                model=model,
+                provider=provider,
+                toolsets=explicit_toolsets,
+                use_config_toolsets=use_config_toolsets,
+            )
    finally:
        try:
            devnull.close()
@@ -96,6 +203,8 @@ def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
+    use_config_toolsets: bool = True,
 ) -> str:
    """Build an AIAgent exactly like a normal CLI chat turn would, then
    run a single conversation.  Returns the final response string."""
@@ -168,9 +277,12 @@ def _run_agent(
        explicit_base_url=explicit_base_url_from_alias,
    )

-    # Pull in whatever toolsets the user has enabled for "cli".
-    # sorted() gives stable ordering; set→list for AIAgent's signature.
-    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
+    # Pull in explicit toolsets when provided; otherwise use whatever the user
+    # has enabled for "cli". sorted() gives stable ordering for config-derived
+    # sets; explicit values preserve user order.
+    toolsets_list = _normalize_toolsets(toolsets)
+    if toolsets_list is None and use_config_toolsets:
+        toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    agent = AIAgent(
        api_key=runtime.get("api_key"),
@@ -45,6 +45,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Union

 from hermes_constants import get_hermes_home
 from utils import env_var_enabled
+from hermes_cli.config import cfg_get

 try:
    import yaml
@@ -115,7 +116,7 @@ def _get_disabled_plugins() -> set:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        disabled = config.get("plugins", {}).get("disabled", [])
+        disabled = cfg_get(config, "plugins", "disabled", default=[])
        return set(disabled) if isinstance(disabled, list) else set()
    except Exception:
        return set()
@@ -18,6 +18,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_home
+from hermes_cli.config import cfg_get

 logger = logging.getLogger(__name__)

@@ -519,7 +520,7 @@ def _get_disabled_set() -> set:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        disabled = config.get("plugins", {}).get("disabled", [])
+        disabled = cfg_get(config, "plugins", "disabled", default=[])
        return set(disabled) if isinstance(disabled, list) else set()
    except Exception:
        return set()
@@ -763,7 +764,7 @@ def _get_current_memory_provider() -> str:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        return config.get("memory", {}).get("provider", "") or ""
+        return cfg_get(config, "memory", "provider", default="") or ""
    except Exception:
        return ""

@@ -773,7 +774,7 @@ def _get_current_context_engine() -> str:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        return config.get("context", {}).get("engine", "compressor") or "compressor"
+        return cfg_get(config, "context", "engine", default="compressor") or "compressor"
    except Exception:
        return "compressor"

@@ -71,6 +71,29 @@ _CLONE_ALL_STRIP = [
    "processes.json",
 ]

+
+def _clone_all_copytree_ignore(source_dir: Path):
+    """Build a ``shutil.copytree`` ignore callback that skips root ``profiles/``.
+
+    ``~/.hermes`` holds sibling named profiles under ``profiles/<name>/``.
+    Copying that directory into a new profile would nest profile trees inside
+    each other (``.../profiles/.../profiles/...``).  Export already leaves
+    ``profiles`` out via ``_DEFAULT_EXPORT_EXCLUDE_ROOT``; ``--clone-all``
+    gets the same treatment here.
+
+    Only an entry named ``profiles`` located directly in *source_dir* is
+    ignored; same-named entries deeper in the tree are copied.
+    """
+    root = source_dir.resolve()
+
+    def _ignore(directory: str, names: List[str]) -> List[str]:
+        # A directory that cannot be resolved is treated as "not the root",
+        # i.e. nothing in it is ignored.
+        try:
+            at_root = Path(directory).resolve() == root
+        except (OSError, ValueError):
+            return []
+        if not at_root:
+            return []
+        return [entry for entry in names if entry == "profiles"]
+
+    return _ignore
+
+
 # Directories/files to exclude when exporting the default (~/.hermes) profile.
 # The default profile contains infrastructure (repo checkout, worktrees, DBs,
 # caches, binaries) that named profiles don't have.  We exclude those so the
@@ -424,8 +447,12 @@ def create_profile(
            )

    if clone_all and source_dir:
-        # Full copy of source profile
-        shutil.copytree(source_dir, profile_dir)
+        # Full copy of source profile (exclude sibling ~/.hermes/profiles/)
+        shutil.copytree(
+            source_dir,
+            profile_dir,
+            ignore=_clone_all_copytree_ignore(source_dir),
+        )
        # Strip runtime files
        for stale in _CLONE_ALL_STRIP:
            (profile_dir / stale).unlink(missing_ok=True)
@@ -111,6 +111,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_BASE_URL",
    ),
+    "minimax-oauth": HermesOverlay(
+        transport="anthropic_messages",
+        auth_type="oauth_external",
+        base_url_override="https://api.minimax.io/anthropic",
+    ),
    "minimax-cn": HermesOverlay(
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_CN_BASE_URL",
@@ -1070,6 +1070,20 @@ def resolve_runtime_provider(
            logger.info("Qwen OAuth credentials failed; "
                        "falling through to next provider.")

+    if provider == "minimax-oauth":
+        pconfig = PROVIDER_REGISTRY.get(provider)
+        if pconfig and pconfig.auth_type == "oauth_minimax":
+            from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
+            creds = resolve_minimax_oauth_runtime_credentials()
+            return {
+                "provider": provider,
+                "api_mode": "anthropic_messages",
+                "base_url": creds["base_url"],
+                "api_key": creds["api_key"],
+                "source": creds.get("source", "oauth"),
+                "requested_provider": requested_provider,
+            }
+
    if provider == "google-gemini-cli":
        try:
            creds = resolve_gemini_oauth_runtime_credentials()
@@ -12,6 +12,7 @@ Config files are stored in ~/.hermes/ for easy access.
 """

 import importlib.util
+import json
 import logging
 import os
 import shutil
@@ -131,6 +132,7 @@ def _set_reasoning_effort(config: Dict[str, Any], effort: str) -> None:

 # Import config helpers
 from hermes_cli.config import (
+    cfg_get,
    DEFAULT_CONFIG,
    get_hermes_home,
    get_config_path,
@@ -138,6 +140,7 @@ from hermes_cli.config import (
    load_config,
    save_config,
    save_env_value,
+    remove_env_value,
    get_env_value,
    ensure_hermes_home,
 )
@@ -441,7 +444,7 @@ def _print_setup_summary(config: dict, hermes_home):
            tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))

    # TTS — show configured provider
-    tts_provider = config.get("tts", {}).get("provider", "edge")
+    tts_provider = cfg_get(config, "tts", "provider", default="edge")
    if subscription_features.tts.managed_by_nous:
        tool_status.append(("Text-to-Speech (OpenAI via Nous subscription)", True, None))
    elif tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"):
@@ -480,7 +483,7 @@ def _print_setup_summary(config: dict, hermes_home):

    if subscription_features.modal.managed_by_nous:
        tool_status.append(("Modal Execution (Nous subscription)", True, None))
-    elif config.get("terminal", {}).get("backend") == "modal":
+    elif cfg_get(config, "terminal", "backend") == "modal":
        if subscription_features.modal.direct_override:
            tool_status.append(("Modal Execution (direct Modal)", True, None))
        else:
@@ -654,6 +657,102 @@ def _prompt_container_resources(config: dict):
        pass


+def _prompt_vercel_sandbox_settings(config: dict):
+    """Prompt for Vercel Sandbox settings without exposing unsupported disk sizing.
+
+    Interactively collects the runtime, snapshot persistence, CPU and memory
+    settings into ``config["terminal"]`` (created if missing), forces
+    ``container_disk`` to 51200, and then prompts for the Vercel access
+    token, project ID and team ID, saving each non-empty answer to the env
+    file.
+
+    Args:
+        config: The loaded config dict; mutated in place.
+    """
+    terminal = config.setdefault("terminal", {})
+
+    print()
+    print_info("Vercel Sandbox settings:")
+    print_info("  Filesystem persistence uses Vercel snapshots.")
+    print_info("  Snapshots restore files only; live processes do not continue after sandbox recreation.")
+
+    from tools.terminal_tool import _SUPPORTED_VERCEL_RUNTIMES
+
+    current_runtime = terminal.get("vercel_runtime") or "node24"
+    supported_label = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
+    runtime = prompt(f"  Runtime ({supported_label})", current_runtime).strip() or current_runtime
+    if runtime not in _SUPPORTED_VERCEL_RUNTIMES:
+        # Work out the fallback first so the warning names the value that is
+        # actually stored; the previously configured runtime may itself be
+        # unsupported, in which case "node24" is used.
+        fallback = current_runtime if current_runtime in _SUPPORTED_VERCEL_RUNTIMES else "node24"
+        print_warning(f"Unsupported Vercel runtime '{runtime}', keeping {fallback}.")
+        runtime = fallback
+    terminal["vercel_runtime"] = runtime
+    save_env_value("TERMINAL_VERCEL_RUNTIME", runtime)
+
+    current_persist = terminal.get("container_persistent", True)
+    persist_label = "yes" if current_persist else "no"
+    # Strip before comparing so an answer like "yes " is not read as "no".
+    terminal["container_persistent"] = prompt(
+        "  Persist filesystem with snapshots? (yes/no)", persist_label
+    ).strip().lower() in ("yes", "true", "y", "1")
+
+    current_cpu = terminal.get("container_cpu", 1)
+    cpu_str = prompt("  CPU cores", str(current_cpu))
+    try:
+        terminal["container_cpu"] = float(cpu_str)
+    except ValueError:
+        # Unparseable answer: leave the existing value untouched.
+        pass
+
+    current_mem = terminal.get("container_memory", 5120)
+    mem_str = prompt("  Memory in MB (5120 = 5GB)", str(current_mem))
+    try:
+        terminal["container_memory"] = int(mem_str)
+    except ValueError:
+        # Unparseable answer: leave the existing value untouched.
+        pass
+
+    # Disk size is not prompted for: warn when a custom value was configured,
+    # then always pin it to 51200.
+    if terminal.get("container_disk", 51200) not in (0, 51200):
+        print_warning("Vercel Sandbox does not support custom disk sizing; resetting container_disk to 51200.")
+    terminal["container_disk"] = 51200
+
+    print()
+    print_info("Vercel authentication:")
+    print_info("  Use a long-lived Vercel access token plus project/team IDs.")
+    linked_project = _read_nearest_vercel_project()
+    if linked_project:
+        print_info("  Found defaults in nearest .vercel/project.json.")
+
+    # Any stored VERCEL_OIDC_TOKEN is removed before the token/project/team
+    # values are collected.
+    # NOTE(review): presumably so a stale OIDC token cannot shadow the
+    # access token -- confirm against the Vercel auth resolution order.
+    remove_env_value("VERCEL_OIDC_TOKEN")
+    token = prompt("    Vercel access token", get_env_value("VERCEL_TOKEN") or "", password=True)
+    project = prompt(
+        "    Vercel project ID",
+        get_env_value("VERCEL_PROJECT_ID") or linked_project.get("projectId", ""),
+    )
+    team = prompt(
+        "    Vercel team ID",
+        get_env_value("VERCEL_TEAM_ID") or linked_project.get("orgId", ""),
+    )
+    # Empty answers never overwrite a previously saved value.
+    if token:
+        save_env_value("VERCEL_TOKEN", token)
+    if project:
+        save_env_value("VERCEL_PROJECT_ID", project)
+    if team:
+        save_env_value("VERCEL_TEAM_ID", team)
+
+
+def _read_nearest_vercel_project(start: Path | None = None) -> dict[str, str]:
+    """Read project/team defaults from the nearest Vercel link file.
+
+    Walks from *start* up through every ancestor directory and stops at the
+    first one that contains ``.vercel/project.json``.
+
+    Args:
+        start: Directory to begin the search from.  Defaults to the current
+            working directory; a path to a file is replaced by its parent.
+
+    Returns:
+        A dict holding whichever of ``projectId`` / ``orgId`` are non-blank
+        strings in that file, whitespace-stripped.  Empty when no link file
+        is found, or when the nearest one cannot be read, is not valid UTF-8
+        JSON, or does not contain a JSON object.
+    """
+    current = (start or Path.cwd()).resolve()
+    if current.is_file():
+        current = current.parent
+
+    for directory in (current, *current.parents):
+        project_file = directory / ".vercel" / "project.json"
+        if not project_file.exists():
+            continue
+        try:
+            data = json.loads(project_file.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # ValueError covers json.JSONDecodeError as well as the
+            # UnicodeDecodeError read_text() raises for non-UTF-8 bytes.
+            return {}
+        if not isinstance(data, dict):
+            return {}
+        defaults: dict[str, str] = {}
+        for key in ("projectId", "orgId"):
+            value = data.get(key)
+            if isinstance(value, str) and value.strip():
+                defaults[key] = value.strip()
+        return defaults
+    return {}
+
+
 # Tool categories and provider config are now in tools_config.py (shared
 # between `hermes tools` and `hermes setup tools`).

@@ -1179,7 +1278,7 @@ def setup_terminal_backend(config: dict):
    print_info(f"   Guide: {_DOCS_BASE}/developer-guide/environments")
    print()

-    current_backend = config.get("terminal", {}).get("backend", "local")
+    current_backend = cfg_get(config, "terminal", "backend", default="local")
    is_linux = _platform.system() == "Linux"

    # Build backend choices with descriptions
@@ -1189,11 +1288,12 @@ def setup_terminal_backend(config: dict):
        "Modal - serverless cloud sandbox",
        "SSH - run on a remote machine",
        "Daytona - persistent cloud development environment",
+        "Vercel Sandbox - cloud microVM with snapshot filesystem persistence",
    ]
-    idx_to_backend = {0: "local", 1: "docker", 2: "modal", 3: "ssh", 4: "daytona"}
-    backend_to_idx = {"local": 0, "docker": 1, "modal": 2, "ssh": 3, "daytona": 4}
+    idx_to_backend = {0: "local", 1: "docker", 2: "modal", 3: "ssh", 4: "daytona", 5: "vercel_sandbox"}
+    backend_to_idx = {"local": 0, "docker": 1, "modal": 2, "ssh": 3, "daytona": 4, "vercel_sandbox": 5}

-    next_idx = 5
+    next_idx = 6
    if is_linux:
        terminal_choices.append("Singularity/Apptainer - HPC-friendly container")
        idx_to_backend[next_idx] = "singularity"
@@ -1228,7 +1328,7 @@ def setup_terminal_backend(config: dict):
        print_info(
            "  the agent starts. CLI mode always starts in the current directory."
        )
-        current_cwd = config.get("terminal", {}).get("cwd", "")
+        current_cwd = cfg_get(config, "terminal", "cwd", default="")
        cwd = prompt("  Messaging working directory", current_cwd or str(Path.home()))
        if cwd:
            config["terminal"]["cwd"] = cwd
@@ -1259,9 +1359,7 @@ def setup_terminal_backend(config: dict):
            print_info(f"Docker found: {docker_bin}")

        # Docker image
-        current_image = config.get("terminal", {}).get(
-            "docker_image", "nikolaik/python-nodejs:python3.11-nodejs20"
-        )
+        current_image = cfg_get(config, "terminal", "docker_image", default="nikolaik/python-nodejs:python3.11-nodejs20")
        image = prompt("  Docker image", current_image)
        config["terminal"]["docker_image"] = image
        save_env_value("TERMINAL_DOCKER_IMAGE", image)
@@ -1281,9 +1379,7 @@ def setup_terminal_backend(config: dict):
        else:
            print_info(f"Found: {sing_bin}")

-        current_image = config.get("terminal", {}).get(
-            "singularity_image", "docker://nikolaik/python-nodejs:python3.11-nodejs20"
-        )
+        current_image = cfg_get(config, "terminal", "singularity_image", default="docker://nikolaik/python-nodejs:python3.11-nodejs20")
        image = prompt("  Container image", current_image)
        config["terminal"]["singularity_image"] = image
        save_env_value("TERMINAL_SINGULARITY_IMAGE", image)
@@ -1302,7 +1398,7 @@ def setup_terminal_backend(config: dict):
            get_nous_subscription_features(config).nous_auth_present
            and is_managed_tool_gateway_ready("modal")
        )
-        modal_mode = normalize_modal_mode(config.get("terminal", {}).get("modal_mode"))
+        modal_mode = normalize_modal_mode(cfg_get(config, "terminal", "modal_mode"))
        use_managed_modal = False
        if managed_modal_available:
            modal_choices = [
@@ -1439,15 +1535,46 @@ def setup_terminal_backend(config: dict):
                print_success("    Configured")

        # Daytona image
-        current_image = config.get("terminal", {}).get(
-            "daytona_image", "nikolaik/python-nodejs:python3.11-nodejs20"
-        )
+        current_image = cfg_get(config, "terminal", "daytona_image", default="nikolaik/python-nodejs:python3.11-nodejs20")
        image = prompt("  Sandbox image", current_image)
        config["terminal"]["daytona_image"] = image
        save_env_value("TERMINAL_DAYTONA_IMAGE", image)

        _prompt_container_resources(config)

+    elif selected_backend == "vercel_sandbox":
+        print_success("Terminal backend: Vercel Sandbox")
+        print_info("Cloud microVM sandboxes with snapshot-backed filesystem persistence.")
+        print_info("Requires the optional SDK: pip install 'hermes-agent[vercel]'")
+
+        try:
+            __import__("vercel")
+        except ImportError:
+            print_info("Installing vercel SDK...")
+            import subprocess
+
+            uv_bin = shutil.which("uv")
+            if uv_bin:
+                result = subprocess.run(
+                    [uv_bin, "pip", "install", "--python", sys.executable, "vercel"],
+                    capture_output=True,
+                    text=True,
+                )
+            else:
+                result = subprocess.run(
+                    [sys.executable, "-m", "pip", "install", "vercel"],
+                    capture_output=True,
+                    text=True,
+                )
+            if result.returncode == 0:
+                print_success("vercel SDK installed")
+            else:
+                print_warning("Install failed — run manually: pip install 'hermes-agent[vercel]'")
+                if result.stderr:
+                    print_info(f"  Error: {result.stderr.strip().splitlines()[-1]}")
+
+        _prompt_vercel_sandbox_settings(config)
+
    elif selected_backend == "ssh":
        print_success("Terminal backend: SSH")
        print_info("Run commands on a remote machine via SSH.")
@@ -1501,6 +1628,8 @@ def setup_terminal_backend(config: dict):
    save_env_value("TERMINAL_ENV", selected_backend)
    if selected_backend == "modal":
        save_env_value("TERMINAL_MODAL_MODE", config["terminal"].get("modal_mode", "auto"))
+    if selected_backend == "vercel_sandbox":
+        save_env_value("TERMINAL_VERCEL_RUNTIME", config["terminal"].get("vercel_runtime", "node24"))
    save_config(config)
    print()
    print_success(f"Terminal backend set to: {selected_backend}")
@@ -1545,7 +1674,7 @@ def setup_agent_settings(config: dict):

    # ── Max Iterations ──
    current_max = get_env_value("HERMES_MAX_ITERATIONS") or str(
-        config.get("agent", {}).get("max_turns", 90)
+        cfg_get(config, "agent", "max_turns", default=90)
    )
    print_info("Maximum tool-calling iterations per conversation.")
    print_info("Higher = more complex tasks, but costs more tokens.")
@@ -1573,7 +1702,7 @@ def setup_agent_settings(config: dict):
    print_info("  all     — Show every tool call with a short preview")
    print_info("  verbose — Full args, results, and debug logs")

-    current_mode = config.get("display", {}).get("tool_progress", "all")
+    current_mode = cfg_get(config, "display", "tool_progress", default="all")
    mode = prompt("Tool progress mode", current_mode)
    if mode.lower() in ("off", "new", "all", "verbose"):
        if "display" not in config:
@@ -1593,7 +1722,7 @@ def setup_agent_settings(config: dict):

    config.setdefault("compression", {})["enabled"] = True

-    current_threshold = config.get("compression", {}).get("threshold", 0.50)
+    current_threshold = cfg_get(config, "compression", "threshold", default=0.50)
    threshold_str = prompt("Compression threshold (0.5-0.95)", str(current_threshold))
    try:
        threshold = float(threshold_str)
@@ -2601,11 +2730,11 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
        return "configured"

    elif section_key == "terminal":
-        backend = config.get("terminal", {}).get("backend", "local")
+        backend = cfg_get(config, "terminal", "backend", default="local")
        return f"backend: {backend}"

    elif section_key == "agent":
-        max_turns = config.get("agent", {}).get("max_turns", 90)
+        max_turns = cfg_get(config, "agent", "max_turns", default=90)
        return f"max turns: {max_turns}"

    elif section_key == "gateway":
@@ -13,7 +13,7 @@ Config stored in ~/.hermes/config.yaml under:
 """
 from typing import List, Optional, Set

-from hermes_cli.config import load_config, save_config
+from hermes_cli.config import cfg_get, load_config, save_config
 from hermes_cli.colors import Colors, color
 from hermes_cli.platforms import PLATFORMS as _PLATFORMS

@@ -30,7 +30,7 @@ def get_disabled_skills(config: dict, platform: Optional[str] = None) -> Set[str
    global_disabled = set(skills_cfg.get("disabled", []))
    if platform is None:
        return global_disabled
-    platform_disabled = skills_cfg.get("platform_disabled", {}).get(platform)
+    platform_disabled = cfg_get(skills_cfg, "platform_disabled", platform)
    if platform_disabled is None:
        return global_disabled
    return set(platform_disabled)
@@ -7,6 +7,7 @@ Shows the status of all Hermes Agent components.
 import os
 import sys
 import subprocess  # noqa: F401 — re-exported for tests that monkeypatch status.subprocess to guard against regressions
+import importlib.util
 from pathlib import Path

 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@@ -17,6 +18,7 @@ from hermes_cli.config import get_env_path, get_env_value, get_hermes_home, load
 from hermes_cli.models import provider_label
 from hermes_cli.nous_subscription import get_nous_subscription_features
 from hermes_cli.runtime_provider import resolve_requested_provider
+from hermes_cli.vercel_auth import describe_vercel_auth
 from hermes_constants import OPENROUTER_MODELS_URL
 from tools.tool_backend_helpers import managed_nous_tools_enabled

@@ -157,14 +159,21 @@ def show_status(args):
    print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD))

    try:
-        from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status, get_qwen_auth_status
+        from hermes_cli.auth import (
+            get_nous_auth_status,
+            get_codex_auth_status,
+            get_qwen_auth_status,
+            get_minimax_oauth_auth_status,
+        )
        nous_status = get_nous_auth_status()
        codex_status = get_codex_auth_status()
        qwen_status = get_qwen_auth_status()
+        minimax_status = get_minimax_oauth_auth_status()
    except Exception:
        nous_status = {}
        codex_status = {}
        qwen_status = {}
+        minimax_status = {}

    nous_logged_in = bool(nous_status.get("logged_in"))
    nous_error = nous_status.get("error")
@@ -217,6 +226,20 @@ def show_status(args):
    if qwen_status.get("error") and not qwen_logged_in:
        print(f"    Error:      {qwen_status.get('error')}")

+    minimax_logged_in = bool(minimax_status.get("logged_in"))
+    print(
+        f"  {'MiniMax OAuth':<12}  {check_mark(minimax_logged_in)} "
+        f"{'logged in' if minimax_logged_in else 'not logged in (run: hermes auth add minimax-oauth)'}"
+    )
+    minimax_region = minimax_status.get("region")
+    if minimax_logged_in and minimax_region:
+        print(f"    Region:     {minimax_region}")
+    minimax_exp = minimax_status.get("expires_at")
+    if minimax_exp:
+        print(f"    Access exp: {minimax_exp}")
+    if minimax_status.get("error") and not minimax_logged_in:
+        print(f"    Error:      {minimax_status.get('error')}")
+
    # =========================================================================
    # Nous Subscription Features
    # =========================================================================
@@ -300,15 +323,10 @@ def show_status(args):
    print()
    print(color("◆ Terminal Backend", Colors.CYAN, Colors.BOLD))
    
+    terminal_cfg = config.get("terminal", {}) if isinstance(config.get("terminal"), dict) else {}
    terminal_env = os.getenv("TERMINAL_ENV", "")
    if not terminal_env:
-        # Fall back to config file value when env var isn't set
-        # (hermes status doesn't go through cli.py's config loading)
-        try:
-            _cfg = load_config()
-            terminal_env = _cfg.get("terminal", {}).get("backend", "local")
-        except Exception:
-            terminal_env = "local"
+        terminal_env = terminal_cfg.get("backend", "local")
    print(f"  Backend:      {terminal_env}")
    
    if terminal_env == "ssh":
@@ -322,6 +340,23 @@ def show_status(args):
    elif terminal_env == "daytona":
        daytona_image = os.getenv("TERMINAL_DAYTONA_IMAGE", "nikolaik/python-nodejs:python3.11-nodejs20")
        print(f"  Daytona Image: {daytona_image}")
+    elif terminal_env == "vercel_sandbox":
+        runtime = os.getenv("TERMINAL_VERCEL_RUNTIME") or terminal_cfg.get("vercel_runtime") or "node24"
+        persist = os.getenv("TERMINAL_CONTAINER_PERSISTENT")
+        if persist is None:
+            persist_enabled = bool(terminal_cfg.get("container_persistent", True))
+        else:
+            persist_enabled = persist.lower() in ("1", "true", "yes", "on")
+        auth_status = describe_vercel_auth()
+        sdk_ok = importlib.util.find_spec("vercel") is not None
+        sdk_label = "installed" if sdk_ok else "missing (install: pip install 'hermes-agent[vercel]')"
+        print(f"  Runtime:      {runtime}")
+        print(f"  SDK:          {check_mark(sdk_ok)} {sdk_label}")
+        print(f"  Auth:         {check_mark(auth_status.ok)} {auth_status.label}")
+        for line in auth_status.detail_lines:
+            print(f"  Auth detail:  {line}")
+        print(f"  Persistence:  {'snapshot filesystem' if persist_enabled else 'ephemeral filesystem'}")
+        print("  Processes:    live processes do not survive cleanup, snapshots, or sandbox recreation")
    
    sudo_password = os.getenv("SUDO_PASSWORD", "")
    print(f"  Sudo:         {check_mark(bool(sudo_password))} {'enabled' if sudo_password else 'disabled'}")
@@ -18,6 +18,7 @@ from typing import Dict, List, Optional, Set


 from hermes_cli.config import (
+    cfg_get,
    load_config, save_config, get_env_value, save_env_value,
 )
 from hermes_cli.colors import Colors, color
@@ -965,7 +966,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}

    # Get existing toolsets for this platform
-    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
+    existing_toolsets = cfg_get(config, "platform_toolsets", platform, default=[])
    if not isinstance(existing_toolsets, list):
        existing_toolsets = []
    existing_toolsets = [str(ts) for ts in existing_toolsets]
@@ -1352,23 +1353,23 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
        if provider.get("tts_provider"):
            return (
                feature.managed_by_nous
-                and config.get("tts", {}).get("provider") == provider["tts_provider"]
+                and cfg_get(config, "tts", "provider") == provider["tts_provider"]
            )
        if "browser_provider" in provider:
-            current = config.get("browser", {}).get("cloud_provider")
+            current = cfg_get(config, "browser", "cloud_provider")
            return feature.managed_by_nous and provider["browser_provider"] == current
        if provider.get("web_backend"):
-            current = config.get("web", {}).get("backend")
+            current = cfg_get(config, "web", "backend")
            return feature.managed_by_nous and current == provider["web_backend"]
        return feature.managed_by_nous

    if provider.get("tts_provider"):
-        return config.get("tts", {}).get("provider") == provider["tts_provider"]
+        return cfg_get(config, "tts", "provider") == provider["tts_provider"]
    if "browser_provider" in provider:
-        current = config.get("browser", {}).get("cloud_provider")
+        current = cfg_get(config, "browser", "cloud_provider")
        return provider["browser_provider"] == current
    if provider.get("web_backend"):
-        current = config.get("web", {}).get("backend")
+        current = cfg_get(config, "web", "backend")
        return current == provider["web_backend"]
    if provider.get("imagegen_backend"):
        image_cfg = config.get("image_gen", {})
@@ -0,0 +1,70 @@
+"""Helpers for reporting Vercel Sandbox authentication state."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+
+
+_TOKEN_TUPLE_VARS = ("VERCEL_TOKEN", "VERCEL_PROJECT_ID", "VERCEL_TEAM_ID")
+
+
+@dataclass(frozen=True)
+class VercelAuthStatus:
+    ok: bool  # True when VERCEL_OIDC_TOKEN or the full token/project/team triple is set
+    label: str  # one-line summary of the detected auth mode
+    detail_lines: tuple[str, ...]  # extra report lines; name env vars only, never their values
+
+
+def _present(name: str) -> bool:
+    return os.environ.get(name, "") != ""  # unset and empty both count as absent
+
+
+def describe_vercel_auth() -> VercelAuthStatus:
+    """Summarise which Vercel credentials are set, never printing their values.
+
+    Precedence: ``VERCEL_OIDC_TOKEN`` first, then the complete access-token
+    triple, then nothing set, then a partial triple (reported as not ok).
+    """
+    found: list[str] = []
+    absent: list[str] = []
+    for var in _TOKEN_TUPLE_VARS:
+        (found if _present(var) else absent).append(var)
+    found_text = ", ".join(found)
+    absent_text = ", ".join(absent)
+
+    # OIDC wins even when access-token variables are also exported.
+    if _present("VERCEL_OIDC_TOKEN"):
+        oidc_lines: tuple[str, ...] = (
+            "mode: OIDC",
+            "active env: VERCEL_OIDC_TOKEN",
+            "note: OIDC tokens are development-only; use access-token auth for deployments and long-running processes",
+        )
+        if found:
+            oidc_lines += (f"also present: {found_text}",)
+        return VercelAuthStatus(True, "OIDC token via VERCEL_OIDC_TOKEN", oidc_lines)
+
+    # Full access-token triple: token, project and team are all set.
+    if not absent:
+        label = "access token + project/team via VERCEL_TOKEN, VERCEL_PROJECT_ID, VERCEL_TEAM_ID"
+        details = ("mode: access token", "active env: VERCEL_TOKEN, VERCEL_PROJECT_ID, VERCEL_TEAM_ID")
+        return VercelAuthStatus(True, label, details)
+
+    if not found:
+        return VercelAuthStatus(
+            False,
+            "not configured",
+            (
+                "recommended: set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID",
+                "development-only alternative: set VERCEL_OIDC_TOKEN",
+            ),
+        )
+
+    # Some, but not all, of the triple is set.
+    partial_lines = (
+        "mode: incomplete access token",
+        f"present env: {found_text}",
+        f"missing env: {absent_text}",
+        "recommended: set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID together",
+    )
+    return VercelAuthStatus(False, f"partial access-token auth (missing {absent_text})", partial_lines)
@@ -33,6 +33,7 @@ if str(PROJECT_ROOT) not in sys.path:

 from hermes_cli import __version__, __release_date__
 from hermes_cli.config import (
+    cfg_get,
    DEFAULT_CONFIG,
    OPTIONAL_ENV_VARS,
    get_config_path,
@@ -252,7 +253,12 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
    "terminal.backend": {
        "type": "select",
        "description": "Terminal execution backend",
-        "options": ["local", "docker", "ssh", "modal", "daytona", "singularity"],
+        "options": ["local", "docker", "ssh", "modal", "daytona", "vercel_sandbox", "singularity"],
+    },
+    "terminal.vercel_runtime": {
+        "type": "select",
+        "description": "Vercel Sandbox runtime",
+        "options": ["node24", "node22", "python3.13"],  # sync with _SUPPORTED_VERCEL_RUNTIMES in terminal_tool.py
    },
    "terminal.modal_mode": {
        "type": "select",
@@ -338,6 +344,7 @@ _CATEGORY_MERGE: Dict[str, str] = {
    "human_delay": "display",
    "dashboard": "display",
    "code_execution": "agent",
+    "prompt_caching": "agent",
 }

 # Display order for tabs — unlisted categories sort alphabetically after these.
@@ -1214,6 +1221,14 @@ _OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
        "docs_url": "https://github.com/QwenLM/qwen-code",
        "status_fn": None,  # dispatched via auth.get_qwen_auth_status
    },
+    {
+        "id": "minimax-oauth",
+        "name": "MiniMax (OAuth)",
+        "flow": "pkce",
+        "cli_command": "hermes auth add minimax-oauth",
+        "docs_url": "https://www.minimax.io",
+        "status_fn": None,  # dispatched via auth.get_minimax_oauth_auth_status
+    },
 )


@@ -1257,6 +1272,16 @@ def _resolve_provider_status(provider_id: str, status_fn) -> Dict[str, Any]:
                "expires_at": raw.get("expires_at"),
                "has_refresh_token": bool(raw.get("has_refresh_token")),
            }
+        if provider_id == "minimax-oauth":
+            raw = hauth.get_minimax_oauth_auth_status()
+            return {
+                "logged_in": bool(raw.get("logged_in")),
+                "source": "minimax_oauth",
+                "source_label": f"MiniMax ({raw.get('region', 'global')})",
+                "token_preview": None,
+                "expires_at": raw.get("expires_at"),
+                "has_refresh_token": True,
+            }
    except Exception as e:
        return {"logged_in": False, "error": str(e)}
    return {"logged_in": False}
@@ -2902,7 +2927,7 @@ async def get_dashboard_themes():
    them without a stub.
    """
    config = load_config()
-    active = config.get("dashboard", {}).get("theme", "default")
+    active = cfg_get(config, "dashboard", "theme", default="default")
    user_themes = _discover_user_themes()
    seen = set()
    themes = []
@@ -19,6 +19,7 @@ from typing import Dict

 from hermes_constants import display_hermes_home
 from utils import atomic_replace
+from hermes_cli.config import cfg_get


 _SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
@@ -60,7 +61,7 @@ def _get_webhook_config() -> dict:
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
-        return cfg.get("platforms", {}).get("webhook", {})
+        return cfg_get(cfg, "platforms", "webhook", default={})
    except Exception:
        return {}

@@ -107,17 +107,58 @@ def _run_async(coro):
        loop = None

    if loop and loop.is_running():
-        # Inside an async context (gateway, RL env) — run in a fresh thread.
+        # Inside an async context (gateway, RL env) — run in a fresh thread
+        # with its own event loop we own a reference to, so on timeout we
+        # can cancel the task inside that loop (ThreadPoolExecutor.cancel()
+        # only works on not-yet-started futures — it's a no-op on a running
+        # worker, which previously leaked the thread on every 300 s timeout).
        import concurrent.futures
+
+        worker_loop: Optional[asyncio.AbstractEventLoop] = None
+        loop_ready = threading.Event()
+
+        def _run_in_worker():
+            nonlocal worker_loop
+            worker_loop = asyncio.new_event_loop()
+            loop_ready.set()
+            try:
+                asyncio.set_event_loop(worker_loop)
+                return worker_loop.run_until_complete(coro)
+            finally:
+                try:
+                    # Cancel anything still pending (e.g. task cancelled
+                    # externally via call_soon_threadsafe on timeout).
+                    pending = asyncio.all_tasks(worker_loop)
+                    for t in pending:
+                        t.cancel()
+                    if pending:
+                        worker_loop.run_until_complete(
+                            asyncio.gather(*pending, return_exceptions=True)
+                        )
+                except Exception:
+                    pass
+                worker_loop.close()
+
        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-        future = pool.submit(asyncio.run, coro)
+        future = pool.submit(_run_in_worker)
        try:
            return future.result(timeout=300)
        except concurrent.futures.TimeoutError:
-            future.cancel()
+            # Cancel the coroutine inside its own loop so the worker thread
+            # can wind down instead of running forever.
+            if loop_ready.wait(timeout=1.0) and worker_loop is not None:
+                try:
+                    for t in asyncio.all_tasks(worker_loop):
+                        worker_loop.call_soon_threadsafe(t.cancel)
+                except RuntimeError:
+                    # Loop already closed — nothing to cancel.
+                    pass
            raise
        finally:
-            pool.shutdown(wait=False, cancel_futures=True)
+            # wait=False: don't block the caller on a stuck coroutine. We've
+            # already requested cancellation above; the worker will exit
+            # once the coroutine observes it (usually at the next await).
+            pool.shutdown(wait=False)

    # If we're on a worker thread (e.g., parallel tool execution in
    # delegate_task), use a per-thread persistent loop.  This avoids
@@ -627,6 +668,13 @@ def handle_function_call(
        # Check plugin hooks for a block directive (unless caller already
        # checked — e.g. run_agent._invoke_tool passes skip=True to
        # avoid double-firing the hook).
+        #
+        # Single-fire contract: pre_tool_call fires exactly once per tool
+        # execution. get_pre_tool_call_block_message() internally calls
+        # invoke_hook("pre_tool_call", ...) and returns the first block
+        # directive (if any), so observer plugins see the hook on that same
+        # pass. When skip=True, the caller already fired it — do nothing
+        # here.
        if not skip_pre_tool_call_hook:
            block_message: Optional[str] = None
            try:
@@ -643,21 +691,6 @@ def handle_function_call(

            if block_message is not None:
                return json.dumps({"error": block_message}, ensure_ascii=False)
-        else:
-            # Still fire the hook for observers — just don't check for blocking
-            # (the caller already did that).
-            try:
-                from hermes_cli.plugins import invoke_hook
-                invoke_hook(
-                    "pre_tool_call",
-                    tool_name=function_name,
-                    args=function_args,
-                    task_id=task_id or "",
-                    session_id=session_id or "",
-                    tool_call_id=tool_call_id or "",
-                )
-            except Exception:
-                pass

        # Notify the read-loop tracker when a non-read/search tool runs,
        # so the *consecutive* counter resets (reads after other work are fine).
@@ -737,7 +770,7 @@ def handle_function_call(

    except Exception as e:
        error_msg = f"Error executing {function_name}: {str(e)}"
-        logger.error(error_msg)
+        logger.exception(error_msg)
        return json.dumps({"error": error_msg}, ensure_ascii=False)


@@ -4,7 +4,7 @@ let
  src = ../ui-tui;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-Chz+NW9NXqboXHOa6PKwf5bhAkkcFtKNhvKWwg2XSPc=";
+    hash = "sha256-a/HGI9OgVcTnZrMXA7xFMGnFoVxyHe95fulVz+WNYB0=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@@ -27,6 +27,7 @@ import logging
 import sys
 from pathlib import Path
 from typing import List, Optional, Tuple
+from hermes_cli.config import cfg_get

 logger = logging.getLogger(__name__)

@@ -314,7 +315,7 @@ def _get_active_memory_provider() -> Optional[str]:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        return config.get("memory", {}).get("provider") or None
+        return cfg_get(config, "memory", "provider") or None
    except Exception:
        return None

@@ -29,10 +29,12 @@ Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
 from __future__ import annotations

 import asyncio
+import atexit
 import importlib
 import json
 import logging
 import os
+import queue
 import threading

 from datetime import datetime, timezone
@@ -41,6 +43,7 @@ from typing import Any, Dict, List
 from agent.memory_provider import MemoryProvider
 from hermes_constants import get_hermes_home
 from tools.registry import tool_error
+from hermes_cli.config import cfg_get

 logger = logging.getLogger(__name__)

@@ -99,6 +102,10 @@ _loop: asyncio.AbstractEventLoop | None = None
 _loop_thread: threading.Thread | None = None
 _loop_lock = threading.Lock()

+# Sentinel pushed to the per-provider retain queue to wake the writer for a
+# clean exit. A unique object so it can never collide with a real job.
+_WRITER_SENTINEL = object()
+

 def _get_loop() -> asyncio.AbstractEventLoop:
    """Return a long-lived event loop running on a background thread."""
@@ -443,6 +450,16 @@ class HindsightMemoryProvider(MemoryProvider):
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
        self._prefetch_thread = None
+        # Single-writer model for retain. sync_turn() enqueues; the writer
+        # thread drains sequentially. Avoids spawning ad-hoc threads that
+        # can race the interpreter shutdown and emit "cannot schedule new
+        # futures after interpreter shutdown" / "Unclosed client session".
+        self._retain_queue: queue.Queue = queue.Queue()
+        self._writer_thread: threading.Thread | None = None
+        self._shutting_down = threading.Event()
+        self._atexit_registered = False
+        # Legacy alias — older tests/callers reference _sync_thread directly.
+        # Points at _writer_thread once the writer is running.
        self._sync_thread = None
        self._session_id = ""
        self._parent_session_id = ""
@@ -817,6 +834,73 @@ class HindsightMemoryProvider(MemoryProvider):
            )
        )

+    def _ensure_writer(self) -> None:
+        """Start the retain-writer thread on first use; no-op if one is alive.
+
+        Startup is deferred to the first enqueue rather than initialize(), so
+        a provider that never retains never spawns the thread.
+        """
+        current = self._writer_thread
+        if current is not None and current.is_alive():
+            return
+        # No live writer (never started, or the previous one has exited):
+        # clear the shutdown flag so the replacement is allowed to drain jobs.
+        self._shutting_down.clear()
+        writer = threading.Thread(
+            name="hindsight-writer",
+            target=self._writer_loop,
+            daemon=True,
+        )
+        # _sync_thread is the legacy alias for the writer; keep it in step so
+        # external code that joins _sync_thread still waits on this thread.
+        self._writer_thread = writer
+        self._sync_thread = writer
+        writer.start()
+
+    def _writer_loop(self) -> None:
+        """Run queued retain jobs one at a time until told to stop.
+
+        Stops on the sentinel, or on an idle 1 s poll once shutdown is
+        flagged. A failing job is logged and skipped; task_done() is called
+        for every item taken off the queue, the sentinel included.
+        """
+        while True:
+            try:
+                job = self._retain_queue.get(timeout=1.0)
+            except queue.Empty:
+                if self._shutting_down.is_set():
+                    break
+                continue
+            try:
+                if job is _WRITER_SENTINEL:
+                    break
+                job()
+            except Exception as exc:
+                logger.warning("Hindsight retain failed: %s", exc, exc_info=True)
+            finally:
+                self._retain_queue.task_done()
+
+    def _register_atexit(self) -> None:
+        """Install the atexit drain hook once; later calls are no-ops.
+
+        The hook is _atexit_shutdown(), which runs shutdown() if it has not
+        already been requested. It covers process exits that never call
+        shutdown() explicitly, where queued retain jobs would otherwise race
+        interpreter teardown ("cannot schedule new futures" warnings,
+        unclosed aiohttp sessions).
+        """
+        if not self._atexit_registered:
+            self._atexit_registered = True
+            atexit.register(self._atexit_shutdown)
+
+    def _atexit_shutdown(self) -> None:
+        if not self._shutting_down.is_set():  # skip when shutdown() already ran
+            try:
+                self.shutdown()
+            except Exception as exc:
+                # Best effort during interpreter exit: log quietly, never raise.
+                logger.debug("Hindsight atexit shutdown failed: %s", exc)
+
    def _run_hindsight_operation(self, operation):
        """Run an async Hindsight client operation, retrying once after idle shutdown."""
        client = self._get_client()
@@ -913,7 +997,7 @@ class HindsightMemoryProvider(MemoryProvider):
        self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
        self._llm_base_url = self._config.get("llm_base_url", "")

-        banks = self._config.get("banks", {}).get("hermes", {})
+        banks = cfg_get(self._config, "banks", "hermes", default={})
        static_bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
        self._bank_id_template = self._config.get("bank_id_template", "") or ""
        self._bank_id = _resolve_bank_id_template(
@@ -1080,6 +1164,9 @@ class HindsightMemoryProvider(MemoryProvider):
        if not self._auto_recall:
            logger.debug("Prefetch: skipped (auto_recall disabled)")
            return
+        if self._shutting_down.is_set():
+            logger.debug("Prefetch: skipped (shutting down)")
+            return
        # Truncate query to max chars
        if self._recall_max_input_chars and len(query) > self._recall_max_input_chars:
            query = query[:self._recall_max_input_chars]
@@ -1188,13 +1275,19 @@ class HindsightMemoryProvider(MemoryProvider):
        return kwargs

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
-        """Retain conversation turn in background (non-blocking).
+        """Enqueue a retain for the current turn. Non-blocking.

-        Respects retain_every_n_turns for batching.
+        The actual aretain_batch runs on a single long-lived writer thread
+        that drains an in-memory queue. Once shutdown() has been called,
+        further sync_turn() calls are dropped — this prevents post-exit
+        retains from reaching aiohttp after interpreter shutdown begins.
        """
        if not self._auto_retain:
            logger.debug("sync_turn: skipped (auto_retain disabled)")
            return
+        if self._shutting_down.is_set():
+            logger.debug("sync_turn: skipped (shutting down)")
+            return

        if session_id:
            self._session_id = str(session_id).strip()
@@ -1219,37 +1312,42 @@ class HindsightMemoryProvider(MemoryProvider):
        if self._parent_session_id:
            lineage_tags.append(f"parent:{self._parent_session_id}")

-        def _sync():
-            try:
-                item = self._build_retain_kwargs(
-                    content,
-                    context=self._retain_context,
-                    metadata=self._build_metadata(
-                        message_count=len(self._session_turns) * 2,
-                        turn_index=self._turn_index,
-                    ),
-                    tags=lineage_tags or None,
-                )
-                item.pop("bank_id", None)
-                item.pop("retain_async", None)
-                logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
-                             self._bank_id, self._document_id, self._retain_async, len(content), len(self._session_turns))
-                self._run_hindsight_operation(
-                    lambda client: client.aretain_batch(
-                        bank_id=self._bank_id,
-                        items=[item],
-                        document_id=self._document_id,
-                        retain_async=self._retain_async,
-                    )
-                )
-                logger.debug("Hindsight retain succeeded")
-            except Exception as e:
-                logger.warning("Hindsight sync failed: %s", e, exc_info=True)
+        # Snapshot the state needed for the retain. The writer may run after
+        # _session_turns / _turn_index are mutated by a later sync_turn().
+        metadata_snapshot = self._build_metadata(
+            message_count=len(self._session_turns) * 2,
+            turn_index=self._turn_index,
+        )
+        num_turns = len(self._session_turns)
+        document_id = self._document_id
+        bank_id = self._bank_id
+        retain_async_flag = self._retain_async
+        retain_context = self._retain_context

-        if self._sync_thread and self._sync_thread.is_alive():
-            self._sync_thread.join(timeout=5.0)
-        self._sync_thread = threading.Thread(target=_sync, daemon=True, name="hindsight-sync")
-        self._sync_thread.start()
+        def _do_retain() -> None:
+            item = self._build_retain_kwargs(
+                content,
+                context=retain_context,
+                metadata=metadata_snapshot,
+                tags=lineage_tags or None,
+            )
+            item.pop("bank_id", None)
+            item.pop("retain_async", None)
+            logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
+                         bank_id, document_id, retain_async_flag, len(content), num_turns)
+            self._run_hindsight_operation(
+                lambda client: client.aretain_batch(
+                    bank_id=bank_id,
+                    items=[item],
+                    document_id=document_id,
+                    retain_async=retain_async_flag,
+                )
+            )
+            logger.debug("Hindsight retain succeeded")
+
+        self._ensure_writer()
+        self._register_atexit()
+        self._retain_queue.put(_do_retain)

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        if self._memory_mode == "context":
@@ -1324,11 +1422,149 @@ class HindsightMemoryProvider(MemoryProvider):

        return tool_error(f"Unknown tool: {tool_name}")

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Refresh cached per-session state when the agent rotates session_id.
+
+        Fires on /resume, /branch, /reset, /new, and context compression.
+        Without this hook, initialize()-cached state (``_session_id``,
+        ``_document_id``, ``_session_turns``, ``_turn_counter``) would keep
+        pointing at the previous session and writes would land in the wrong
+        document. See hermes-agent#6672.
+
+        Steps, in order:
+
+        1. If ``_session_turns`` is non-empty, queue one retain of those turns
+           on the writer thread. Every value the job needs (document id,
+           bank id, metadata, lineage tags, retain settings) is snapshotted
+           here, before rotation, so the flush lands under the *old*
+           identifiers no matter when the writer runs it. Skipped once
+           shutdown has begun. Without the flush, ``retain_every_n_turns > 1``
+           would silently drop whatever was buffered at the switch.
+        2. Wait up to 3 s for an in-flight prefetch and clear its cached
+           result so the new session never reads stale recall text.
+        3. Adopt the new ``_session_id``, mint a fresh ``_document_id`` (see
+           vectorize-io/hindsight#1303) and reset ``_session_turns``,
+           ``_turn_counter`` and ``_turn_index`` — for every kind of switch,
+           not only /reset.
+
+        An empty/blank ``new_session_id`` is ignored. ``parent_session_id``,
+        when given, is recorded for lineage tags on future retains. ``reset``
+        is only logged; extra ``kwargs`` are accepted and ignored.
+        """
+        new_id = str(new_session_id or "").strip()
+        if not new_id:
+            return
+
+        # 1. Flush any buffered turns under the OLD identifiers. Snapshot
+        # everything before mutating self._* so metadata + tags + doc_id
+        # + bank/retain settings all reference the old session consistently.
+        if self._session_turns:
+            old_turns = list(self._session_turns)
+            old_session_id = self._session_id
+            old_document_id = self._document_id
+            old_parent_session_id = self._parent_session_id
+            old_turn_index = self._turn_index
+            old_bank_id = self._bank_id
+            old_retain_async = self._retain_async
+            old_retain_context = self._retain_context
+            old_metadata = self._build_metadata(
+                message_count=len(old_turns) * 2,
+                turn_index=old_turn_index,
+            )
+            old_lineage_tags: list[str] = []
+            if old_session_id:
+                old_lineage_tags.append(f"session:{old_session_id}")
+            if old_parent_session_id:
+                old_lineage_tags.append(f"parent:{old_parent_session_id}")
+            old_content = "[" + ",".join(old_turns) + "]"
+
+            def _flush():
+                try:
+                    item = self._build_retain_kwargs(
+                        old_content,
+                        context=old_retain_context,
+                        metadata=old_metadata,
+                        tags=old_lineage_tags or None,
+                    )
+                    item.pop("bank_id", None)
+                    item.pop("retain_async", None)
+                    logger.debug(
+                        "Hindsight flush-on-switch: bank=%s, doc=%s, num_turns=%d",
+                        old_bank_id, old_document_id, len(old_turns),
+                    )
+                    self._run_hindsight_operation(
+                        lambda client: client.aretain_batch(
+                            bank_id=old_bank_id,
+                            items=[item],
+                            document_id=old_document_id,
+                            retain_async=old_retain_async,
+                        )
+                    )
+                except Exception as e:
+                    logger.warning("Hindsight flush-on-switch failed: %s", e, exc_info=True)
+
+            # Route the flush through the same writer queue sync_turn
+            # uses. That serializes it behind any still-queued retains
+            # from the old session (FIFO by document_id), avoids racing
+            # two threads on aretain_batch against the same document, and
+            # keeps shutdown's drain semantics intact. Skip enqueue if
+            # shutdown has already fired — the writer is draining/gone.
+            if not self._shutting_down.is_set():
+                self._ensure_writer()
+                self._register_atexit()
+                self._retain_queue.put(_flush)
+
+        # 2. Drain any in-flight prefetch from the old session and drop
+        # its cached result so the new session doesn't see stale recall.
+        if self._prefetch_thread and self._prefetch_thread.is_alive():
+            self._prefetch_thread.join(timeout=3.0)
+        with self._prefetch_lock:
+            self._prefetch_result = ""
+
+        # 3. Now rotate to the new session.
+        if parent_session_id:
+            self._parent_session_id = str(parent_session_id).strip()
+        self._session_id = new_id
+        start_ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+        self._document_id = f"{self._session_id}-{start_ts}"
+        self._session_turns = []
+        self._turn_counter = 0
+        self._turn_index = 0
+        logger.debug(
+            "Hindsight on_session_switch: new_session=%s parent=%s reset=%s doc=%s",
+            self._session_id, self._parent_session_id, reset, self._document_id,
+        )
+
    def shutdown(self) -> None:
-        logger.debug("Hindsight shutdown: waiting for background threads")
-        for t in (self._prefetch_thread, self._sync_thread):
-            if t and t.is_alive():
-                t.join(timeout=5.0)
+        logger.debug("Hindsight shutdown: stopping writer + waiting for background threads")
+        # Stop accepting new retain jobs first so anyone still calling
+        # sync_turn() during teardown is dropped, not enqueued.
+        self._shutting_down.set()
+        # Drain the writer: it will finish in-flight work, then exit on
+        # the sentinel. Bounded join keeps shutdown predictable even if
+        # the daemon is wedged.
+        writer = self._writer_thread
+        if writer is not None and writer.is_alive():
+            try:
+                self._retain_queue.put(_WRITER_SENTINEL)
+            except Exception:
+                pass
+            writer.join(timeout=10.0)
+            if writer.is_alive():
+                logger.warning(
+                    "Hindsight writer did not stop within 10s; "
+                    "abandoning %d pending retain(s)",
+                    self._retain_queue.qsize(),
+                )
+        if self._prefetch_thread and self._prefetch_thread.is_alive():
+            self._prefetch_thread.join(timeout=5.0)
        if self._client is not None:
            try:
                if self._mode == "local_embedded":
@@ -26,6 +26,7 @@ from agent.memory_provider import MemoryProvider
 from tools.registry import tool_error
 from .store import MemoryStore
 from .retrieval import FactRetriever
+from hermes_cli.config import cfg_get

 logger = logging.getLogger(__name__)

@@ -102,7 +103,7 @@ def _load_plugin_config() -> dict:
        import yaml
        with open(config_path) as f:
            all_config = yaml.safe_load(f) or {}
-        return all_config.get("plugins", {}).get("hermes-memory-store", {}) or {}
+        return cfg_get(all_config, "plugins", "hermes-memory-store", default={}) or {}
    except Exception:
        return {}

@@ -12,6 +12,7 @@ from pathlib import Path

 from hermes_constants import get_hermes_home
 from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, HOST
+from hermes_cli.config import cfg_get


 def clone_honcho_for_profile(profile_name: str) -> bool:
@@ -106,7 +107,7 @@ def cmd_enable(args) -> None:

    # If this is a new profile host block with no settings, clone from default
    if not block.get("aiPeer"):
-        default_block = cfg.get("hosts", {}).get(HOST, {})
+        default_block = cfg_get(cfg, "hosts", HOST, default={})
        for key in ("recallMode", "writeFrequency", "sessionStrategy",
                    "contextTokens", "dialecticReasoningLevel", "dialecticDynamic",
                    "dialecticMaxChars", "messageMaxChars", "dialecticMaxInputChars",
@@ -139,7 +140,7 @@ def cmd_disable(args) -> None:
    cfg = _read_config()
    host = _host_key()
    label = f"[{host}] " if host != "hermes" else ""
-    block = cfg.get("hosts", {}).get(host, {})
+    block = cfg_get(cfg, "hosts", host, default={})

    if not block or block.get("enabled") is False:
        print(f"  {label}Honcho is already disabled.\n")
@@ -212,7 +213,7 @@ def sync_honcho_profiles_quiet() -> int:
    if not cfg:
        return 0

-    default_block = cfg.get("hosts", {}).get(HOST, {})
+    default_block = cfg_get(cfg, "hosts", HOST, default={})
    has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
    if not default_block and not has_key:
        return 0
@@ -30,6 +30,8 @@ dependencies = [
  "firecrawl-py>=4.16.0,<5",
  "parallel-web>=0.4.2,<1",
  "fal-client>=0.13.1,<1",
+  # Cron scheduler (built-in feature — scheduled cron/interval jobs use croniter).
+  "croniter>=6.0.0,<7",
  # Text-to-speech (Edge TTS is free, no API key needed)
  "edge-tts>=7.2.7,<8",
  # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
@@ -39,9 +41,10 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
+vercel = ["vercel>=0.5.7,<0.6.0"]
 dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
 messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
-cron = ["croniter>=6.0.0,<7"]
+cron = []  # croniter is now a core dependency; this extra kept for back-compat
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29", "aiohttp-socks>=0.10,<1"]
 cli = ["simple-term-menu>=1.0,<2"]
@@ -100,6 +103,7 @@ yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88
 all = [
  "hermes-agent[modal]",
  "hermes-agent[daytona]",
+  "hermes-agent[vercel]",
  "hermes-agent[messaging]",
  # matrix: python-olm (required by matrix-nio[e2e]) is upstream-broken on
  # modern macOS (archived libolm, C++ errors with Clang 21+).  On Linux the
@@ -160,6 +160,7 @@ from agent.trajectory import (
    save_trajectory as _save_trajectory_to_file,
 )
 from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url
+from hermes_cli.config import cfg_get



@@ -925,6 +926,7 @@ class AIAgent:
        thread_id: str = None,
        gateway_session_key: str = None,
        skip_context_files: bool = False,
+        load_soul_identity: bool = False,
        skip_memory: bool = False,
        session_db=None,
        parent_session_id: str = None,
@@ -976,6 +978,9 @@ class AIAgent:
            skip_context_files (bool): If True, skip auto-injection of SOUL.md, AGENTS.md, and .cursorrules
                into the system prompt. Use this for batch processing and data generation to avoid
                polluting trajectories with user-specific persona or project instructions.
+            load_soul_identity (bool): If True, still use ~/.hermes/SOUL.md as the primary
+                identity even when skip_context_files=True. Project context files from the cwd
+                remain skipped.
        """
        _install_safe_stdio()

@@ -1004,6 +1009,7 @@ class AIAgent:
        self._print_fn = None
        self.background_review_callback = None  # Optional sync callback for gateway delivery
        self.skip_context_files = skip_context_files
+        self.load_soul_identity = load_soul_identity
        self.pass_session_id = pass_session_id
        self._credential_pool = credential_pool
        self.log_prefix_chars = log_prefix_chars
@@ -1788,7 +1794,7 @@ class AIAgent:
        # compression model. Custom endpoints often cannot report this via
        # /models, so the startup feasibility check needs the config hint.
        try:
-            _aux_cfg = _agent_cfg.get("auxiliary", {}).get("compression", {})
+            _aux_cfg = cfg_get(_agent_cfg, "auxiliary", "compression", default={})
        except Exception:
            _aux_cfg = {}
        if isinstance(_aux_cfg, dict):
@@ -2813,6 +2819,24 @@ class AIAgent:
            # Third-party Anthropic-compatible gateway.
            return True, True

+        # MiniMax on its Anthropic-compatible endpoint serves its own
+        # model family (MiniMax-M2.7, M2.5, M2.1, M2) with documented
+        # cache_control support (0.1× read pricing, 5-minute TTL).  The
+        # blanket is_claude gate above excludes these — opt them in
+        # explicitly via provider id or host match so users on
+        # provider=minimax / minimax-cn (or custom endpoints pointing at
+        # api.minimax.io/anthropic / api.minimaxi.com/anthropic) get the
+        # same cost reduction as Claude traffic.
+        # Docs: https://platform.minimax.io/docs/api-reference/anthropic-api-compatible-cache
+        if is_anthropic_wire:
+            is_minimax_provider = provider_lower in {"minimax", "minimax-cn"}
+            is_minimax_host = (
+                base_url_host_matches(eff_base_url, "api.minimax.io")
+                or base_url_host_matches(eff_base_url, "api.minimaxi.com")
+            )
+            if is_minimax_provider or is_minimax_host:
+                return True, True
+
        # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire
        # transport that accepts Anthropic-style cache_control markers and
        # rewards them with real cache hits.  Without this branch
@@ -4546,8 +4570,14 @@ class AIAgent:
        if not (self._memory_manager and final_response and original_user_message):
            return
        try:
-            self._memory_manager.sync_all(original_user_message, final_response)
-            self._memory_manager.queue_prefetch_all(original_user_message)
+            self._memory_manager.sync_all(
+                original_user_message, final_response,
+                session_id=self.session_id or "",
+            )
+            self._memory_manager.queue_prefetch_all(
+                original_user_message,
+                session_id=self.session_id or "",
+            )
        except Exception:
            pass

@@ -4717,9 +4747,11 @@ class AIAgent:
        #   6. Current date & time (frozen at build time)
        #   7. Platform-specific formatting hint

-        # Try SOUL.md as primary identity (unless context files are skipped)
+        # Try SOUL.md as primary identity unless the caller explicitly skipped it.
+        # Some execution modes (cron) still want HERMES_HOME persona while keeping
+        # cwd project instructions disabled.
        _soul_loaded = False
-        if not self.skip_context_files:
+        if self.load_soul_identity or not self.skip_context_files:
            _soul_content = load_soul_md()
            if _soul_content:
                prompt_parts = [_soul_content]
@@ -8234,6 +8266,7 @@ class AIAgent:
            model=self.model,
            messages=_msgs_for_chat,
            tools=self.tools,
+            base_url=self.base_url,
            timeout=self._resolved_api_call_timeout(),
            max_tokens=self.max_tokens,
            ephemeral_max_output_tokens=_ephemeral_out,
@@ -8919,6 +8952,23 @@ class AIAgent:
        except Exception as _ce_err:
            logger.debug("context engine on_session_start (compression): %s", _ce_err)

+        # Notify memory providers of the compression-driven session_id rotation
+        # so provider-cached per-session state (Hindsight's _document_id,
+        # accumulated turn buffers, counters) refreshes. reset=False because
+        # the logical conversation continues; only the id and DB row rolled
+        # over. See #6672.
+        try:
+            _old_sid = locals().get("old_session_id")
+            if _old_sid and self._memory_manager:
+                self._memory_manager.on_session_switch(
+                    self.session_id or "",
+                    parent_session_id=_old_sid,
+                    reset=False,
+                    reason="compression",
+                )
+        except Exception as _me_err:
+            logger.debug("memory manager on_session_switch (compression): %s", _me_err)
+
        # Warn on repeated compressions (quality degrades with each pass)
        _cc = self.context_compressor.compression_count
        if _cc >= 2:
@@ -9940,7 +9990,7 @@ class AIAgent:
                                   is_oauth=self._is_anthropic_oauth,
                                   preserve_dots=self._anthropic_preserve_dots())
                    summary_response = self._anthropic_messages_create(_ant_kw)
-                    _summary_result = _tsum.normalize_response(summary_response)
+                    _summary_result = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
                    final_response = (_summary_result.content or "").strip()
                else:
                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
@@ -9970,7 +10020,7 @@ class AIAgent:
                                    max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                    preserve_dots=self._anthropic_preserve_dots())
                    retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_result = _tretry.normalize_response(retry_response)
+                    _retry_result = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
                    final_response = (_retry_result.content or "").strip()
                else:
                    summary_kwargs = {
@@ -11098,7 +11148,12 @@ class AIAgent:
                        # would have been appended in the non-truncated path.
                        _trunc_msg = None
                        _trunc_transport = self._get_transport()
-                        _trunc_result = _trunc_transport.normalize_response(response)
+                        if self.api_mode == "anthropic_messages":
+                            _trunc_result = _trunc_transport.normalize_response(
+                                response, strip_tool_prefix=self._is_anthropic_oauth
+                            )
+                        else:
+                            _trunc_result = _trunc_transport.normalize_response(response)
                        _trunc_msg = _trunc_result

                        _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
@@ -12436,7 +12491,10 @@ class AIAgent:

            try:
                _transport = self._get_transport()
-                normalized = _transport.normalize_response(response)
+                _normalize_kwargs = {}
+                if self.api_mode == "anthropic_messages":
+                    _normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth
+                normalized = _transport.normalize_response(response, **_normalize_kwargs)
                assistant_message = normalized
                finish_reason = normalized.finish_reason
                
@@ -44,6 +44,9 @@ AUTHOR_MAP = {
    "qiyin.zuo@pcitc.com": "qiyin-code",
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
+    "2093036+exiao@users.noreply.github.com": "exiao",
+    "rylen.anil@gmail.com": "rylena",
+    "14046872+tmimmanuel@users.noreply.github.com": "tmimmanuel",
    "revar@users.noreply.github.com": "revaraver",
    # Matrix parity salvage batch (April 2026)
    "sr@samirusani": "samrusani",
@@ -57,12 +60,19 @@ AUTHOR_MAP = {
    "johnnncenaaa77@gmail.com": "johnncenae",
    "thomasjhon6666@gmail.com": "ThomassJonax",
    "focusflow.app.help@gmail.com": "yes999zc",
+    "162235745+0z1-ghb@users.noreply.github.com": "0z1-ghb",
    "yes999zc@163.com": "yes999zc",
    "343873859@qq.com": "DrStrangerUJN",
+    "252818347@qq.com": "hejuntt1014",
    "uzmpsk.dilekakbas@gmail.com": "dlkakbs",
    "beliefanx@gmail.com": "BeliefanX",
    "jefferson@heimdallstrategy.com": "Mind-Dragon",
+    "44753291+Nanako0129@users.noreply.github.com": "Nanako0129",
    "steve.westerhouse@origami-analytics.com": "westers",
+    "yeyitech@users.noreply.github.com": "yeyitech",
+    "260878550+beenherebefore@users.noreply.github.com": "beenherebefore",
+    "79389617+txbxxx@users.noreply.github.com": "txbxxx",
+    "liuhao03@bilibili.com": "liuhao1024",
    "130918800+devorun@users.noreply.github.com": "devorun",
    "surat.s@itm.kmutnb.ac.th": "beesrsj2500",
    "beesr@bee.localdomain": "beesrsj2500",
@@ -96,6 +106,7 @@ AUTHOR_MAP = {
    "keifergu@tencent.com": "keifergu",
    "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
    "abner.the.foreman@agentmail.to": "Abnertheforeman",
+    "adam.manning@pro-serveinc.com": "amanning3390",
    "thomasgeorgevii09@gmail.com": "tochukwuada",
    "harryykyle1@gmail.com": "hharry11",
    "kshitijk4poor@gmail.com": "kshitijk4poor",
@@ -259,8 +270,10 @@ AUTHOR_MAP = {
    "danielrpike9@gmail.com": "Bartok9",
    "skozyuk@cruxexperts.com": "CruxExperts",
    "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
+    "12250313+Kailigithub@users.noreply.github.com": "Kailigithub",
    "mgparkprint@gmail.com": "vlwkaos",
    "tranquil_flow@protonmail.com": "Tranquil-Flow",
+    "LyleLengyel@gmail.com": "mcndjxlefnd",
    "wangshengyang2004@163.com": "Wangshengyang2004",
    "hasan.ali13381@gmail.com": "H-Ali13381",
    "xienb@proton.me": "XieNBi",
@@ -312,6 +325,7 @@ AUTHOR_MAP = {
    "dalvidjr2022@gmail.com": "Jr-kenny",
    "m@statecraft.systems": "mbierling",
    "balyan.sid@gmail.com": "alt-glitch",
+    "52913345+alt-glitch@users.noreply.github.com": "alt-glitch",
    "oluwadareab12@gmail.com": "bennytimz",
    "simon@simonmarcus.org": "simon-marcus",
    "xowiekk@gmail.com": "Xowiek",
@@ -412,6 +426,7 @@ AUTHOR_MAP = {
    "tesseracttars@gmail.com": "tesseracttars-creator",
    "tianliangjay@gmail.com": "xingkongliang",
    "tranquil_flow@protonmail.com": "Tranquil-Flow",
+    "LyleLengyel@gmail.com": "mcndjxlefnd",
    "unayung@gmail.com": "Unayung",
    "vorvul.danylo@gmail.com": "WorldInnovationsDepartment",
    "win4r@outlook.com": "win4r",
@@ -0,0 +1,645 @@
+---
+name: comfyui
+description: "Generate images, video, and audio with ComfyUI — install, launch, manage nodes/models, run workflows with parameter injection. Uses the official comfy-cli for lifecycle and direct REST API for execution."
+version: 4.1.0
+requires: ComfyUI (local or Comfy Cloud); comfy-cli (pip install comfy-cli)
+author: [kshitijk4poor, alt-glitch]
+license: MIT
+platforms: [macos, linux, windows]
+prerequisites:
+  commands: ["python3"]
+setup:
+  help: "Run scripts/hardware_check.py FIRST to decide local vs Comfy Cloud; then scripts/comfyui_setup.sh auto-installs locally (or use Cloud API key for platform.comfy.org)."
+metadata:
+  hermes:
+    tags:
+      - comfyui
+      - image-generation
+      - stable-diffusion
+      - flux
+      - creative
+      - generative-ai
+      - video-generation
+    related_skills: [stable-diffusion-image-generation, image_gen]
+    category: creative
+---
+
+# ComfyUI
+
+Generate images, video, and audio through ComfyUI using the official `comfy-cli` for
+setup/management and direct REST API calls for workflow execution.
+
+**Reference files in this skill:**
+
+- `references/official-cli.md` — comfy-cli command reference (install, launch, nodes, models)
+- `references/rest-api.md` — ComfyUI REST API endpoints (local + cloud)
+- `references/workflow-format.md` — workflow JSON format, common node types, parameter mapping
+
+**Scripts in this skill:**
+
+- `scripts/hardware_check.py` — detect GPU/VRAM/Apple Silicon, decide local vs Comfy Cloud
+- `scripts/comfyui_setup.sh` — full setup automation (hardware check + install + launch + verify)
+- `scripts/extract_schema.py` — reads workflow JSON, outputs which parameters are controllable
+- `scripts/run_workflow.py` — injects user args, submits workflow, monitors progress, downloads outputs
+- `scripts/check_deps.py` — checks if required custom nodes and models are installed
+
+## When to Use
+
+- User asks to generate images with Stable Diffusion, SDXL, Flux, or other diffusion models
+- User wants to run a specific ComfyUI workflow
+- User wants to chain generative steps (txt2img → upscale → face restore)
+- User needs ControlNet, inpainting, img2img, or other advanced pipelines
+- User asks to manage ComfyUI queue, check models, or install custom nodes
+- User wants video/audio generation via AnimateDiff, Hunyuan, AudioCraft, etc.
+
+## Architecture: Two Layers
+
+```
+┌─────────────────────────────────────────────────────┐
+│ Layer 1: comfy-cli (official)                       │
+│   Setup, lifecycle, nodes, models                   │
+│   comfy install / launch / stop / node / model      │
+└─────────────────────────┬───────────────────────────┘
+                          │
+┌─────────────────────────▼───────────────────────────┐
+│ Layer 2: REST API + skill scripts                   │
+│   Workflow execution, param injection, monitoring   │
+│   POST /api/prompt, GET /api/view, WebSocket        │
+│   scripts/run_workflow.py, extract_schema.py        │
+└─────────────────────────────────────────────────────┘
+```
+
+**Why two layers?** The official CLI handles installation and server management excellently
+but has minimal workflow execution support (just raw file submission, no param injection,
+no structured output). The REST API fills that gap — the scripts in this skill handle the
+param injection, execution monitoring, and output download that the CLI doesn't do.
+
+## Quick Start
+
+### Detect Environment
+
+```bash
+# What's available?
+command -v comfy >/dev/null 2>&1 && echo "comfy-cli: installed"
+curl -s http://127.0.0.1:8188/system_stats 2>/dev/null && echo "server: running"
+
+# Can this machine actually run ComfyUI locally? (GPU/VRAM/Apple Silicon check)
+python3 scripts/hardware_check.py
+```
+
+If nothing is installed, go to **Setup & Onboarding** below — ask Local vs Cloud first
+(Step 0), and for a local install run the hardware check before picking an install path.
+If the server is already running, skip to **Core Workflow**.
+
+## Core Workflow
+
+### Step 1: Get a Workflow
+
+Users provide workflow JSON files. These come from:
+- ComfyUI web editor → "Save (API Format)" button
+- Community downloads (civitai, Reddit, Discord)
+- The `scripts/` directory of this skill (example workflows)
+
+**The workflow must be in API format** (node IDs as keys with `class_type`).
+If the user has an editor-format file (`nodes[]` and `links[]` at top level), they
+need to re-export it using "Save (API Format)" in the ComfyUI web editor.
+
+### Step 2: Understand What's Controllable
+
+```bash
+python3 scripts/extract_schema.py workflow_api.json
+```
+
+Output (JSON):
+```json
+{
+  "parameters": {
+    "prompt": {"node_id": "6", "field": "text", "type": "string", "value": "a cat"},
+    "negative_prompt": {"node_id": "7", "field": "text", "type": "string", "value": "bad quality"},
+    "seed": {"node_id": "3", "field": "seed", "type": "int", "value": 42},
+    "steps": {"node_id": "3", "field": "steps", "type": "int", "value": 20},
+    "width": {"node_id": "5", "field": "width", "type": "int", "value": 512},
+    "height": {"node_id": "5", "field": "height", "type": "int", "value": 512}
+  }
+}
+```
+
+### Step 3: Run with Parameters
+
+**Local:**
+```bash
+python3 scripts/run_workflow.py \
+  --workflow workflow_api.json \
+  --args '{"prompt": "a beautiful sunset over mountains", "seed": 123, "steps": 30}' \
+  --output-dir ./outputs
+```
+
+**Cloud:**
+```bash
+python3 scripts/run_workflow.py \
+  --workflow workflow_api.json \
+  --args '{"prompt": "a beautiful sunset", "seed": 123}' \
+  --host https://cloud.comfy.org \
+  --api-key "$COMFY_CLOUD_API_KEY" \
+  --output-dir ./outputs
+```
+
+### Step 4: Present Results
+
+The script outputs JSON with file paths:
+```json
+{
+  "status": "success",
+  "outputs": [
+    {"file": "./outputs/ComfyUI_00001_.png", "node_id": "9", "type": "image"}
+  ]
+}
+```
+
+Show images to the user via `vision_analyze` or return the file path directly.
+
+## Decision Tree
+
+| User says | Tool | Command |
+|-----------|------|---------|
+| "install ComfyUI" | comfy-cli | `comfy install` |
+| "start ComfyUI" | comfy-cli | `comfy launch --background` |
+| "stop ComfyUI" | comfy-cli | `comfy stop` |
+| "install X node" | comfy-cli | `comfy node install <name>` |
+| "download X model" | comfy-cli | `comfy model download --url <url>` |
+| "list installed models" | comfy-cli | `comfy model list` |
+| "list installed nodes" | comfy-cli | `comfy node show installed` |
+| "generate an image" | script | `run_workflow.py --args '{"prompt": "..."}'` |
+| "use this image" (img2img) | REST | upload image, then run_workflow.py |
+| "what can I change in this workflow?" | script | `extract_schema.py workflow.json` |
+| "check if workflow deps are met" | script | `check_deps.py workflow.json` |
+| "what's in the queue?" | REST | `curl http://HOST:8188/queue` |
+| "cancel that" | REST | `curl -X POST http://HOST:8188/interrupt` |
+| "free GPU memory" | REST | `curl -X POST http://HOST:8188/free` |
+
+## Setup & Onboarding
+
+When a user asks to set up ComfyUI, the FIRST thing to do is ask them whether
+they want **Comfy Cloud** (hosted, zero install, API key) or **Local** (install
+ComfyUI on their machine). Do NOT start running install commands or hardware
+checks until they've answered.
+
+**Official docs:** https://docs.comfy.org/installation
+**CLI docs:** https://docs.comfy.org/comfy-cli/getting-started
+**Cloud docs:** https://docs.comfy.org/get_started/cloud
+
+### Step 0: Ask Local vs Cloud (ALWAYS FIRST)
+
+Present the tradeoff clearly and wait for the user to choose. Suggested script:
+
+> "Do you want to run ComfyUI locally on your machine, or use Comfy Cloud?
+>
+> - **Comfy Cloud** — hosted on RTX 6000 Pro GPUs, all models pre-installed, zero setup. Requires an API key (paid subscription). Best if you don't have a capable GPU or want to skip installation.
+> - **Local** — free, but your machine MUST meet the hardware requirements:
+>   - NVIDIA GPU with **≥6 GB VRAM** (≥8 GB recommended for SDXL, ≥12 GB for Flux/video), OR
+>   - AMD GPU with ROCm support (Linux), OR
+>   - Apple Silicon Mac (M1 or newer) with **≥16 GB unified memory** (≥32 GB recommended).
+>   - Intel Macs and machines with no GPU will NOT work — use Cloud instead.
+>
+> Which would you like?"
+
+Route based on their answer:
+
+- **User picks Cloud** → skip to **Path A** (no hardware check needed).
+- **User picks Local** → go to **Step 1: Verify Hardware** to confirm their machine actually meets the requirements, then pick an install path from Paths B-E based on the verdict.
+- **User is unsure / asks for a recommendation** → run the hardware check anyway and let the verdict decide.
+
+### Step 1: Verify Hardware (ONLY if user chose local)
+
+```bash
+python3 scripts/hardware_check.py --json
+```
+
+It detects OS, GPU (NVIDIA CUDA / AMD ROCm / Apple Silicon / Intel Arc), VRAM,
+and unified/system RAM, then returns a verdict plus a suggested `comfy-cli` flag:
+
+| Verdict    | Meaning                                                   | Action                                          |
+|------------|-----------------------------------------------------------|-------------------------------------------------|
+| `ok`       | ≥8 GB VRAM (discrete) OR ≥32 GB unified (Apple Silicon)   | Local install — use `comfy_cli_flag` from report |
+| `marginal` | SD1.5 works; SDXL tight; Flux/video unlikely              | Local OK for light workflows, else **Path A (Cloud)** |
+| `cloud`    | No usable GPU, <6 GB VRAM, <16 GB Apple unified, Intel Mac | **User chose local but their machine doesn't meet requirements** — surface the `notes` and ask if they want to switch to Cloud |
+
+Hardware thresholds the skill enforces:
+
+- **Discrete GPU minimum:** 6 GB VRAM. Below that, most modern models won't load.
+- **Apple Silicon:** M1 or newer (ARM64). Intel Macs have no MPS backend — Cloud only.
+- **Apple Silicon memory:** 16 GB unified minimum. 8 GB M1/M2 will swap/OOM on SDXL/Flux.
+- **No accelerator at all:** CPU-only is listed as a comfy-cli option but a single SDXL
+  image takes 10+ minutes — treat it as unusable and route to Cloud.
+
+If verdict is `cloud` but the user explicitly wanted local, DO NOT proceed
+silently. Show the `notes` array verbatim, explain which requirement they
+don't meet, and ask whether they want to (a) switch to Cloud or (b) force
+a local install anyway (marginal/cloud-verdict local installs will OOM or
+be unusably slow on modern models).
+
+The report's `comfy_cli_flag` field gives you the exact flag to pass to `comfy install`
+(see **Path D** below): `--nvidia`, `--amd`, or `--m-series`. For Intel Arc, use Path E (manual install).
+
+Surface the `notes` array verbatim to the user so they understand why a
+particular path was recommended.
+
+### Choosing an Installation Path
+
+Use the hardware check result first. The table below is a fallback for when the user
+has already told you their hardware or you need to narrow down between multiple
+viable paths:
+
+| Situation | Recommended Path |
+|-----------|-----------------|
+| `verdict: cloud` from hardware check | **Path A: Comfy Cloud** |
+| No GPU / just want to try it | **Path A: Comfy Cloud** (zero setup) |
+| Windows + NVIDIA GPU + non-technical | **Path B: ComfyUI Desktop** (one-click installer) |
+| Windows + NVIDIA GPU + technical | **Path C: Portable** or **Path D: comfy-cli** |
+| Linux + any GPU | **Path D: comfy-cli** (easiest) or Path E manual |
+| macOS + Apple Silicon | **Path B: ComfyUI Desktop** or **Path D: comfy-cli** |
+| Headless / server / CI | **Path D: comfy-cli** |
+
+For the fully automated path (hardware check → install → launch), just run:
+
+```bash
+bash scripts/comfyui_setup.sh
+```
+
+It runs `hardware_check.py` internally, refuses to install locally when the verdict
+is `cloud`, picks the right `comfy-cli` flag otherwise, then installs and launches.
+
+---
+
+### Path A: Comfy Cloud (No Local Install)
+
+For users without a capable GPU or who want zero setup.
+Powered by RTX 6000 Pro GPUs, all models pre-installed.
+
+**Docs:** https://docs.comfy.org/get_started/cloud
+
+1. Go to https://comfy.org/cloud and sign up
+2. Get an API key at https://platform.comfy.org/login
+   - Click `+ New` in API Keys section → Generate
+   - Save immediately (only visible once)
+3. Set the key:
+   ```bash
+   export COMFY_CLOUD_API_KEY="comfyui-xxxxxxxxxxxx"
+   ```
+4. Run workflows via the script or web UI:
+   ```bash
+   python3 scripts/run_workflow.py \
+     --workflow workflow_api.json \
+     --args '{"prompt": "a cat"}' \
+     --host https://cloud.comfy.org \
+     --api-key "$COMFY_CLOUD_API_KEY" \
+     --output-dir ./outputs
+   ```
+
+**Pricing:** https://www.comfy.org/cloud/pricing
+Subscription required. Concurrent limits: Free/Standard: 1 job, Creator: 3, Pro: 5.
+
+---
+
+### Path B: ComfyUI Desktop (Windows/macOS)
+
+One-click installer for non-technical users. Currently Beta.
+
+**Docs:** https://docs.comfy.org/installation/desktop
+
+- **Windows (NVIDIA):** https://download.comfy.org/windows/nsis/x64
+- **macOS (Apple Silicon):** Available from https://comfy.org (download page)
+
+Steps:
+1. Download and run installer
+2. Select GPU type (NVIDIA recommended, or CPU mode)
+3. Choose install location (SSD recommended, ~15GB needed)
+4. Optionally migrate from existing ComfyUI Portable install
+5. Desktop launches automatically — web UI opens in browser
+
+Desktop manages its own Python environment. For CLI access to the bundled env:
+```bash
+cd <install_dir>/ComfyUI
+.venv/Scripts/activate   # Windows
+# or use the built-in terminal in the Desktop UI
+```
+
+**Limitations:** Desktop uses stable releases (may lag behind latest).
+Linux not supported for Desktop — use comfy-cli or manual install.
+
+---
+
+### Path C: ComfyUI Portable (Windows Only)
+
+Standalone package with embedded Python. Extract and run. No install.
+
+**Docs:** https://docs.comfy.org/installation/comfyui_portable_windows
+
+1. Download from https://github.com/comfyanonymous/ComfyUI/releases
+   - Standard: Python 3.13 + CUDA 13.0 (modern NVIDIA GPUs)
+   - Alt: PyTorch CUDA 12.6 + Python 3.12 (NVIDIA 10 series and older)
+   - AMD (experimental)
+2. Extract with 7-Zip
+3. Run `run_nvidia_gpu.bat` (or `run_cpu.bat`)
+4. Wait for "To see the GUI go to: http://127.0.0.1:8188"
+
+Update: run `update/update_comfyui.bat` (latest commit) or
+`update/update_comfyui_stable.bat` (latest stable release).
+
+---
+
+### Path D: comfy-cli (All Platforms — Recommended for Agents)
+
+The official CLI is the best path for headless/automated setups.
+
+**Docs:** https://docs.comfy.org/comfy-cli/getting-started
+**Repo:** https://github.com/Comfy-Org/comfy-cli
+
+#### Prerequisites
+- Python 3.10+ (3.13 recommended)
+- pip (or conda/uv)
+- GPU drivers installed (CUDA for NVIDIA, ROCm for AMD)
+
+#### Install comfy-cli
+
+```bash
+pip install comfy-cli
+# or
+uvx --from comfy-cli comfy --help
+```
+
+Disable analytics (avoids interactive prompt):
+```bash
+comfy --skip-prompt tracking disable
+```
+
+#### Install ComfyUI
+
+```bash
+# Interactive (prompts for GPU type)
+comfy install
+
+# Non-interactive variants:
+comfy --skip-prompt install --nvidia              # NVIDIA (CUDA)
+comfy --skip-prompt install --amd                 # AMD (ROCm, Linux)
+comfy --skip-prompt install --m-series            # Apple Silicon (MPS)
+comfy --skip-prompt install --cpu                 # CPU only (slow)
+
+# With faster dependency resolution:
+comfy --skip-prompt install --nvidia --fast-deps
+```
+
+Default location: `~/comfy/ComfyUI` (Linux), `~/Documents/comfy/ComfyUI` (macOS/Win).
+Override with: `comfy --workspace /custom/path install`
+
+#### Launch Server
+
+```bash
+comfy launch --background              # background daemon on :8188
+comfy launch                           # foreground (see logs)
+comfy launch -- --listen 0.0.0.0       # accessible on LAN
+comfy launch -- --port 8190            # custom port
+comfy launch -- --lowvram              # low VRAM mode (6GB cards)
+```
+
+Verify server is running:
+```bash
+curl -s http://127.0.0.1:8188/system_stats | python3 -m json.tool
+```
+
+Stop background server:
+```bash
+comfy stop
+```
+
+---
+
+### Path E: Manual Install (Advanced / All Hardware)
+
+For full control or unsupported hardware (Ascend NPU, Cambricon MLU, Intel Arc).
+
+**Docs:** https://docs.comfy.org/installation/manual_install
+**GitHub:** https://github.com/comfyanonymous/ComfyUI
+
+```bash
+# 1. Create environment
+conda create -n comfyenv python=3.13
+conda activate comfyenv
+
+# 2. Clone
+git clone https://github.com/comfyanonymous/ComfyUI.git
+cd ComfyUI
+
+# 3. Install PyTorch (pick your hardware)
+# NVIDIA:
+pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu130
+# AMD (ROCm 6.4):
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4
+# Apple Silicon:
+pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
+# Intel Arc:
+pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/xpu
+# CPU only:
+pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+
+# 4. Install ComfyUI deps
+pip install -r requirements.txt
+
+# 5. Run
+python main.py
+# With options: python main.py --listen 0.0.0.0 --port 8188
+```
+
+---
+
+### Post-Install: Download Models
+
+ComfyUI needs at least one checkpoint model to generate images.
+
+**Using comfy-cli:**
+```bash
+# SDXL (general purpose, ~6.5GB)
+comfy model download \
+  --url "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors" \
+  --relative-path models/checkpoints
+
+# SD 1.5 (lighter, ~4GB, good for low VRAM)
+comfy model download \
+  --url "https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors" \
+  --relative-path models/checkpoints
+
+# From CivitAI (may need API token):
+comfy model download \
+  --url "https://civitai.com/api/download/models/128713" \
+  --relative-path models/checkpoints \
+  --set-civitai-api-token "YOUR_TOKEN"
+
+# LoRA adapters:
+comfy model download --url "<URL>" --relative-path models/loras
+```
+
+**Manual download:** Place `.safetensors` / `.ckpt` files directly into the
+`ComfyUI/models/checkpoints/` directory (or `loras/`, `vae/`, etc.).
+
+List installed models:
+```bash
+comfy model list
+```
+
+---
+
+### Post-Install: Install Custom Nodes
+
+Custom nodes extend ComfyUI's capabilities (upscaling, video, ControlNet, etc.).
+
+```bash
+comfy node install comfyui-impact-pack           # popular utility pack
+comfy node install comfyui-animatediff-evolved    # video generation
+comfy node install comfyui-controlnet-aux         # ControlNet preprocessors
+comfy node install comfyui-essentials             # common helpers
+comfy node update all                            # update all nodes
+```
+
+Check what's installed:
+```bash
+comfy node show installed
+```
+
+Install deps for a specific workflow:
+```bash
+comfy node install-deps --workflow=workflow_api.json
+```
+
+---
+
+### Post-Install: Verify Setup
+
+```bash
+# Check server is responsive
+curl -s http://127.0.0.1:8188/system_stats | python3 -m json.tool
+
+# Check a workflow's dependencies
+python3 scripts/check_deps.py workflow_api.json --host 127.0.0.1 --port 8188
+
+# Test a generation
+python3 scripts/run_workflow.py \
+  --workflow workflow_api.json \
+  --args '{"prompt": "test image, high quality"}' \
+  --output-dir ./test-outputs
+```
+
+## Image Upload (img2img / Inpainting)
+
+Upload files directly via REST:
+
+```bash
+# Upload input image
+curl -X POST "http://127.0.0.1:8188/upload/image" \
+  -F "image=@photo.png" -F "type=input" -F "overwrite=true"
+# Returns: {"name": "photo.png", "subfolder": "", "type": "input"}
+
+# Upload mask for inpainting
+curl -X POST "http://127.0.0.1:8188/upload/mask" \
+  -F "image=@mask.png" -F "type=input" \
+  -F 'original_ref={"filename":"photo.png","subfolder":"","type":"input"}'
+```
+
+Then reference the uploaded filename in workflow args:
+```bash
+python3 scripts/run_workflow.py --workflow inpaint.json \
+  --args '{"image": "photo.png", "mask": "mask.png", "prompt": "fill with flowers"}'
+```
+
+## Cloud Execution
+
+Base URL: `https://cloud.comfy.org`
+Auth: `X-API-Key` header
+
+```bash
+# Submit workflow
+python3 scripts/run_workflow.py \
+  --workflow workflow_api.json \
+  --args '{"prompt": "cyberpunk city"}' \
+  --host https://cloud.comfy.org \
+  --api-key "$COMFY_CLOUD_API_KEY" \
+  --output-dir ./outputs \
+  --timeout 300
+
+# Upload image for cloud workflows
+curl -X POST "https://cloud.comfy.org/api/upload/image" \
+  -H "X-API-Key: $COMFY_CLOUD_API_KEY" \
+  -F "image=@input.png" -F "type=input" -F "overwrite=true"
+```
+
+Concurrent job limits:
+
+| Tier | Concurrent Jobs |
+|------|----------------|
+| Free/Standard | 1 |
+| Creator | 3 |
+| Pro | 5 |
+
+Extra submissions queue automatically.
+
+## Queue & System Management
+
+```bash
+# Check queue
+curl -s http://127.0.0.1:8188/queue | python3 -m json.tool
+
+# Clear pending queue
+curl -X POST http://127.0.0.1:8188/queue -d '{"clear": true}'
+
+# Cancel running job
+curl -X POST http://127.0.0.1:8188/interrupt
+
+# Free GPU memory (unload all models)
+curl -X POST http://127.0.0.1:8188/free -H "Content-Type: application/json" \
+  -d '{"unload_models": true, "free_memory": true}'
+
+# System stats (VRAM, RAM, GPU info)
+curl -s http://127.0.0.1:8188/system_stats | python3 -m json.tool
+```
+
+## Pitfalls
+
+1. **API format required** — `comfy run` and the scripts only accept API-format workflow JSON.
+   If the user has editor format (from "Save" not "Save (API Format)"), they need to
+   re-export. Check: API format has `class_type` in each node object, editor format has
+   top-level `nodes` and `links` arrays.
+
+2. **Server must be running** — All execution requires a live server. `comfy launch --background`
+   starts one. Check with `curl http://127.0.0.1:8188/system_stats`.
+
+3. **Model names are exact** — Case-sensitive, includes file extension. Use
+   `comfy model list` to discover what's installed.
+
+4. **Missing custom nodes** — "class_type not found" means a required node isn't installed.
+   Run `check_deps.py` to find what's missing, then `comfy node install <name>`.
+
+5. **Working directory** — `comfy-cli` auto-detects the ComfyUI workspace. If commands
+   fail with "no workspace found", use `comfy --workspace /path/to/ComfyUI <command>`
+   or `comfy set-default /path/to/ComfyUI`.
+
+6. **Cloud vs local output download** — Cloud `/api/view` returns a 302 redirect to a
+   signed URL. Always follow redirects (`curl -L`). The `run_workflow.py` script handles
+   this automatically.
+
+7. **Timeout for video/audio** — Long generations (video, high step counts) can take
+   minutes. Pass `--timeout 600` to `run_workflow.py`. Default is 120 seconds.
+
+8. **Tracking prompt** — The first run of `comfy` may prompt for analytics tracking consent.
+   Use `comfy --skip-prompt tracking disable` to answer it non-interactively.
+
+9. **comfy-cli invocation via uvx** — If comfy-cli is not installed globally, invoke with
+   `uvx --from comfy-cli comfy <command>`. All examples in this skill use bare `comfy`
+   but prepend `uvx --from comfy-cli` if needed.
+
+## Verification Checklist
+
+- [ ] `hardware_check.py` verdict is `ok` OR the user explicitly chose Comfy Cloud
+- [ ] `comfy` available on PATH (or `uvx --from comfy-cli comfy --help` works)
+- [ ] `curl http://127.0.0.1:8188/system_stats` returns JSON
+- [ ] `comfy model list` shows at least one checkpoint
+- [ ] Workflow JSON is in API format (has `class_type` keys)
+- [ ] `check_deps.py` reports no missing nodes/models
+- [ ] Test run completes and outputs are saved
@@ -0,0 +1,268 @@
+# comfy-cli Command Reference
+
+Official CLI from [Comfy-Org/comfy-cli](https://github.com/Comfy-Org/comfy-cli).
+Docs: https://docs.comfy.org/comfy-cli/getting-started
+
+## Installation
+
+```bash
+pip install comfy-cli
+# or
+uvx --from comfy-cli comfy --help
+```
+
+First run may prompt for analytics. Disable non-interactively:
+```bash
+comfy --skip-prompt tracking disable
+```
+
+## Global Options
+
+| Option | Description |
+|--------|-------------|
+| `--workspace <path>` | Target a specific ComfyUI workspace |
+| `--recent` | Use most recently used workspace |
+| `--here` | Use current directory as workspace |
+| `--skip-prompt` | No interactive prompts (use defaults) |
+| `-v` / `--version` | Print version |
+
+Workspace resolution priority:
+1. `--workspace` (explicit path)
+2. `--recent` (from config)
+3. `--here` (cwd)
+4. `comfy set-default` path
+5. Most recently used
+6. `~/comfy/ComfyUI` (Linux) or `~/Documents/comfy/ComfyUI` (macOS/Windows)
+
+## Commands
+
+### `comfy install`
+
+Download and install ComfyUI + ComfyUI-Manager.
+
+```bash
+comfy install                    # interactive GPU selection
+comfy install --nvidia           # NVIDIA (CUDA)
+comfy install --amd              # AMD (ROCm)
+comfy install --m-series         # Apple Silicon (MPS)
+comfy install --cpu              # CPU only
+comfy install --fast-deps        # use uv for faster deps
+comfy install --skip-manager     # skip ComfyUI-Manager
+```
+
+| Option | Description |
+|--------|-------------|
+| `--nvidia` | NVIDIA GPU |
+| `--amd` | AMD GPU (ROCm) |
+| `--m-series` | Apple Silicon |
+| `--cpu` | CPU only |
+| `--cuda-version` | 11.8, 12.1, 12.4, 12.6, 12.8, 12.9, 13.0 |
+| `--rocm-version` | 6.1, 6.2, 6.3, 7.0, 7.1 |
+| `--fast-deps` | Use uv for dependency resolution |
+| `--skip-manager` | Don't install ComfyUI-Manager |
+| `--skip-torch-or-directml` | Skip PyTorch install |
+| `--version <ver>` | Specific ComfyUI version (e.g. `0.2.0`, `latest`, `nightly`) |
+| `--commit <hash>` | Install specific commit |
+| `--pr "#1234"` | Install from a PR |
+| `--restore` | Restore deps for existing install |
+
+Default location: `~/comfy/ComfyUI` (Linux), `~/Documents/comfy/ComfyUI` (macOS/Win).
+
+### `comfy launch`
+
+Start ComfyUI server.
+
+```bash
+comfy launch                           # foreground on :8188
+comfy launch --background              # background daemon
+comfy launch -- --listen 0.0.0.0       # listen on all interfaces
+comfy launch -- --port 8190            # custom port
+comfy launch -- --cpu                  # force CPU mode
+comfy launch --background -- --listen 0.0.0.0 --port 8190
+```
+
+| Option | Description |
+|--------|-------------|
+| `--background` | Run as background daemon |
+| `--frontend-pr "#456"` | Test a frontend PR |
+| Extra args after `--` | Passed directly to ComfyUI's `main.py` |
+
+Common extra args: `--listen`, `--port`, `--cpu`, `--lowvram`, `--novram`,
+`--fp16-vae`, `--force-fp32`.
+
+### `comfy stop`
+
+Stop background ComfyUI instance.
+
+```bash
+comfy stop
+```
+
+### `comfy run`
+
+Execute a raw workflow JSON file against a running server.
+
+```bash
+comfy run --workflow workflow_api.json
+comfy run --workflow workflow_api.json --host 10.0.0.5 --port 8188
+comfy run --workflow workflow_api.json --timeout 300 --wait
+```
+
+| Option | Description |
+|--------|-------------|
+| `--workflow` | Path to API-format workflow JSON (required) |
+| `--host` | Server hostname (default: 127.0.0.1) |
+| `--port` | Server port (default: 8188) |
+| `--timeout` | Seconds to wait (default: 30) |
+| `--wait/--no-wait` | Wait for completion (default: wait) |
+| `--verbose` | Show per-node execution details |
+
+**Limitations:** No parameter injection, no structured output, no image download.
+For agent use, prefer `scripts/run_workflow.py` which adds those capabilities.
+
+### `comfy which`
+
+Show which ComfyUI workspace is currently targeted.
+
+```bash
+comfy which
+comfy --recent which
+```
+
+### `comfy set-default`
+
+Set the default workspace path.
+
+```bash
+comfy set-default /path/to/ComfyUI
+comfy set-default /path/to/ComfyUI --launch-extras="--listen 0.0.0.0"
+```
+
+### `comfy update`
+
+Update ComfyUI or custom nodes.
+
+```bash
+comfy update               # update ComfyUI core
+comfy node update all      # update all custom nodes
+```
+
+---
+
+## `comfy node` — Custom Node Management
+
+All node operations use ComfyUI-Manager (cm-cli) under the hood.
+
+```bash
+comfy node show installed              # list installed nodes
+comfy node show enabled                # list enabled nodes
+comfy node show all                    # all available nodes
+comfy node simple-show installed       # compact list
+
+comfy node install comfyui-impact-pack # install by name
+comfy node install <name> --uv-compile # with unified dep resolution (Manager v4.1+)
+comfy node uninstall <name>            # remove
+comfy node update <name>               # update one
+comfy node update all                  # update all
+comfy node enable <name>               # enable disabled node
+comfy node disable <name>              # disable without uninstalling
+comfy node fix <name>                  # fix broken dependencies
+
+comfy node install-deps --workflow=workflow.json  # install all deps a workflow needs
+comfy node deps-in-workflow --workflow=w.json --output=deps.json  # extract dep list
+
+comfy node save-snapshot               # save current state
+comfy node restore-snapshot <file>     # restore from snapshot
+
+comfy node bisect start                # find culprit node (binary search)
+comfy node bisect good                 # current set is fine
+comfy node bisect bad                  # problem is in current set
+comfy node bisect reset                # abort bisect
+```
+
+### Dependency Resolution Options
+
+| Flag | Description |
+|------|-------------|
+| `--fast-deps` | comfy-cli built-in uv resolver |
+| `--uv-compile` | ComfyUI-Manager v4.1+ unified resolver (recommended) |
+| `--no-deps` | Skip dep installation |
+
+Set uv-compile as default: `comfy manager uv-compile-default true`
+
+---
+
+## `comfy model` — Model Management
+
+```bash
+comfy model list                       # list all downloaded models
+comfy model list --relative-path models/checkpoints  # specific folder
+
+comfy model download --url <URL>       # download model
+comfy model download --url <URL> --relative-path models/loras
+comfy model download --url <URL> --filename custom_name.safetensors
+
+comfy model remove                     # interactive removal
+comfy model remove --relative-path models/checkpoints --model-names "model.safetensors"
+```
+
+| Option | Description |
+|--------|-------------|
+| `--url` | Download URL (CivitAI, HuggingFace, direct) |
+| `--relative-path` | Subdirectory under workspace (e.g. `models/checkpoints`) |
+| `--filename` | Custom filename to save as |
+| `--set-civitai-api-token` | Set CivitAI API token |
+| `--set-hf-api-token` | Set HuggingFace API token |
+| `--downloader` | `httpx` (default) or `aria2` |
+
+Model directory structure:
+```
+ComfyUI/models/
+├── checkpoints/     # Full model files (.safetensors, .ckpt)
+├── loras/           # LoRA adapters
+├── vae/             # VAE models
+├── controlnet/      # ControlNet models
+├── clip/            # CLIP text encoders
+├── clip_vision/     # CLIP vision encoders
+├── upscale_models/  # Upscaler models (ESRGAN, etc.)
+├── embeddings/      # Textual inversion embeddings
+├── unet/            # UNet models
+└── diffusion_models/ # Diffusion model files
+```
+
+---
+
+## `comfy manager` — ComfyUI-Manager Settings
+
+```bash
+comfy manager disable              # disable Manager completely
+comfy manager enable-gui           # enable new GUI
+comfy manager disable-gui          # disable GUI (API-only)
+comfy manager enable-legacy-gui    # legacy GUI
+comfy manager uv-compile-default true   # make --uv-compile the default
+comfy manager clear                # clear startup action
+```
+
+---
+
+## `comfy pr-cache` — Frontend PR Cache
+
+```bash
+comfy pr-cache list                # list cached PR builds
+comfy pr-cache clean               # clean all
+comfy pr-cache clean 456           # clean specific PR
+```
+
+Cache expires after 7 days; max 10 builds kept.
+
+---
+
+## Configuration
+
+Config file location:
+- Linux: `~/.config/comfy-cli/config.ini`
+- macOS: `~/Library/Application Support/comfy-cli/config.ini`
+- Windows: `~/AppData/Local/comfy-cli/config.ini`
+
+Stores: default workspace, recent workspace, background server info, API tokens,
+manager GUI mode, launch extras.
@@ -0,0 +1,256 @@
+# ComfyUI REST API Reference
+
+ComfyUI exposes a REST API + WebSocket for workflow execution and management.
+Local servers and Comfy Cloud share the same API surface; Cloud requires an API key
+and serves the REST endpoints under an `/api` prefix (e.g. `/api/prompt`, `/api/view`).
+
+## Connection
+
+| | Local | Cloud |
+|---|---|---|
+| Base URL | `http://127.0.0.1:8188` | `https://cloud.comfy.org` |
+| Auth | None (or bearer token) | `X-API-Key` header |
+| WebSocket | `ws://host:port/ws?clientId={uuid}` | `wss://cloud.comfy.org/ws?clientId={uuid}&token={API_KEY}` |
+| Output download | Direct bytes from `/view` | 302 redirect → signed URL (use `curl -L`) |
+
+## Workflow Execution
+
+### Submit Workflow
+
+```bash
+# Local
+curl -X POST "http://127.0.0.1:8188/prompt" \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": '"$(cat workflow_api.json)"', "client_id": "'"$(uuidgen)"'"}'
+
+# Cloud
+curl -X POST "https://cloud.comfy.org/api/prompt" \
+  -H "X-API-Key: $COMFY_CLOUD_API_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": '"$(cat workflow_api.json)"'}'
+```
+
+**Response:**
+```json
+{"prompt_id": "abc-123-def", "number": 1, "node_errors": {}}
+```
+
+If `node_errors` is non-empty, the workflow has validation errors (missing nodes, bad inputs).
+
+### Check Job Status (Cloud)
+
+```bash
+curl -X GET "https://cloud.comfy.org/api/job/{prompt_id}/status" \
+  -H "X-API-Key: $COMFY_CLOUD_API_KEY"
+```
+
+| Status | Description |
+|--------|-------------|
+| `pending` | Queued, waiting to start |
+| `in_progress` | Currently executing |
+| `completed` | Finished successfully |
+| `failed` | Encountered an error |
+| `cancelled` | Cancelled by user |
+
+### Get History (Local)
+
+```bash
+# All history
+curl -s "http://127.0.0.1:8188/history"
+
+# Specific prompt
+curl -s "http://127.0.0.1:8188/history/{prompt_id}"
+```
+
+Response contains `outputs` keyed by node ID with file references.
+
+### Download Output
+
+```bash
+# Local
+curl -s "http://127.0.0.1:8188/view?filename=ComfyUI_00001_.png&subfolder=&type=output" \
+  -o output.png
+
+# Cloud (follow redirect)
+curl -L "https://cloud.comfy.org/api/view?filename=ComfyUI_00001_.png&subfolder=&type=output" \
+  -H "X-API-Key: $COMFY_CLOUD_API_KEY" \
+  -o output.png
+```
+
+---
+
+## WebSocket Monitoring
+
+Connect to WebSocket for real-time execution progress.
+
+### Connection
+
+```bash
+# Local
+wscat -c "ws://127.0.0.1:8188/ws?clientId=MY-UUID"
+
+# Cloud
+wscat -c "wss://cloud.comfy.org/ws?clientId=MY-UUID&token=API_KEY"
+```
+
+### Message Types (JSON)
+
+| Type | When | Key Fields |
+|------|------|------------|
+| `status` | Queue change | `queue_remaining` |
+| `execution_start` | Workflow begins | `prompt_id` |
+| `executing` | Node running | `node` (ID), `prompt_id` |
+| `progress` | Sampling steps | `node`, `value`, `max` |
+| `executed` | Node output ready | `node`, `output` |
+| `execution_cached` | Nodes skipped | `nodes` (list of IDs) |
+| `execution_success` | All done | `prompt_id` |
+| `execution_error` | Failure | `exception_type`, `exception_message`, `traceback` |
+| `execution_interrupted` | Cancelled | `prompt_id` |
+
+When `executing` has `node: null`, the workflow is complete.
+
+### Binary Messages (Preview Images)
+
+Format: `[4B type][4B image_type: 1=JPEG, 2=PNG][image_data...]`
+
+---
+
+## File Upload
+
+### Upload Image
+
+```bash
+curl -X POST "http://127.0.0.1:8188/upload/image" \
+  -F "image=@photo.png" \
+  -F "type=input" \
+  -F "overwrite=true"
+```
+
+Response: `{"name": "photo.png", "subfolder": "", "type": "input"}`
+
+### Upload Mask
+
+```bash
+curl -X POST "http://127.0.0.1:8188/upload/mask" \
+  -F "image=@mask.png" \
+  -F "type=input" \
+  -F 'original_ref={"filename":"photo.png","subfolder":"","type":"input"}'
+```
+
+---
+
+## Node & Model Discovery
+
+### Object Info (All Nodes)
+
+```bash
+curl -s "http://127.0.0.1:8188/object_info" | python3 -m json.tool
+# Returns all node types with input/output definitions
+
+curl -s "http://127.0.0.1:8188/object_info/KSampler"
+# Returns info for one specific node type
+```
+
+### Models by Folder
+
+```bash
+curl -s "http://127.0.0.1:8188/models/checkpoints"
+curl -s "http://127.0.0.1:8188/models/loras"
+curl -s "http://127.0.0.1:8188/models/vae"
+curl -s "http://127.0.0.1:8188/models/controlnet"
+curl -s "http://127.0.0.1:8188/models/clip"
+curl -s "http://127.0.0.1:8188/models/upscale_models"
+curl -s "http://127.0.0.1:8188/models/embeddings"
+```
+
+Returns arrays of filenames (relative to model folder).
+
+---
+
+## Queue Management
+
+```bash
+# View queue (running + pending)
+curl -s "http://127.0.0.1:8188/queue"
+
+# Clear all pending
+curl -X POST "http://127.0.0.1:8188/queue" \
+  -H "Content-Type: application/json" \
+  -d '{"clear": true}'
+
+# Delete specific items from queue
+curl -X POST "http://127.0.0.1:8188/queue" \
+  -H "Content-Type: application/json" \
+  -d '{"delete": ["prompt_id_1", "prompt_id_2"]}'
+
+# Cancel currently running job
+curl -X POST "http://127.0.0.1:8188/interrupt"
+```
+
+---
+
+## System Management
+
+```bash
+# System stats (VRAM, RAM, GPU, versions)
+curl -s "http://127.0.0.1:8188/system_stats"
+
+# Free GPU memory
+curl -X POST "http://127.0.0.1:8188/free" \
+  -H "Content-Type: application/json" \
+  -d '{"unload_models": true, "free_memory": true}'
+```
+
+---
+
+## ComfyUI Manager Endpoints (Optional)
+
+These require ComfyUI-Manager installed.
+
+```bash
+# Install custom node from git repo
+curl -X POST "http://127.0.0.1:8188/manager/queue/install" \
+  -H "Content-Type: application/json" \
+  -d '{"git_url": "https://github.com/user/comfyui-node.git"}'
+
+# Check install queue status
+curl -s "http://127.0.0.1:8188/manager/queue/status"
+
+# Install model
+curl -X POST "http://127.0.0.1:8188/manager/queue/install_model" \
+  -H "Content-Type: application/json" \
+  -d '{"url": "https://...", "path": "models/checkpoints", "filename": "model.safetensors"}'
+```
+
+---
+
+## POST /prompt Payload Format
+
+```json
+{
+  "prompt": {
+    "3": {
+      "class_type": "KSampler",
+      "inputs": {
+        "seed": 42,
+        "steps": 20,
+        "cfg": 7.5,
+        "sampler_name": "euler",
+        "scheduler": "normal",
+        "denoise": 1.0,
+        "model": ["4", 0],
+        "positive": ["6", 0],
+        "negative": ["7", 0],
+        "latent_image": ["5", 0]
+      }
+    }
+  },
+  "client_id": "unique-uuid-for-ws-filtering",
+  "extra_data": {
+    "api_key_comfy_org": "optional-partner-node-key"
+  }
+}
+```
+
+- `prompt`: The workflow graph (API format)
+- `client_id`: UUID for WebSocket event filtering
+- `extra_data.api_key_comfy_org`: Required for paid partner nodes (Flux Pro, Ideogram, etc.)
@@ -0,0 +1,218 @@
+# ComfyUI Workflow JSON Format
+
+## Two Formats
+
+ComfyUI uses two workflow formats. **Only API format works for programmatic execution.**
+
+### API Format (what we use)
+
+Top-level keys are string node IDs. Each node has `class_type` and `inputs`:
+
+```json
+{
+  "3": {
+    "class_type": "KSampler",
+    "inputs": {
+      "seed": 156680208700286,
+      "steps": 20,
+      "cfg": 8,
+      "sampler_name": "euler",
+      "scheduler": "normal",
+      "denoise": 1.0,
+      "model": ["4", 0],
+      "positive": ["6", 0],
+      "negative": ["7", 0],
+      "latent_image": ["5", 0]
+    },
+    "_meta": {"title": "KSampler"}
+  },
+  "4": {
+    "class_type": "CheckpointLoaderSimple",
+    "inputs": {
+      "ckpt_name": "v1-5-pruned-emaonly.safetensors"
+    }
+  },
+  "5": {
+    "class_type": "EmptyLatentImage",
+    "inputs": {"width": 512, "height": 512, "batch_size": 1}
+  },
+  "6": {
+    "class_type": "CLIPTextEncode",
+    "inputs": {
+      "text": "a beautiful cat",
+      "clip": ["4", 1]
+    }
+  },
+  "7": {
+    "class_type": "CLIPTextEncode",
+    "inputs": {
+      "text": "bad quality, ugly",
+      "clip": ["4", 1]
+    }
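+  },
+  "8": {
+    "class_type": "VAEDecode",
+    "inputs": {
+      "samples": ["3", 0],
+      "vae": ["4", 2]
+    }
+  },
+  "9": {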
+    "class_type": "SaveImage",
+    "inputs": {
+      "filename_prefix": "ComfyUI",
+      "images": ["8", 0]
+    }
+  }
+}
+```
+
+**How to detect:** Top-level keys are numeric strings, each value has `class_type`.
+
+### Editor Format (not directly executable)
+
+Has `nodes[]` and `links[]` arrays — the visual graph data from the ComfyUI web editor.
+This is what "Save" produces. For API use, export with "Save (API Format)" instead.
+
+**How to detect:** Top-level has `"nodes"` and `"links"` keys.
+
+---
+
+## Input Connections
+
+Inputs can be:
+- **Literal values**: `"text": "a cat"`, `"seed": 42`, `"width": 512`
+- **Links to other nodes**: `["node_id", output_index]` — e.g., `["4", 0]` means
+  output slot 0 of node "4"
+
+Only literal values can be modified by parameter injection. Linked inputs are wiring.
+
+---
+
+## Common Node Types and Their Controllable Parameters
+
+### Text Prompts
+
+| Node Class | Key Fields |
+|------------|-----------|
+| `CLIPTextEncode` | `text` (the prompt string) |
+| `CLIPTextEncodeSDXL` | `text_g`, `text_l`, `width`, `height` |
+
+Usually: positive prompt → one CLIPTextEncode, negative prompt → another.
+Distinguish by checking the `_meta.title` field or by tracing which feeds into
+positive vs negative inputs of the sampler.
+
+### Sampling
+
+| Node Class | Key Fields |
+|------------|-----------|
+| `KSampler` | `seed`, `steps`, `cfg`, `sampler_name`, `scheduler`, `denoise` |
+| `KSamplerAdvanced` | `noise_seed`, `steps`, `cfg`, `sampler_name`, `scheduler`, `start_at_step`, `end_at_step` |
+| `SamplerCustom` | `cfg`, `sampler`, `sigmas` |
+
+### Image Dimensions
+
+| Node Class | Key Fields |
+|------------|-----------|
+| `EmptyLatentImage` | `width`, `height`, `batch_size` |
+| `LatentUpscale` | `width`, `height`, `upscale_method` |
+
+### Model Loading
+
+| Node Class | Key Fields | Model Folder |
+|------------|-----------|-------------|
+| `CheckpointLoaderSimple` | `ckpt_name` | `checkpoints` |
+| `LoraLoader` | `lora_name`, `strength_model`, `strength_clip` | `loras` |
+| `VAELoader` | `vae_name` | `vae` |
+| `ControlNetLoader` | `control_net_name` | `controlnet` |
+| `CLIPLoader` | `clip_name` | `clip` |
+| `UNETLoader` | `unet_name` | `unet` |
+| `DiffusionModelLoader` | `model_name` | `diffusion_models` |
+| `UpscaleModelLoader` | `model_name` | `upscale_models` |
+
+### Image Input/Output
+
+| Node Class | Key Fields |
+|------------|-----------|
+| `LoadImage` | `image` (filename on server, after upload) |
+| `LoadImageMask` | `image`, `channel` |
+| `SaveImage` | `filename_prefix` |
+| `PreviewImage` | (no controllable fields, just previews) |
+
+### ControlNet
+
+| Node Class | Key Fields |
+|------------|-----------|
+| `ControlNetApply` | `strength` |
+| `ControlNetApplyAdvanced` | `strength`, `start_percent`, `end_percent` |
+
+### Video (AnimateDiff)
+
+| Node Class | Key Fields |
+|------------|-----------|
+| `ADE_AnimateDiffLoaderWithContext` | `model_name`, `motion_scale` |
+| `VHS_VideoCombine` | `frame_rate`, `format`, `filename_prefix` |
+
+---
+
+## Parameter Injection Pattern
+
+To modify a workflow programmatically:
+
+```python
+import json, copy
+
+with open("workflow_api.json") as f:
+    workflow = json.load(f)
+
+# Deep copy to avoid mutating original
+wf = copy.deepcopy(workflow)
+
+# Inject parameters by node ID + field name
+wf["6"]["inputs"]["text"] = "a beautiful sunset"     # positive prompt
+wf["7"]["inputs"]["text"] = "ugly, blurry"           # negative prompt
+wf["3"]["inputs"]["seed"] = 42                       # seed
+wf["3"]["inputs"]["steps"] = 30                      # steps
+wf["5"]["inputs"]["width"] = 1024                    # width
+wf["5"]["inputs"]["height"] = 1024                   # height
+```
+
+The `scripts/extract_schema.py` in this skill automates discovering which
+node IDs and fields correspond to which user-facing parameters.
+
+---
+
+## Identifying Controllable Parameters (Heuristics)
+
+When analyzing an unknown workflow, these patterns identify user-facing params:
+
+1. **Prompt text**: Any `CLIPTextEncode` → `text` field. Title/meta usually
+   indicates positive vs negative.
+
+2. **Seed**: Any `KSampler` / `KSamplerAdvanced` → `seed` / `noise_seed`.
+   Randomizable — set to different values for variations.
+
+3. **Dimensions**: `EmptyLatentImage` → `width`, `height`. Common: 512, 768,
+   1024 (must be multiples of 8).
+
+4. **Steps**: `KSampler` → `steps`. More = higher quality + slower. 20-50 typical.
+
+5. **CFG scale**: `KSampler` → `cfg`. How closely to follow prompt. 5-15 typical.
+
+6. **Model/checkpoint**: `CheckpointLoaderSimple` → `ckpt_name`. Must match an
+   installed model filename exactly.
+
+7. **LoRA**: `LoraLoader` → `lora_name`, `strength_model`. Adapter name + weight.
+
+8. **Images for img2img**: `LoadImage` → `image`. Filename on server after upload.
+
+9. **Denoise strength**: `KSampler` → `denoise`. 0.0-1.0. Lower = closer to input
+   image. Only relevant for img2img.
+
+---
+
+## Output Nodes
+
+Output is produced by these node types:
+
+| Node | Output Key | Content |
+|------|-----------|---------|
+| `SaveImage` | `images` | List of `{filename, subfolder, type}` |
+| `VHS_VideoCombine` | `gifs` or `videos` | Video file references |
+| `SaveAudio` | `audio` | Audio file references |
+| `PreviewImage` | `images` | Temporary preview (not saved) |
+
+After execution, fetch outputs from `/history/{prompt_id}` → `outputs` → `{node_id}`.
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""
+check_deps.py — Check if a ComfyUI workflow's dependencies (custom nodes and models) are installed.
+
+Queries the running ComfyUI server for installed nodes (via /object_info) and models
+(via /models/{folder}), then diffs against what the workflow requires.
+
+Usage:
+    python3 check_deps.py workflow_api.json
+    python3 check_deps.py workflow_api.json --host 127.0.0.1 --port 8188
+    python3 check_deps.py workflow_api.json --host https://cloud.comfy.org --api-key KEY
+
+Output format:
+    {
+      "is_ready": true/false,
+      "missing_nodes": ["NodeClassName", ...],
+      "missing_models": [{"node_id": "...", "class_type": "...", "field": "...", "value": "...", "folder": "..."}],
+      "installed_nodes_count": 123,
+      "required_nodes": ["KSampler", "CLIPTextEncode", ...]
+    }
+
+Requires: Python 3.10+, requests (or urllib as fallback)
+"""
+
+import json
+import sys
+import argparse
+from pathlib import Path
+from urllib.parse import urljoin, urlparse
+
+try:
+    import requests
+    HAS_REQUESTS = True
+except ImportError:
+    HAS_REQUESTS = False
+    import urllib.request
+    import urllib.error
+
+# Known model loader node types and which folder they reference.
+# Maps a node's class_type to a (field, folder) pair:
+#   field  — key in the node's "inputs" that holds the model filename
+#   folder — model folder name used when listing installed models
+# NOTE(review): each loader maps to a single field, so DualCLIPLoader is only
+# checked via "clip_name1"; a second filename input, if the node has one, is
+# not validated — confirm whether that is intended.
+MODEL_LOADERS = {
+    "CheckpointLoaderSimple": ("ckpt_name", "checkpoints"),
+    "CheckpointLoader": ("ckpt_name", "checkpoints"),
+    "unCLIPCheckpointLoader": ("ckpt_name", "checkpoints"),
+    "LoraLoader": ("lora_name", "loras"),
+    "LoraLoaderModelOnly": ("lora_name", "loras"),
+    "VAELoader": ("vae_name", "vae"),
+    "ControlNetLoader": ("control_net_name", "controlnet"),
+    "DiffControlNetLoader": ("control_net_name", "controlnet"),
+    "CLIPLoader": ("clip_name", "clip"),
+    "DualCLIPLoader": ("clip_name1", "clip"),
+    "UNETLoader": ("unet_name", "unet"),
+    "DiffusionModelLoader": ("model_name", "diffusion_models"),
+    "UpscaleModelLoader": ("model_name", "upscale_models"),
+    "CLIPVisionLoader": ("clip_name", "clip_vision"),
+    "StyleModelLoader": ("style_model_name", "style_models"),
+    "GLIGENLoader": ("gligen_name", "gligen"),
+    "HypernetworkLoader": ("hypernetwork_name", "hypernetworks"),
+}
+
+
+def http_get(url: str, headers: dict = None) -> tuple:
+    """GET request, returns (status_code, body_text)."""
+    if HAS_REQUESTS:
+        r = requests.get(url, headers=headers or {}, timeout=30)
+        return r.status_code, r.text
+    else:
+        req = urllib.request.Request(url, headers=headers or {})
+        try:
+            resp = urllib.request.urlopen(req, timeout=30)
+            return resp.status, resp.read().decode()
+        except urllib.error.HTTPError as e:
+            return e.code, e.read().decode()
+
+
+def check_deps(workflow_path: str, host: str = "http://127.0.0.1:8188", api_key: str = None):
+    """Check workflow dependencies against a running server."""
+    # Load workflow
+    with open(workflow_path) as f:
+        workflow = json.load(f)
+
+    # Validate format
+    if "nodes" in workflow and "links" in workflow:
+        return {"error": "Workflow is in editor format, not API format."}
+
+    headers = {}
+    if api_key:
+        headers["X-API-Key"] = api_key
+
+    parsed_host = urlparse(host)
+    hostname = (parsed_host.hostname or "").lower()
+    is_cloud_host = hostname == "cloud.comfy.org" or hostname.endswith(".cloud.comfy.org")
+    is_cloud = is_cloud_host or api_key is not None
+    base = host.rstrip("/")
+
+    # Get installed node types
+    object_info_url = f"{base}/api/object_info" if is_cloud else f"{base}/object_info"
+    status, body = http_get(object_info_url, headers)
+    if status != 200:
+        return {"error": f"Cannot reach server at {host}. Is ComfyUI running? HTTP {status}"}
+
+    installed_nodes = set(json.loads(body).keys())
+
+    # Find required node types from workflow
+    required_nodes = set()
+    for node_id, node in workflow.items():
+        if isinstance(node, dict) and "class_type" in node:
+            required_nodes.add(node["class_type"])
+
+    missing_nodes = sorted(required_nodes - installed_nodes)
+
+    # Check model dependencies
+    missing_models = []
+    model_cache = {}  # folder → set of installed model filenames
+
+    for node_id, node in workflow.items():
+        if not isinstance(node, dict) or "class_type" not in node:
+            continue
+        class_type = node["class_type"]
+        if class_type not in MODEL_LOADERS:
+            continue
+
+        field, folder = MODEL_LOADERS[class_type]
+        inputs = node.get("inputs", {})
+        model_name = inputs.get(field)
+
+        if not model_name or not isinstance(model_name, str):
+            continue
+
+        # Fetch installed models for this folder (cached)
+        if folder not in model_cache:
+            models_url = f"{base}/api/models/{folder}" if is_cloud else f"{base}/models/{folder}"
+            s, b = http_get(models_url, headers)
+            if s == 200:
+                model_cache[folder] = set(json.loads(b))
+            else:
+                model_cache[folder] = set()
+
+        if model_name not in model_cache[folder]:
+            missing_models.append({
+                "node_id": node_id,
+                "class_type": class_type,
+                "field": field,
+                "value": model_name,
+                "folder": folder,
+            })
+
+    is_ready = len(missing_nodes) == 0 and len(missing_models) == 0
+
+    return {
+        "is_ready": is_ready,
+        "missing_nodes": missing_nodes,
+        "missing_models": missing_models,
+        "installed_nodes_count": len(installed_nodes),
+        "required_nodes": sorted(required_nodes),
+    }
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Check ComfyUI workflow dependencies")
+    parser.add_argument("workflow", help="Path to workflow API JSON file")
+    parser.add_argument("--host", default="http://127.0.0.1:8188", help="ComfyUI server URL")
+    parser.add_argument("--port", type=int, help="Server port (overrides --host port)")
+    parser.add_argument("--api-key", help="API key for cloud")
+    args = parser.parse_args()
+
+    # Handle --port override
+    host = args.host
+    if args.port and ":" not in host.split("//")[-1]:
+        host = f"{host}:{args.port}"
+
+    result = check_deps(args.workflow, host=host, api_key=args.api_key)
+    print(json.dumps(result, indent=2))
+
+    if result.get("error"):
+        sys.exit(1)
+    if not result.get("is_ready", False):
+        sys.exit(1)
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+# ComfyUI Setup — Install, launch, and verify using the official comfy-cli.
+# Usage: bash scripts/comfyui_setup.sh [--nvidia|--amd|--m-series|--cpu]
+#
+# If no flag is passed, runs hardware_check.py to detect the right one
+# automatically, and refuses to install locally when the verdict is "cloud"
+# (no usable GPU, too little VRAM, Intel Mac, etc.) — pointing the user
+# at Comfy Cloud instead.
+#
+# Prerequisites: Python 3.10+, pip
+# What it does:
+#   0. Hardware check (skipped if a flag was passed explicitly)
+#   1. Installs comfy-cli (if not present)
+#   2. Disables analytics tracking
+#   3. Installs ComfyUI + ComfyUI-Manager
+#   4. Launches server in background
+#   5. Verifies server is reachable
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+HARDWARE_CHECK="$SCRIPT_DIR/hardware_check.py"
+
+# Step 0: Hardware check (auto-detect GPU flag when none was provided)
+if [ $# -ge 1 ]; then
+    GPU_FLAG="$1"
+    echo "==> GPU flag: $GPU_FLAG (user-supplied, skipping hardware check)"
+else
+    if [ ! -f "$HARDWARE_CHECK" ]; then
+        echo "==> hardware_check.py not found, defaulting to --nvidia"
+        GPU_FLAG="--nvidia"
+    else
+        echo "==> Running hardware check..."
+        # hardware_check.py signals its verdict through a non-zero exit code,
+        # so errexit has to be off while we capture its output.
+        set +e
+        HW_JSON="$(python3 "$HARDWARE_CHECK" --json)"
+        HW_EXIT=$?
+        set -e
+        echo "$HW_JSON"
+        echo ""
+
+        # If the report is missing or not valid JSON (python3 absent, script
+        # crashed), stop with a usable hint instead of a bare traceback.
+        if ! VERDICT="$(echo "$HW_JSON" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("verdict",""))')" \
+            || ! FLAG="$(echo "$HW_JSON"   | python3 -c 'import sys,json; print(json.load(sys.stdin).get("comfy_cli_flag") or "")')"; then
+            echo "==> Could not read the hardware check report (exit code $HW_EXIT)." >&2
+            echo "    Re-run with an explicit flag: bash $0 --nvidia|--amd|--m-series|--cpu" >&2
+            exit 1
+        fi
+
+        if [ "$VERDICT" = "cloud" ]; then
+            echo ""
+            echo "==> Hardware check: this machine is not suitable for local ComfyUI."
+            echo "    Recommended: Comfy Cloud — https://platform.comfy.org"
+            echo ""
+            echo "    If you want to override and install anyway, re-run with an"
+            echo "    explicit flag: bash $0 --nvidia|--amd|--m-series|--cpu"
+            exit 2
+        fi
+
+        if [ -z "$FLAG" ]; then
+            echo "==> Hardware check couldn't pick a comfy-cli flag. Defaulting to --nvidia."
+            echo "    (For Intel Arc or unsupported hardware, use the manual install path.)"
+            GPU_FLAG="--nvidia"
+        else
+            GPU_FLAG="$FLAG"
+        fi
+
+        if [ "$VERDICT" = "marginal" ]; then
+            echo "==> Hardware check: verdict is MARGINAL."
+            echo "    SD1.5 should work; SDXL/Flux may be slow or OOM."
+            echo "    Consider Comfy Cloud for heavier workflows: https://platform.comfy.org"
+            echo ""
+        fi
+    fi
+fi
+
+echo "==> ComfyUI Setup"
+echo "    GPU flag: $GPU_FLAG"
+echo ""
+
+# Step 1: Install comfy-cli
+if command -v comfy >/dev/null 2>&1; then
+    echo "==> comfy-cli already installed: $(comfy -v 2>/dev/null || echo 'unknown version')"
+else
+    echo "==> Installing comfy-cli..."
+    pip install comfy-cli
+fi
+
+# Step 2: Disable tracking (avoid interactive prompt)
+echo "==> Disabling analytics tracking..."
+comfy --skip-prompt tracking disable 2>/dev/null || true
+
+# Step 3: Install ComfyUI
+if comfy which 2>/dev/null | grep -q "ComfyUI"; then
+    echo "==> ComfyUI already installed at: $(comfy which 2>/dev/null)"
+else
+    echo "==> Installing ComfyUI ($GPU_FLAG)..."
+    # Quoted so a flag is always passed as exactly one argument.
+    comfy --skip-prompt install "$GPU_FLAG"
+fi
+
+# Step 4: Launch in background
+echo "==> Launching ComfyUI in background..."
+comfy launch --background 2>/dev/null || {
+    echo "==> Background launch failed. Trying foreground check..."
+    echo "    You may need to run: comfy launch"
+    exit 1
+}
+
+# Step 5: Wait for server to be ready
+echo "==> Waiting for server..."
+MAX_WAIT=30
+ELAPSED=0
+while [ $ELAPSED -lt $MAX_WAIT ]; do
+    # -f makes curl fail on HTTP error statuses, so an error page from a
+    # half-started server is not mistaken for readiness; --max-time bounds a
+    # hung connection.
+    if curl -sf --max-time 5 http://127.0.0.1:8188/system_stats >/dev/null 2>&1; then
+        echo "==> Server is running!"
+        curl -sf --max-time 5 http://127.0.0.1:8188/system_stats | python3 -m json.tool 2>/dev/null || true
+        break
+    fi
+    sleep 2
+    ELAPSED=$((ELAPSED + 2))
+done
+
+# ELAPSED only reaches MAX_WAIT when the loop ran out without a `break`.
+if [ $ELAPSED -ge $MAX_WAIT ]; then
+    echo "==> Server did not start within ${MAX_WAIT}s."
+    echo "    Check logs with: comfy launch (foreground) to see errors."
+    exit 1
+fi
+
+echo ""
+echo "==> Setup complete!"
+echo "    Server: http://127.0.0.1:8188"
+echo "    Web UI: http://127.0.0.1:8188 (open in browser)"
+echo "    Stop:   comfy stop"
+echo ""
+echo "    Next steps:"
+echo "    - Download a model: comfy model download --url <URL> --relative-path models/checkpoints"
+echo "    - Run a workflow:   python3 scripts/run_workflow.py --workflow <file.json> --args '{...}'"
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+"""
+extract_schema.py — Analyze a ComfyUI API-format workflow and extract controllable parameters.
+
+Reads a workflow JSON, identifies user-facing parameters (prompts, seed, dimensions, etc.)
+by scanning node types and field names, and outputs a schema mapping.
+
+Usage:
+    python3 extract_schema.py workflow_api.json
+    python3 extract_schema.py workflow_api.json --output schema.json
+
+Output format:
+    {
+      "parameters": {
+        "prompt": {"node_id": "6", "field": "text", "type": "string", "value": "..."},
+        "seed": {"node_id": "3", "field": "seed", "type": "int", "value": 42},
+        ...
+      },
+      "output_nodes": ["9"],
+      "model_dependencies": [
+        {"node_id": "4", "class_type": "CheckpointLoaderSimple", "field": "ckpt_name", "value": "..."}
+      ]
+    }
+
+Requires: Python 3.10+ (stdlib only)
+"""
+
+import json
+import sys
+import argparse
+from pathlib import Path
+
+# Known parameter patterns, as (class_type, field_name, friendly_name) triples.
+# extract_schema() matches a node's class_type and input field against each
+# triple and exposes the input under friendly_name. Several triples may share a
+# friendly name (e.g. "seed"); extract_schema() disambiguates collisions.
+PARAM_PATTERNS = [
+    # Prompts
+    ("CLIPTextEncode", "text", "prompt"),
+    ("CLIPTextEncodeSDXL", "text_g", "prompt"),
+    ("CLIPTextEncodeSDXL", "text_l", "prompt_l"),
+    # Sampling
+    ("KSampler", "seed", "seed"),
+    ("KSampler", "steps", "steps"),
+    ("KSampler", "cfg", "cfg"),
+    ("KSampler", "sampler_name", "sampler_name"),
+    ("KSampler", "scheduler", "scheduler"),
+    ("KSampler", "denoise", "denoise"),
+    ("KSamplerAdvanced", "noise_seed", "seed"),
+    ("KSamplerAdvanced", "steps", "steps"),
+    ("KSamplerAdvanced", "cfg", "cfg"),
+    ("KSamplerAdvanced", "sampler_name", "sampler_name"),
+    ("KSamplerAdvanced", "scheduler", "scheduler"),
+    # Dimensions
+    ("EmptyLatentImage", "width", "width"),
+    ("EmptyLatentImage", "height", "height"),
+    ("EmptyLatentImage", "batch_size", "batch_size"),
+    # Image input
+    ("LoadImage", "image", "image"),
+    ("LoadImageMask", "image", "mask_image"),
+    # LoRA
+    ("LoraLoader", "lora_name", "lora_name"),
+    ("LoraLoader", "strength_model", "lora_strength"),
+    # Output
+    ("SaveImage", "filename_prefix", "filename_prefix"),
+]
+
+# Node types that produce output files; their node IDs are reported under
+# "output_nodes" by extract_schema().
+OUTPUT_NODES = {"SaveImage", "PreviewImage", "VHS_VideoCombine", "SaveAudio", "SaveAnimatedWEBP", "SaveAnimatedPNG"}
+
+# Node types that load models (for dependency checking).
+# Maps class_type -> (input field holding the model name, folder label that is
+# emitted as "folder" in each model_dependencies entry).
+# NOTE(review): exactly one field is tracked per loader, so for DualCLIPLoader
+# only `clip_name1` is recorded — confirm whether a second CLIP input should
+# be checked as well.
+MODEL_LOADERS = {
+    "CheckpointLoaderSimple": ("ckpt_name", "checkpoints"),
+    "CheckpointLoader": ("ckpt_name", "checkpoints"),
+    "LoraLoader": ("lora_name", "loras"),
+    "LoraLoaderModelOnly": ("lora_name", "loras"),
+    "VAELoader": ("vae_name", "vae"),
+    "ControlNetLoader": ("control_net_name", "controlnet"),
+    "CLIPLoader": ("clip_name", "clip"),
+    "DualCLIPLoader": ("clip_name1", "clip"),
+    "UNETLoader": ("unet_name", "unet"),
+    "DiffusionModelLoader": ("model_name", "diffusion_models"),
+    "UpscaleModelLoader": ("model_name", "upscale_models"),
+    "CLIPVisionLoader": ("clip_name", "clip_vision"),
+}
+
+
+def validate_api_format(workflow: dict) -> bool:
+    """Return True when *workflow* looks like an API-format export.
+
+    A mapping that has both "nodes" and "links" keys is treated as an editor
+    export and rejected. Otherwise the workflow is accepted as soon as any of
+    its values is a dict carrying a "class_type" key.
+    """
+    is_editor_export = "nodes" in workflow and "links" in workflow
+    if is_editor_export:
+        return False
+    return any(
+        isinstance(entry, dict) and "class_type" in entry
+        for entry in workflow.values()
+    )
+
+
+def infer_type(value) -> str:
+    """Return the schema type label for a workflow input value.
+
+    Labels: "bool", "int", "float", "string", "link" (a list, i.e. a
+    connection to another node) or "unknown" for anything else.
+    """
+    # Order matters: bool is a subclass of int, so it must be tested first.
+    ordered_checks = (
+        (bool, "bool"),
+        (int, "int"),
+        (float, "float"),
+        (str, "string"),
+        (list, "link"),
+    )
+    for py_type, label in ordered_checks:
+        if isinstance(value, py_type):
+            return label
+    return "unknown"
+
+
+def extract_schema(workflow: dict) -> dict:
+    """Build the parameter schema for an API-format workflow.
+
+    Returns a dict with three keys:
+      * "parameters": friendly name -> {node_id, field, type, value} for every
+        input matched by PARAM_PATTERNS whose value is not a link.
+      * "output_nodes": IDs of nodes whose class_type is in OUTPUT_NODES.
+      * "model_dependencies": one entry per MODEL_LOADERS node whose tracked
+        field holds a string.
+
+    Entries that are not dicts or lack "class_type" are ignored.
+    """
+    parameters = {}
+    output_nodes = []
+    model_deps = []
+    name_counts = {}  # how many times each friendly name has been claimed
+
+    for node_id, node in workflow.items():
+        is_node = isinstance(node, dict) and "class_type" in node
+        if not is_node:
+            continue
+
+        class_type = node["class_type"]
+        inputs = node.get("inputs", {})
+        meta_title = node.get("_meta", {}).get("title", "")
+
+        # Output node bookkeeping
+        if class_type in OUTPUT_NODES:
+            output_nodes.append(node_id)
+
+        # Model loader bookkeeping
+        if class_type in MODEL_LOADERS:
+            field, folder = MODEL_LOADERS[class_type]
+            if field in inputs and isinstance(inputs[field], str):
+                model_deps.append({
+                    "node_id": node_id,
+                    "class_type": class_type,
+                    "field": field,
+                    "value": inputs[field],
+                    "folder": folder,
+                })
+
+        # Controllable parameters: only patterns for this node's class apply
+        matching_patterns = (
+            (pattern_field, friendly_name)
+            for pattern_class, pattern_field, friendly_name in PARAM_PATTERNS
+            if pattern_class == class_type
+        )
+        for pattern_field, friendly_name in matching_patterns:
+            if pattern_field not in inputs:
+                continue
+            value = inputs[pattern_field]
+            val_type = infer_type(value)
+            if val_type == "link":
+                # Linked inputs come from another node — not directly controllable
+                continue
+
+            # A prompt whose node title mentions "negative"/"neg" is the
+            # negative prompt.
+            actual_name = friendly_name
+            if friendly_name == "prompt" and meta_title:
+                title_lower = meta_title.lower()
+                if any(marker in title_lower for marker in ("negative", "neg")):
+                    actual_name = "negative_prompt"
+
+            # First claimant keeps the plain name; later ones get "_<node_id>"
+            if actual_name not in name_counts:
+                name_counts[actual_name] = 1
+            else:
+                name_counts[actual_name] += 1
+                actual_name = f"{actual_name}_{node_id}"
+
+            parameters[actual_name] = {
+                "node_id": node_id,
+                "field": pattern_field,
+                "type": val_type,
+                "value": value,
+            }
+
+    return {
+        "parameters": parameters,
+        "output_nodes": output_nodes,
+        "model_dependencies": model_deps,
+    }
+
+
+def main():
+    """CLI entry point: read a workflow JSON file and emit its parameter schema.
+
+    The schema goes to stdout, or to the file named by --output. Exits with
+    status 1 (message on stderr) when the file is missing, is not valid JSON,
+    is not a JSON object, or is not an API-format workflow.
+    """
+    parser = argparse.ArgumentParser(description="Extract controllable parameters from a ComfyUI workflow")
+    parser.add_argument("workflow", help="Path to workflow API JSON file")
+    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
+    args = parser.parse_args()
+
+    workflow_path = Path(args.workflow)
+    if not workflow_path.exists():
+        print(f"Error: {workflow_path} not found", file=sys.stderr)
+        sys.exit(1)
+
+    # Read as UTF-8 explicitly: the platform default encoding may not be
+    # UTF-8, which would mangle or reject non-ASCII prompt text.
+    try:
+        with open(workflow_path, encoding="utf-8") as f:
+            workflow = json.load(f)
+    except ValueError as exc:  # covers JSONDecodeError and UnicodeDecodeError
+        print(f"Error: {workflow_path} is not valid JSON: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    # validate_api_format() calls .items(), so anything but an object would
+    # otherwise crash with an AttributeError.
+    if not isinstance(workflow, dict):
+        print("Error: Workflow JSON must be an object keyed by node ID.", file=sys.stderr)
+        sys.exit(1)
+
+    if not validate_api_format(workflow):
+        print("Error: Workflow is in editor format, not API format.", file=sys.stderr)
+        print("Re-export from ComfyUI using 'Save (API Format)' button.", file=sys.stderr)
+        sys.exit(1)
+
+    schema = extract_schema(workflow)
+
+    output_json = json.dumps(schema, indent=2)
+    if args.output:
+        Path(args.output).write_text(output_json, encoding="utf-8")
+        print(f"Schema written to {args.output}", file=sys.stderr)
+    else:
+        print(output_json)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,311 @@
+#!/usr/bin/env python3
+"""Detect whether this machine can realistically run ComfyUI locally.
+
+Emits a structured JSON report the agent can read to decide whether to:
+  - help the user install ComfyUI locally, or
+  - steer them to Comfy Cloud instead.
+
+Usage:
+    python3 hardware_check.py [--json]
+
+Exit code:
+    0  → "ok"       — can run local ComfyUI at reasonable speed
+    1  → "marginal" — technically works but slow / memory-tight
+    2  → "cloud"    — local is not viable, recommend Comfy Cloud
+
+The JSON report always prints to stdout regardless of exit code.
+
+Output fields the agent should read:
+    verdict:                    "ok" | "marginal" | "cloud"
+    recommended_install_path:   "nvidia" | "amd" | "apple-silicon" | "intel" | "comfy-cloud"
+    comfy_cli_flag:             "--nvidia" | "--amd" | "--m-series" | None
+                                (pass directly to `comfy install` when verdict != cloud)
+    gpu:                        detected GPU info or null
+    notes:                      list of human-readable strings to surface to the user
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import platform
+import re
+import shutil
+import subprocess
+import sys
+
+
+# Rough thresholds. SDXL/Flux need real VRAM; SD1.5 will scrape by on 6GB.
+# Apple Silicon shares RAM with GPU — unified memory budget is total RAM.
+# classify() compares detected memory against these with a strict `<`, so a
+# value exactly equal to a threshold lands in the better tier.
+MIN_VRAM_GB_USABLE = 6     # below this, most modern models won't load
+OK_VRAM_GB = 8             # SDXL fits comfortably here
+GREAT_VRAM_GB = 12         # Flux / video models start being realistic
+MIN_MAC_RAM_GB = 16        # Apple Silicon unified memory; below = pain
+OK_MAC_RAM_GB = 32         # smooth for SDXL / most workflows
+
+
+def _run(cmd: list[str], timeout: int = 5) -> str:
+    """Run *cmd* and return its stdout followed by its stderr as one string.
+
+    Never raises for the usual probe failures: a missing executable, a timeout
+    or any other OSError yields "". The exit status is ignored.
+    """
+    try:
+        completed = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=timeout,
+            check=False,
+        )
+    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+        return ""
+    stdout_text = completed.stdout or ""
+    stderr_text = completed.stderr or ""
+    return stdout_text + stderr_text
+
+
+def detect_nvidia() -> dict | None:
+    """Describe the first GPU listed by ``nvidia-smi``, or None.
+
+    Returns None when ``nvidia-smi`` is not on PATH, prints nothing, or its
+    first line has fewer than two comma-separated fields. A memory field that
+    is not an integer is reported as 0 GB rather than failing.
+    """
+    if not shutil.which("nvidia-smi"):
+        return None
+
+    query = [
+        "nvidia-smi",
+        "--query-gpu=name,memory.total,driver_version",
+        "--format=csv,noheader,nounits",
+    ]
+    report = _run(query).strip()
+    if not report:
+        return None
+
+    # Only the first reported line is considered.
+    fields = [column.strip() for column in report.splitlines()[0].split(",")]
+    if len(fields) < 2:
+        return None
+
+    try:
+        vram_mb = int(fields[1])
+    except ValueError:
+        vram_mb = 0
+
+    return {
+        "vendor": "nvidia",
+        "name": fields[0],
+        "vram_gb": round(vram_mb / 1024, 1),
+        "driver": fields[2] if len(fields) > 2 else "",
+    }
+
+
+def detect_rocm() -> dict | None:
+    """Describe an AMD GPU from ``rocm-smi`` output, or None.
+
+    Returns None when ``rocm-smi`` is not on PATH or prints nothing. The name
+    falls back to "AMD GPU" and the VRAM to 0.0 when the matching lines are
+    not found in the output.
+    """
+    if not shutil.which("rocm-smi"):
+        return None
+
+    report = _run(["rocm-smi", "--showproductname", "--showmeminfo", "vram"])
+    if not report.strip():
+        return None
+
+    series_match = re.search(r"Card series:\s*(.+)", report)
+    bytes_match = re.search(r"VRAM Total Memory \(B\):\s*(\d+)", report)
+
+    if bytes_match:
+        vram_gb = round(int(bytes_match.group(1)) / (1024**3), 1)
+    else:
+        vram_gb = 0.0
+
+    if series_match:
+        card_name = series_match.group(1).strip()
+    else:
+        card_name = "AMD GPU"
+
+    return {
+        "vendor": "amd",
+        "name": card_name,
+        "vram_gb": vram_gb,
+        "driver": "rocm",
+    }
+
+
+def detect_apple_silicon() -> dict | None:
+    """Describe the Apple Silicon chip and its unified memory, or None.
+
+    Returns None unless running on Darwin/arm64 (so Intel Macs yield None).
+    The chip generation defaults to 1 when the brand string does not contain
+    "Apple M<digits>"; memory is 0.0 when ``hw.memsize`` cannot be read.
+    """
+    # Intel Macs fail the arm64 test — no usable MPS there.
+    if platform.system() != "Darwin" or platform.machine() != "arm64":
+        return None
+
+    # Examples: "Apple M1", "Apple M1 Pro", "Apple M2 Max", "Apple M3 Ultra"
+    chip = _run(["sysctl", "-n", "machdep.cpu.brand_string"]).strip()
+    gen_match = re.search(r"Apple M(\d+)", chip)
+    if gen_match:
+        generation = int(gen_match.group(1))
+    else:
+        generation = 1
+
+    try:
+        mem_bytes = int(_run(["sysctl", "-n", "hw.memsize"]).strip() or 0)
+    except ValueError:
+        mem_bytes = 0
+
+    if mem_bytes:
+        ram_gb = round(mem_bytes / (1024**3), 1)
+    else:
+        ram_gb = 0.0
+
+    return {
+        "vendor": "apple",
+        "name": chip or "Apple Silicon",
+        "generation": generation,
+        "unified_memory_gb": ram_gb,
+    }
+
+
+def detect_intel_arc() -> dict | None:
+    """Report an Intel Arc/Xe GPU on Linux via ``clinfo --list``, or None.
+
+    VRAM is always reported as 0.0 — this probe does not measure it.
+    """
+    on_linux = platform.system() == "Linux"
+    if not on_linux or not shutil.which("clinfo"):
+        return None
+
+    listing = _run(["clinfo", "--list"])
+    mentions_intel = "Intel" in listing
+    mentions_arc_or_xe = "Arc" in listing or "Xe" in listing
+    if not (mentions_intel and mentions_arc_or_xe):
+        return None
+    return {"vendor": "intel", "name": "Intel Arc/Xe", "vram_gb": 0.0}
+
+
+def total_system_ram_gb() -> float:
+    """Return total physical RAM in GiB (one decimal), or 0.0 if unknown.
+
+    Sources per platform:
+      * Darwin  — ``sysctl -n hw.memsize`` (bytes)
+      * Linux   — the ``MemTotal:`` line of /proc/meminfo (kB)
+      * Windows — ``wmic``, then a PowerShell CIM query as a fallback, because
+        ``wmic`` is deprecated and may not be installed.
+
+    Any read or parse failure yields 0.0 instead of raising.
+    """
+    sysname = platform.system()
+    if sysname == "Darwin":
+        try:
+            return round(int(_run(["sysctl", "-n", "hw.memsize"]).strip() or 0) / (1024**3), 1)
+        except ValueError:
+            return 0.0
+    if sysname == "Linux":
+        try:
+            with open("/proc/meminfo", "r") as fh:
+                for line in fh:
+                    if line.startswith("MemTotal:"):
+                        kb = int(line.split()[1])
+                        return round(kb / (1024**2), 1)
+        except (OSError, ValueError, IndexError):
+            # Unreadable file or a malformed MemTotal line: report "unknown".
+            pass
+        return 0.0
+    if sysname == "Windows":
+        probes = (
+            (["wmic", "ComputerSystem", "get", "TotalPhysicalMemory"], 5),
+            (
+                [
+                    "powershell",
+                    "-NoProfile",
+                    "-Command",
+                    "(Get-CimInstance Win32_ComputerSystem).TotalPhysicalMemory",
+                ],
+                10,  # PowerShell start-up is slower than wmic
+            ),
+        )
+        for cmd, timeout in probes:
+            # The byte count is the only long run of digits in either output.
+            m = re.search(r"(\d{6,})", _run(cmd, timeout=timeout))
+            if m:
+                return round(int(m.group(1)) / (1024**3), 1)
+    return 0.0
+
+
+# Map recommended_install_path → flag the agent can pass to `comfy install`
+# Set to None when no local install is advised (verdict=cloud) or when
+# comfy-cli has no matching flag. build_report() looks the path up with
+# .get(), so a path missing from this table also yields None.
+_COMFY_CLI_FLAG = {
+    "nvidia": "--nvidia",
+    "amd": "--amd",
+    "apple-silicon": "--m-series",
+    "intel": None,          # comfy-cli has no Intel Arc flag — manual install
+    "comfy-cloud": None,
+}
+
+
+def classify(gpu: dict | None, ram_gb: float) -> tuple[str, str, list[str]]:
+    """Return (verdict, recommended_install_path, notes) for detected hardware.
+
+    *gpu* is a detector result dict (or None when nothing was found). The
+    verdict is "ok", "marginal" or "cloud"; notes are user-facing strings.
+    *ram_gb* is part of the signature but is not consulted by these rules.
+    """
+    if gpu is None:
+        return "cloud", "comfy-cloud", [
+            "No supported accelerator found (NVIDIA CUDA / AMD ROCm / Apple Silicon / Intel Arc).",
+            "CPU-only ComfyUI works but is unusably slow for modern models — use Comfy Cloud.",
+        ]
+
+    vendor = gpu["vendor"]
+
+    # Apple Silicon: judged on unified memory, not VRAM
+    if vendor == "apple":
+        gen = gpu.get("generation", 1)
+        mem = gpu.get("unified_memory_gb", 0.0)
+        if mem < MIN_MAC_RAM_GB:
+            return "cloud", "comfy-cloud", [
+                f"Apple Silicon with {mem} GB unified memory — below the {MIN_MAC_RAM_GB} GB practical minimum.",
+                "SD1.5 may work; SDXL/Flux will swap or OOM. Recommend Comfy Cloud.",
+            ]
+        if mem < OK_MAC_RAM_GB:
+            return "marginal", "apple-silicon", [
+                f"Apple Silicon M{gen} with {mem} GB — SDXL works but slow. Flux/video likely too tight.",
+            ]
+        return "ok", "apple-silicon", [
+            f"Apple Silicon M{gen} with {mem} GB unified memory — good for SDXL/Flux.",
+        ]
+
+    # Intel is always "marginal", whatever VRAM was reported
+    if vendor == "intel":
+        return "marginal", "intel", [
+            "Intel Arc detected — ComfyUI IPEX support is experimental; Comfy Cloud is more reliable.",
+        ]
+
+    # Remaining discrete GPUs (nvidia/amd): tiered by VRAM
+    vram = gpu.get("vram_gb", 0.0)
+    card = gpu["name"]
+    if vram < MIN_VRAM_GB_USABLE:
+        return "cloud", "comfy-cloud", [
+            f"{card} has only {vram} GB VRAM — below the {MIN_VRAM_GB_USABLE} GB practical minimum.",
+            "Most modern models won't load. Recommend Comfy Cloud.",
+        ]
+    if vram < OK_VRAM_GB:
+        return "marginal", vendor, [
+            f"{card} ({vram} GB VRAM) — SD1.5 works, SDXL tight, Flux/video unlikely.",
+        ]
+
+    if vram < GREAT_VRAM_GB:
+        summary = f"{card} ({vram} GB VRAM) — SDXL comfortable, Flux possible with optimizations."
+    else:
+        summary = f"{card} ({vram} GB VRAM) — can run everything including Flux/video."
+    return "ok", vendor, [summary]
+
+
+def build_report() -> dict:
+    """Probe the machine and assemble the hardware report dict.
+
+    Detectors run in the order NVIDIA, ROCm, Apple Silicon, Intel Arc and the
+    first truthy result wins. A Mac that is not arm64 and has no detected GPU
+    gets a dedicated "cloud" verdict; everything else goes through classify().
+    """
+    sysname = platform.system()
+    arch = platform.machine()
+    ram_gb = total_system_ram_gb()
+
+    gpu = None
+    for detector in (detect_nvidia, detect_rocm, detect_apple_silicon, detect_intel_arc):
+        gpu = detector()
+        if gpu:
+            break
+
+    # Intel Mac special case — fall out of apple-silicon detection with no GPU
+    intel_mac = gpu is None and sysname == "Darwin" and arch != "arm64"
+    if intel_mac:
+        verdict = "cloud"
+        install_path = "comfy-cloud"
+        cli_flag = None
+        notes = [
+            "Intel Mac detected — no MPS backend available.",
+            "ComfyUI will fall back to CPU which is unusably slow. Use Comfy Cloud.",
+        ]
+    else:
+        verdict, install_path, notes = classify(gpu, ram_gb)
+        cli_flag = _COMFY_CLI_FLAG.get(install_path)
+
+    # Key order is kept stable because the report is printed as JSON.
+    return {
+        "os": sysname,
+        "arch": arch,
+        "system_ram_gb": ram_gb,
+        "gpu": gpu,
+        "verdict": verdict,
+        "recommended_install_path": install_path,
+        "comfy_cli_flag": cli_flag,
+        "notes": notes,
+        "install_urls": _install_urls(),
+    }
+
+
+def _install_urls() -> dict:
+    """Return the install/documentation links embedded in every report."""
+    return dict(
+        desktop="https://docs.comfy.org/installation/desktop",
+        manual="https://docs.comfy.org/installation/manual_install",
+        comfy_cli="https://docs.comfy.org/comfy-cli/getting-started",
+        cloud="https://platform.comfy.org",
+    )
+
+
+def main() -> int:
+    """Print the hardware report and return the process exit code.
+
+    With ``--json`` anywhere in ``sys.argv`` the report is printed as indented
+    JSON; otherwise a short human-readable summary is printed. The return
+    value is 0 for "ok", 1 for "marginal" and 2 for any other verdict.
+    """
+    report = build_report()
+
+    if "--json" in sys.argv:
+        print(json.dumps(report, indent=2))
+    else:
+        gpu = report["gpu"]
+        if not gpu:
+            gpu_line = "GPU:     (none detected)"
+        elif gpu["vendor"] == "apple":
+            gpu_line = f"GPU:     {gpu['name']} — {gpu.get('unified_memory_gb', 0)} GB unified memory"
+        else:
+            gpu_line = f"GPU:     {gpu['name']} — {gpu.get('vram_gb', 0)} GB VRAM"
+
+        print(f"OS:      {report['os']} ({report['arch']})")
+        print(f"RAM:     {report['system_ram_gb']} GB")
+        print(gpu_line)
+        print(f"Verdict: {report['verdict']}  → {report['recommended_install_path']}")
+        flag = report["comfy_cli_flag"]
+        if flag:
+            print(f"         → run: comfy --skip-prompt install {flag}")
+        for note in report["notes"]:
+            print(f"  • {note}")
+
+    # Anything that is neither "ok" nor "marginal" maps to 2.
+    exit_codes = {"ok": 0, "marginal": 1}
+    return exit_codes.get(report["verdict"], 2)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,353 @@
+#!/usr/bin/env python3
+"""
+run_workflow.py — Inject parameters into a ComfyUI workflow, submit it, monitor execution,
+and download outputs.
+
+Usage:
+    # Local server
+    python3 run_workflow.py --workflow workflow_api.json \
+        --args '{"prompt": "a cat", "seed": 42}' \
+        --output-dir ./outputs
+
+    # Cloud server
+    python3 run_workflow.py --workflow workflow_api.json \
+        --args '{"prompt": "a cat"}' \
+        --host https://cloud.comfy.org \
+        --api-key comfyui-xxxxxxx \
+        --output-dir ./outputs
+
+    # With schema file (pre-extracted)
+    python3 run_workflow.py --workflow workflow_api.json \
+        --schema schema.json \
+        --args '{"prompt": "a cat"}' \
+        --output-dir ./outputs
+
+Requires: Python 3.10+, requests (or urllib as fallback)
+"""
+
+import json
+import sys
+import time
+import uuid
+import copy
+import argparse
+from pathlib import Path
+from urllib.parse import urljoin, urlencode, urlparse
+
+try:
+    import requests
+    HAS_REQUESTS = True
+except ImportError:
+    HAS_REQUESTS = False
+    import urllib.request
+    import urllib.error
+
+
+def http_get(url: str, headers: dict = None, follow_redirects: bool = True) -> tuple:
+    """GET request, returns (status_code, body_bytes, response_headers).
+
+    Uses ``requests`` when available, otherwise ``urllib``. HTTP error
+    statuses are returned, not raised; connection-level errors propagate.
+
+    ``follow_redirects`` is only passed to ``requests``; the urllib fallback
+    does not consult it.
+    """
+    request_headers = headers or {}
+    if HAS_REQUESTS:
+        r = requests.get(url, headers=request_headers, allow_redirects=follow_redirects, timeout=30)
+        return r.status_code, r.content, dict(r.headers)
+
+    req = urllib.request.Request(url, headers=request_headers)
+    try:
+        # Context manager closes the connection once the body has been read.
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            return resp.status, resp.read(), dict(resp.headers)
+    except urllib.error.HTTPError as e:
+        # HTTPError wraps an open response too; release it after reading.
+        try:
+            return e.code, e.read(), dict(e.headers)
+        finally:
+            e.close()
+
+
+def http_post(url: str, data: dict, headers: dict = None) -> tuple:
+    """POST JSON request, returns (status_code, response_dict).
+
+    *data* is sent as a JSON body; *headers* are merged over the default
+    ``Content-Type: application/json``. When the response body is not valid
+    JSON the second element is ``{"raw": <body text>}`` — on both the
+    ``requests`` path and the urllib fallback. HTTP error statuses are
+    returned, not raised.
+    """
+    hdrs = {"Content-Type": "application/json"}
+    if headers:
+        hdrs.update(headers)
+
+    if HAS_REQUESTS:
+        r = requests.post(url, json=data, headers=hdrs, timeout=30)
+        try:
+            return r.status_code, r.json()
+        except ValueError:
+            return r.status_code, {"raw": r.text}
+
+    # Only the urllib path needs the body pre-encoded.
+    payload = json.dumps(data).encode()
+    req = urllib.request.Request(url, data=payload, headers=hdrs, method="POST")
+    try:
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            status, raw = resp.status, resp.read()
+    except urllib.error.HTTPError as e:
+        try:
+            status, raw = e.code, e.read()
+        finally:
+            e.close()
+
+    try:
+        return status, json.loads(raw)
+    except ValueError:
+        # Non-JSON body (e.g. an HTML error page): mirror the requests path
+        # instead of raising from inside the HTTP helper.
+        return status, {"raw": raw.decode("utf-8", errors="replace")}
+
+
+class ComfyRunner:
+    """Small HTTP client for submitting workflows to a ComfyUI server.
+
+    The runner is in "cloud" mode when the host name is ``cloud.comfy.org`` or
+    when an API key was supplied. In cloud mode paths are prefixed with
+    ``/api`` and job status is read from ``/api/job/<id>/status``; otherwise
+    the ``/history/<id>`` endpoint is used.
+    """
+
+    def __init__(self, host: str = "http://127.0.0.1:8188", api_key: str = None):
+        self.host = host.rstrip("/")
+        self.api_key = api_key
+        parsed_host = urlparse(self.host).hostname or ""
+        self.is_cloud = parsed_host.lower() == "cloud.comfy.org" or api_key is not None
+        # Sent as "client_id" with every submitted prompt.
+        self.client_id = str(uuid.uuid4())
+
+    @property
+    def headers(self) -> dict:
+        """Request headers: ``X-API-Key`` when an API key is set, else empty."""
+        h = {}
+        if self.api_key:
+            h["X-API-Key"] = self.api_key
+        return h
+
+    def api_url(self, path: str) -> str:
+        """Build URL. Cloud uses /api prefix for some endpoints."""
+        if self.is_cloud and not path.startswith("/api"):
+            # Cloud endpoints: /api/prompt, /api/view, /api/job, /api/queue
+            return f"{self.host}/api{path}"
+        return f"{self.host}{path}"
+
+    def check_server(self) -> bool:
+        """Return True when ``system_stats`` answers with HTTP 200.
+
+        Any exception while connecting is treated as "not reachable".
+        """
+        try:
+            # api_url() already adds the /api prefix in cloud mode.
+            status, _, _ = http_get(self.api_url("/system_stats"), self.headers)
+            return status == 200
+        except Exception:
+            return False
+
+    def submit(self, workflow: dict) -> dict:
+        """Submit workflow for execution. Returns {prompt_id, node_errors}.
+
+        On a non-200 response returns ``{"error": "HTTP <status>", "details": ...}``.
+        """
+        payload = {"prompt": workflow, "client_id": self.client_id}
+        if self.api_key and self.is_cloud:
+            payload.setdefault("extra_data", {})["api_key_comfy_org"] = self.api_key
+        url = self.api_url("/prompt")
+        status, resp = http_post(url, payload, self.headers)
+        if status != 200:
+            return {"error": f"HTTP {status}", "details": resp}
+        return resp
+
+    def poll_status(self, prompt_id: str, timeout: int = 120) -> dict:
+        """Poll until job completes. Returns final status dict.
+
+        The result's "status" is "success", "error", "cancelled" (cloud only)
+        or "timeout". The polling interval starts at 2 s and grows by 20% per
+        attempt up to 10 s.
+        """
+        start = time.time()
+        poll_interval = 2.0
+
+        while time.time() - start < timeout:
+            if self.is_cloud:
+                # Cloud has a dedicated status endpoint
+                url = f"{self.host}/api/job/{prompt_id}/status"
+                status, body, _ = http_get(url, self.headers)
+                if status == 200:
+                    data = json.loads(body) if isinstance(body, bytes) else body
+                    job_status = data.get("status", "unknown")
+                    if job_status == "completed":
+                        return {"status": "success", "data": data}
+                    elif job_status == "failed":
+                        return {"status": "error", "data": data}
+                    elif job_status == "cancelled":
+                        return {"status": "cancelled", "data": data}
+                    # still running, continue polling
+            else:
+                # Local: check /history/{prompt_id}
+                url = f"{self.host}/history/{prompt_id}"
+                status, body, _ = http_get(url, self.headers)
+                if status == 200:
+                    data = json.loads(body) if isinstance(body, bytes) else body
+                    # The prompt id is absent from the response until there is
+                    # a history entry for it; keep polling until then.
+                    if prompt_id in data:
+                        entry = data[prompt_id]
+                        if entry.get("status", {}).get("completed", False):
+                            return {"status": "success", "outputs": entry.get("outputs", {})}
+                        if entry.get("status", {}).get("status_str") == "error":
+                            return {"status": "error", "data": entry}
+
+            time.sleep(poll_interval)
+            poll_interval = min(poll_interval * 1.2, 10.0)
+
+        return {"status": "timeout", "elapsed": time.time() - start}
+
+    def get_outputs(self, prompt_id: str) -> dict:
+        """Get output file info from history.
+
+        Returns ``{}`` when the request fails or the prompt has no entry.
+        """
+        if self.is_cloud:
+            url = f"{self.host}/api/job/{prompt_id}/status"
+        else:
+            url = f"{self.host}/history/{prompt_id}"
+        status, body, _ = http_get(url, self.headers)
+        if status != 200:
+            return {}
+        data = json.loads(body) if isinstance(body, bytes) else body
+        if self.is_cloud:
+            return data.get("outputs", {})
+        if prompt_id in data:
+            return data[prompt_id].get("outputs", {})
+        return {}
+
+    def download_output(self, filename: str, subfolder: str, file_type: str, output_dir: Path) -> Path:
+        """Download a single output file into *output_dir* and return its path.
+
+        The server-supplied *filename* is sent unchanged in the request, but
+        only its final path component is used locally, so a name containing
+        directory parts (e.g. ``../x.png``) cannot be written outside
+        *output_dir*.
+
+        Raises:
+            RuntimeError: if the filename has no usable final component or the
+                server does not answer with HTTP 200.
+        """
+        safe_name = Path(filename).name
+        if safe_name in ("", ".", ".."):
+            raise RuntimeError(f"Refusing to save output with unsafe filename: {filename!r}")
+        params = urlencode({"filename": filename, "subfolder": subfolder, "type": file_type})
+        url = self.api_url(f"/view?{params}")
+        status, body, _ = http_get(url, self.headers, follow_redirects=True)
+        if status != 200:
+            raise RuntimeError(f"Failed to download {filename}: HTTP {status}")
+        out_path = output_dir / safe_name
+        out_path.write_bytes(body)
+        return out_path
+
+
+def load_schema(schema_path: str = None, workflow: dict = None) -> dict:
+    """Load a parameter schema from disk, or derive one from *workflow*.
+
+    A truthy *schema_path* takes precedence and is read as JSON. Otherwise the
+    schema is generated by the sibling ``extract_schema`` script; with no
+    workflow either, an empty ``{"parameters": {}}`` schema is returned.
+    """
+    if not schema_path:
+        if workflow is None:
+            return {"parameters": {}}
+        # Put this script's directory first on sys.path so the sibling module
+        # can be imported.
+        sibling_dir = Path(__file__).parent
+        sys.path.insert(0, str(sibling_dir))
+        from extract_schema import extract_schema
+        return extract_schema(workflow)
+
+    with open(schema_path) as f:
+        return json.load(f)
+
+
+def inject_params(workflow: dict, schema: dict, args: dict) -> dict:
+    """Return a deep copy of *workflow* with the values from *args* applied.
+
+    Each key of *args* is resolved through ``schema["parameters"]`` to a
+    (node_id, field) pair and written into that node's "inputs". Unknown
+    parameter names, and mappings that point at a node missing from the
+    workflow (or lacking "inputs"), are skipped with a warning on stderr.
+    The input *workflow* is never modified.
+    """
+    patched = copy.deepcopy(workflow)
+    known_params = schema.get("parameters", {})
+
+    for name, new_value in args.items():
+        if name not in known_params:
+            print(f"Warning: unknown parameter '{name}', skipping", file=sys.stderr)
+            continue
+
+        target = known_params[name]
+        node_id, field = target["node_id"], target["field"]
+
+        if node_id not in patched or "inputs" not in patched[node_id]:
+            print(f"Warning: node {node_id} not found in workflow", file=sys.stderr)
+            continue
+
+        patched[node_id]["inputs"][field] = new_value
+
+    return patched
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Run a ComfyUI workflow with parameter injection")
+    parser.add_argument("--workflow", required=True, help="Path to workflow API JSON file")
+    parser.add_argument("--args", default="{}", help="JSON parameters to inject")
+    parser.add_argument("--schema", help="Path to schema JSON (from extract_schema.py). Auto-generated if omitted.")
+    parser.add_argument("--host", default="http://127.0.0.1:8188", help="ComfyUI server URL")
+    parser.add_argument("--api-key", help="API key for cloud (X-API-Key)")
+    parser.add_argument("--output-dir", default="./outputs", help="Directory to save outputs")
+    parser.add_argument("--timeout", type=int, default=120, help="Max seconds to wait for completion")
+    parser.add_argument("--no-download", action="store_true", help="Skip downloading outputs")
+    parser.add_argument("--submit-only", action="store_true", help="Submit and return prompt_id without waiting")
+    args = parser.parse_args()
+
+    # Load workflow
+    workflow_path = Path(args.workflow)
+    if not workflow_path.exists():
+        print(json.dumps({"error": f"Workflow file not found: {args.workflow}"}))
+        sys.exit(1)
+    with open(workflow_path) as f:
+        workflow = json.load(f)
+
+    # Validate format
+    if "nodes" in workflow and "links" in workflow:
+        print(json.dumps({"error": "Workflow is in editor format, not API format. Re-export with 'Save (API Format)'."}))
+        sys.exit(1)
+
+    # Parse user args
+    try:
+        user_args = json.loads(args.args)
+    except json.JSONDecodeError as e:
+        print(json.dumps({"error": f"Invalid --args JSON: {e}"}))
+        sys.exit(1)
+
+    # Load/generate schema and inject params
+    schema = load_schema(args.schema, workflow)
+    if user_args:
+        workflow = inject_params(workflow, schema, user_args)
+
+    # Connect to server
+    runner = ComfyRunner(host=args.host, api_key=args.api_key)
+
+    # Check server
+    if not runner.check_server():
+        print(json.dumps({"error": f"Cannot reach server at {args.host}. Is ComfyUI running?"}))
+        sys.exit(1)
+
+    # Submit
+    result = runner.submit(workflow)
+    if "error" in result:
+        print(json.dumps({"error": "Submission failed", "details": result}))
+        sys.exit(1)
+
+    prompt_id = result.get("prompt_id")
+    if not prompt_id:
+        print(json.dumps({"error": "No prompt_id in response", "response": result}))
+        sys.exit(1)
+
+    # Check for node errors
+    node_errors = result.get("node_errors", {})
+    if node_errors:
+        print(json.dumps({"error": "Workflow validation failed", "node_errors": node_errors}))
+        sys.exit(1)
+
+    if args.submit_only:
+        print(json.dumps({"status": "submitted", "prompt_id": prompt_id}))
+        sys.exit(0)
+
+    # Poll for completion
+    print(f"Submitted: {prompt_id}. Waiting...", file=sys.stderr)
+    poll_result = runner.poll_status(prompt_id, timeout=args.timeout)
+
+    if poll_result["status"] == "timeout":
+        print(json.dumps({"status": "timeout", "prompt_id": prompt_id, "elapsed": poll_result["elapsed"]}))
+        sys.exit(1)
+    elif poll_result["status"] == "error":
+        print(json.dumps({"status": "error", "prompt_id": prompt_id, "details": poll_result.get("data")}))
+        sys.exit(1)
+    elif poll_result["status"] == "cancelled":
+        print(json.dumps({"status": "cancelled", "prompt_id": prompt_id}))
+        sys.exit(1)
+
+    # Download outputs
+    outputs = poll_result.get("outputs") or runner.get_outputs(prompt_id)
+    if args.no_download:
+        print(json.dumps({"status": "success", "prompt_id": prompt_id, "outputs": outputs}))
+        sys.exit(0)
+
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    downloaded = []
+    for node_id, node_output in outputs.items():
+        # ComfyUI puts images/videos under "images" key (even for video)
+        for key in ("images", "gifs", "videos", "audio"):
+            if key not in node_output:
+                continue
+            for file_info in node_output[key]:
+                filename = file_info.get("filename", "")
+                subfolder = file_info.get("subfolder", "")
+                file_type = file_info.get("type", "output")
+                if not filename:
+                    continue
+                try:
+                    out_path = runner.download_output(filename, subfolder, file_type, output_dir)
+                    # Detect media type from extension
+                    ext = Path(filename).suffix.lower()
+                    if ext in (".mp4", ".webm", ".avi", ".mov", ".gif"):
+                        media_type = "video"
+                    elif ext in (".wav", ".mp3", ".flac", ".ogg"):
+                        media_type = "audio"
+                    else:
+                        media_type = "image"
+                    downloaded.append({
+                        "file": str(out_path),
+                        "node_id": node_id,
+                        "type": media_type,
+                        "filename": filename,
+                    })
+                except Exception as e:
+                    print(f"Warning: failed to download {filename}: {e}", file=sys.stderr)
+
+    print(json.dumps({
+        "status": "success",
+        "prompt_id": prompt_id,
+        "outputs": downloaded,
+    }, indent=2))
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,219 @@
+---
+name: pretext
+description: "Use when building creative browser demos with @chenglou/pretext — DOM-free text layout for ASCII art, typographic flow around obstacles, text-as-geometry games, kinetic typography, and text-powered generative art. Produces single-file HTML demos by default."
+version: 1.0.0
+author: Hermes Agent
+license: MIT
+metadata:
+  hermes:
+    tags: [creative-coding, typography, pretext, ascii-art, canvas, generative, text-layout, kinetic-typography]
+    related_skills: [p5js, claude-design, excalidraw, architecture-diagram]
+---
+
+# Pretext Creative Demos
+
+## Overview
+
+[`@chenglou/pretext`](https://github.com/chenglou/pretext) is a 15KB zero-dependency TypeScript library by Cheng Lou (React core, ReasonML, Midjourney) for **DOM-free multiline text measurement and layout**. It does one thing: given `(text, font, width)`, return the line breaks, per-line widths, per-grapheme positions, and total height — all via canvas measurement, no reflow.
+
+That sounds like plumbing. It is not. Because it is fast and geometric, it is a **creative primitive**: you can reflow paragraphs around a moving sprite at 60fps, build games whose level geometry is made of real words, drive ASCII logos through prose, shatter text into particles with exact per-grapheme starting positions, or pack shrink-wrapped multiline UI without any `getBoundingClientRect` thrash.
+
+This skill exists so Hermes can make **cool demos** with it — the kind people post to X. See `pretext.cool` and `chenglou.me/pretext` for the community demo corpus.
+
+## When to Use
+
+Use when the user asks for:
+- A "pretext demo" / "cool pretext thing" / "text-as-X"
+- Text flowing around a moving shape (hero sections, editorial layouts, animated long-form pages)
+- ASCII-art effects using **real words or prose**, not monospace rasters
+- Games where the playfield / obstacles / bricks are made of text (Tetris-from-letters, Breakout-of-prose)
+- Kinetic typography with per-glyph physics (shatter, scatter, flock, flow)
+- Typographic generative art, especially with non-Latin scripts or mixed scripts
+- Multiline "shrink-wrap" UI (smallest container width that still fits the text)
+- Anything that would require knowing line breaks *before* rendering
+
+Don't use for:
+- Static SVG/HTML pages where CSS already solves layout — just use CSS
+- Rich text editors, general inline formatting engines (pretext is intentionally narrow)
+- Image → text (use `ascii-art` / `ascii-video` skills)
+- Pure canvas generative art with no text role — use `p5js`
+
+## Creative Standard
+
+This is visual art rendered in a browser. Pretext returns numbers; **you** draw the thing.
+
+- **Don't ship a "hello world" demo.** The `hello-orb-flow.html` template is the *starting* point. Every delivered demo must add intentional color, motion, composition, and one visual detail the user didn't ask for but will appreciate.
+- **Dark backgrounds, warm cores, considered palette.** Classic amber-on-black (CRT / terminal) works, but so do cold-white-on-charcoal (editorial) and desaturated pastels (risograph). Pick one and commit.
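+- **Proportional fonts are the point.** Pretext's whole vibe is "not monospaced" — lean into it. Use Iowan Old Style, Inter, Helvetica Neue, or a variable font; reach for a monospace face such as JetBrains Mono only when the concept is deliberately CRT/terminal. Never fall back to the browser's default sans.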
+- **Real source/text, not lorem ipsum.** The corpus should mean something. Short manifestos, poetry, real source code, a found text, the library's own README — never `lorem ipsum`.
+- **First-paint excellence.** No loading states, no blank frames. The demo must look shippable the instant it opens.
+
+## Stack
+
+Single self-contained HTML file per demo. No build step.
+
+| Layer | Tool | Purpose |
+|-------|------|---------|
+| Core | `@chenglou/pretext` via `esm.sh` CDN | Text measurement + line layout |
+| Render | HTML5 Canvas 2D | Glyph rendering, per-frame composition |
+| Segmentation | `Intl.Segmenter` (built-in) | Grapheme splitting for emoji / CJK / combining marks |
+| Interaction | Raw DOM events | Mouse / touch / wheel — no framework |
+
+```html
+<script type="module">
+import {
+  prepare, layout,                   // use-case 1: simple height
+  prepareWithSegments, layoutWithLines,  // use-case 2a: fixed-width lines
+  layoutNextLineRange, materializeLineRange, // use-case 2b: streaming / variable width
+  measureLineStats, walkLineRanges,  // stats without string allocation
+} from "https://esm.sh/@chenglou/pretext@0.0.6";
+</script>
+```
+
+Pin the version. `@0.0.6` at time of writing — check [npm](https://www.npmjs.com/package/@chenglou/pretext) for the latest if demo behavior is off.
+
+## The Two Use Cases
+
+Almost everything reduces to one of these two shapes. Learn both.
+
+### Use-case 1 — measure, then render with CSS/DOM
+
+```js
+const prepared = prepare(text, "16px Inter");
+const { height, lineCount } = layout(prepared, 320, 20);
+```
+
+You still let the browser draw the text. Pretext just tells you how tall the box will be at a given width, **without** a DOM read. Use for:
+- Virtualized lists where rows contain wrapping text
+- Masonry with precise card heights
+- "Does this label fit?" dev-time checks
+- Preventing layout shift when remote text loads
+
+**Keep `font` and `letterSpacing` exactly in sync with your CSS.** The canvas `ctx.font` format (e.g. `"16px Inter"`, `"500 17px 'JetBrains Mono'"`) must match the rendered CSS, or measurements drift.
+
+### Use-case 2 — measure *and* render yourself
+
+```js
+const prepared = prepareWithSegments(text, FONT);
+const { lines } = layoutWithLines(prepared, 320, 26);
+for (let i = 0; i < lines.length; i++) {
+  ctx.fillText(lines[i].text, 0, i * 26);
+}
+```
+
+This is where the creative work lives. You own the drawing, so you can:
+- Render to canvas, SVG, WebGL, or any coordinate system
+- Substitute per-glyph transforms (rotation, jitter, scale, opacity)
+- Use line metadata (width, grapheme positions) as geometry
+
+For **variable-width-per-line** flow (text around a shape, text in a donut band, text in a non-rectangular column):
+
+```js
+let cursor = { segmentIndex: 0, graphemeIndex: 0 };
+let y = 0;
+while (true) {
+  const lineWidth = widthAtY(y);  // your function: how wide is the corridor at this y?
+  const range = layoutNextLineRange(prepared, cursor, lineWidth);
+  if (!range) break;
+  const line = materializeLineRange(prepared, range);
+  ctx.fillText(line.text, leftEdgeAtY(y), y);
+  cursor = range.end;
+  y += lineHeight;
+}
+```
+
+This is the most important pattern in the whole library. It's what unlocks "text flowing around a dragged sprite" — the demo that went viral on X.
+
+### Helpers worth knowing
+
+- `measureLineStats(prepared, maxWidth)` → `{ lineCount, maxLineWidth }` — the widest line, i.e. multiline shrink-wrap width.
+- `walkLineRanges(prepared, maxWidth, callback)` — iterate lines without allocating strings. Use for stats/physics over graphemes when you don't need the characters.
+- `@chenglou/pretext/rich-inline` — the same system but for paragraphs mixing fonts / chips / mentions. Import from the subpath.
+
+## Demo Recipe Patterns
+
+The community corpus (see `references/patterns.md`) clusters into a handful of strong patterns. Pick one and riff — don't invent a new category unless asked.
+
+| Pattern | Key API | Example idea |
+|---|---|---|
+| **Reflow around obstacle** | `layoutNextLineRange` + per-row width function | Editorial paragraph that parts around a dragged cursor sprite |
+| **Text-as-geometry game** | `layoutWithLines` + per-line collision rects | Breakout where each brick is a measured word |
+| **Shatter / particles** | `walkLineRanges` → per-grapheme (x,y) → physics | Sentence that explodes into letters on click |
+| **ASCII obstacle typography** | `layoutNextLineRange` + measured per-row obstacle spans | Bitmap ASCII logo, shape morphs, and draggable wire objects that make text open around their actual geometry |
+| **Editorial multi-column** | `layoutNextLineRange` per column + shared cursor | Animated magazine spread with pull quotes |
+| **Kinetic type** | `layoutWithLines` + per-line transform over time | Star Wars crawl, wave, bounce, glitch |
+| **Multiline shrink-wrap** | `measureLineStats` | Quote card that auto-sizes to its tightest container |
+
+See `templates/donut-orbit.html` and `templates/hello-orb-flow.html` for working single-file starters.
+
+## Workflow
+
+1. **Pick a pattern** from the table above based on the user's brief.
+2. **Start from a template**:
+   - `templates/hello-orb-flow.html` — text reflowing around a moving orb (reflow-around-obstacle pattern)
+   - `templates/donut-orbit.html` — advanced example: measured ASCII logo obstacles, draggable wire sphere/cube, morphing shape fields, selectable DOM text, and dev-only controls
+   - `write_file` to a new `.html` in `/tmp/` or the user's workspace.
+3. **Swap the corpus** for something intentional to the brief. Real prose, 10-100 sentences, no lorem.
+4. **Tune the aesthetic** — font, palette, composition, interaction. This is the work; don't skip it.
+5. **Verify locally**:
+   ```sh
+   cd <dir-with-html> && python3 -m http.server 8765
+   # then open http://localhost:8765/<file>.html
+   ```
+6. **Check the console** — pretext will throw if `prepareWithSegments` is called with a bad font string; `Intl.Segmenter` is available in every modern browser.
+7. **Show the user the file path**, not just the code — they want to open it.
+
+## Performance Notes
+
+- `prepare()` / `prepareWithSegments()` is the expensive call. Do it **once** per text+font pair. Cache the handle.
+- On resize, only rerun `layout()` / `layoutWithLines()` — never re-prepare.
+- For per-frame animations where text doesn't change but geometry does, `layoutNextLineRange` in a tight loop is cheap enough to do every frame at 60fps for normal-length paragraphs.
+- When rendering ASCII masks per frame, keep a cell buffer (`Uint8Array`/typed arrays), derive measured per-row obstacle spans from the cells or projected geometry, merge spans, then feed those spans into `layoutNextLineRange` before drawing text.
+- Keep visual animation and layout animation coupled. If a sphere morphs into a cube, tween both the rendered cell buffer and the obstacle spans with the same value; otherwise the demo looks painted-on instead of physically reflowed.
+- For fades, prefer layer opacity over changing glyph intensity or obstacle scale. Put transient ASCII sprites on their own canvas and fade the canvas with CSS/GSAP opacity so geometry does not appear to shrink.
+- Canvas `ctx.font` setting is surprisingly slow; set it **once** per frame if font doesn't vary, not per `fillText` call.
+
+## Common Pitfalls
+
+1. **Drifting CSS/canvas font strings.** `ctx.font = "16px Inter"` measured, but CSS says `font-family: Inter, sans-serif; font-size: 16px`. Fine *if* Inter loads. If Inter 404s, CSS falls back to sans-serif and measurements drift by 5-20%. Always `preload` the font or use a web-safe family.
+
+2. **Re-preparing inside the animation loop.** Only `layout*` is cheap. Re-calling `prepare` every frame will tank perf. Keep the prepared handle in module scope.
+
+3. **Forgetting `Intl.Segmenter` for grapheme splits.** Emoji, combining marks, CJK — a decomposed `"é"` (`"e\u0301"`) run through `.split("")` gives you two chars, and most emoji come apart into broken surrogate halves. Use `new Intl.Segmenter(undefined, { granularity: "grapheme" })` when sampling individual visible glyphs.
+
+4. **`break: 'never'` chips without `extraWidth`.** In `rich-inline`, if you use `break: 'never'` for an atomic chip/mention, you must also supply `extraWidth` for the pill padding — otherwise chip chrome overflows the container.
+
+5. **Using `@chenglou/pretext` from `unpkg` with TypeScript-only entry.** Use `esm.sh` — it compiles the TS exports to browser-ready ESM automatically. `unpkg` will 404 or serve raw TS.
+
+6. **Monospace fallbacks silently erasing the whole point.** Users seeing monospace-looking output often have a CSS `font-family` that fell through to `monospace`. Verify the actual rendered font via DevTools.
+
+7. **Skipping rows vs adjusting width** when flowing around a shape. If the corridor on this row is too narrow to fit a line, *skip the row* (`y += lineHeight; continue;`) rather than passing a tiny maxWidth to `layoutNextLineRange` — pretext will return one-grapheme lines that look broken.
+
+8. **Shipping a cold demo.** The default first-paint looks tutorial-grade. Add: vignette, subtle scanline, idle auto-motion, one carefully chosen interactive response (drag, hover, scroll, click). Without these, "cool pretext demo" lands as "intern repro of the README."
+
+## Verification Checklist
+
+- [ ] Demo is a single self-contained `.html` file — opens by double-click or `python3 -m http.server`
+- [ ] `@chenglou/pretext` imported via `esm.sh` with pinned version
+- [ ] Corpus is real prose, not lorem ipsum, and matches the demo's concept
+- [ ] Font string passed to `prepare` matches the CSS font exactly
+- [ ] `prepare()` / `prepareWithSegments()` called once, not per frame
+- [ ] Dark background + considered palette — not the default white canvas
+- [ ] At least one interactive response (drag / hover / scroll / click) or idle auto-motion
+- [ ] Tested locally with `python3 -m http.server` and confirmed no console errors
+- [ ] 60fps on a mid-tier laptop (or graceful degradation documented)
+- [ ] One "extra mile" detail the user didn't ask for
+
+## Reference: Community Demos
+
+Clone these for inspiration / patterns (all MIT-ish, linked from [pretext.cool](https://www.pretext.cool/)):
+
+- **Pretext Breaker** — breakout with word-bricks — `github.com/rinesh/pretext-breaker`
+- **Tetris × Pretext** — `github.com/shinichimochizuki/tetris-pretext`
+- **Dragon animation** — `github.com/qtakmalay/PreTextExperiments`
+- **Somnai editorial engine** — `github.com/somnai-dreams/pretext-demos`
+- **Bad Apple!! ASCII** — `github.com/frmlinn/bad-apple-pretext`
+- **Drag-sprite reflow** — `github.com/dokobot/pretext-demo`
+- **Alarmy editorial clock** — `github.com/SmisLee/alarmy-pretext-demo`
+
+Official playground: [chenglou.me/pretext](https://chenglou.me/pretext/) — accordion, bubbles, dynamic-layout, editorial-engine, justification-comparison, masonry, markdown-chat, rich-note.
@@ -0,0 +1,258 @@
+# Pretext Patterns
+
+Copy-pasteable snippets for the most common pretext demo shapes. Each pattern is self-contained — drop into an HTML `<script type="module">` after importing from `https://esm.sh/@chenglou/pretext@0.0.6`.
+
+## 1. Flow around an obstacle (variable-width column)
+
+The signature pretext move. Row-by-row ask "how wide is the corridor here?" and let pretext break lines accordingly.
+
+```js
+const prepared = prepareWithSegments(TEXT, FONT);
+const LINE_H = 24;
+
+function drawFlow(ctx, obstacle /* {x,y,r} */, COL_X, COL_W, H) {
+  let cursor = { segmentIndex: 0, graphemeIndex: 0 };
+  let y = 72;
+  while (y < H - 40) {
+    const dy = y - obstacle.y;
+    const inBand = Math.abs(dy) < obstacle.r;
+    let x = COL_X, w = COL_W;
+    if (inBand) {
+      const half = Math.sqrt(obstacle.r ** 2 - dy ** 2);
+      const leftW  = Math.max(0, (obstacle.x - half) - COL_X);
+      const rightW = Math.max(0, (COL_X + COL_W) - (obstacle.x + half));
+      if (leftW >= rightW) { x = COL_X;                 w = leftW  - 12; }
+      else                 { x = obstacle.x + half + 12; w = rightW - 12; }
+      if (w < 40) { y += LINE_H; continue; } // skip rather than squeeze
+    }
+    const range = layoutNextLineRange(prepared, cursor, w);
+    if (!range) break;
+    const line = materializeLineRange(prepared, range);
+    ctx.fillText(line.text, x, y);
+    cursor = range.end;
+    y += LINE_H;
+  }
+}
+```
+
+**Obstacle variants:** circles (above), rectangles (use `Math.max(0, …)` on the row-segment), multiple obstacles (sort segments and emit the wider remaining lane), animated obstacles (recompute every frame — pretext is fast enough).
+
+## 2. Text-as-geometry game (word-bricks with collision)
+
+Use `layoutWithLines` to get stable line rects, then treat each word as an axis-aligned box for physics.
+
+```js
+const prepared = prepareWithSegments(WORDS.join(" "), FONT);
+const { lines } = layoutWithLines(prepared, FIELD_W, 28);
+
+// Build brick rects: split each line on spaces and measure word-by-word.
+const bricks = [];
+let y = 50;
+for (const line of lines) {
+  let x = 10;
+  for (const word of line.text.split(" ")) {
+    const wPx = ctx.measureText(word).width; // or use walkLineRanges per word
+    bricks.push({ x, y, w: wPx, h: 24, text: word, hp: 1 });
+    x += wPx + ctx.measureText(" ").width;
+  }
+  y += 28;
+}
+```
+
+Collision: standard AABB vs the ball. When `hp` drops to 0, the brick is "eaten." For the aesthetic: fade brick opacity with hp, trail particles from the letters on impact.
+
+## 3. Shatter / explode typography
+
+Use `walkLineRanges` + a manual grapheme walk to get `(x, y)` for every glyph, then spawn particles.
+
+```js
+const prepared = prepareWithSegments(TEXT, FONT);
+const particles = [];
+let y = 100;
+walkLineRanges(prepared, COL_W, (line) => {
+  // materialize so we get per-grapheme positions
+  const range = materializeLineRange(prepared, line);
+  const seg = new Intl.Segmenter(undefined, { granularity: "grapheme" });
+  let x = COL_X;
+  for (const { segment } of seg.segment(range.text)) {
+    const w = ctx.measureText(segment).width;
+    particles.push({ ch: segment, x, y, vx: 0, vy: 0, homeX: x, homeY: y });
+    x += w;
+  }
+  y += LINE_H;
+});
+
+// On click, kick particles outward from click point; ease them back to (homeX, homeY).
+canvas.addEventListener("click", (e) => {
+  for (const p of particles) {
+    const dx = p.x - e.clientX, dy = p.y - e.clientY;
+    const d = Math.hypot(dx, dy) || 1;
+    const force = 400 / (d * 0.2 + 1);
+    p.vx += (dx / d) * force;
+    p.vy += (dy / d) * force;
+  }
+});
+
+function tick(dt) {
+  for (const p of particles) {
+    p.vx *= 0.92; p.vy *= 0.92;
+    p.vx += (p.homeX - p.x) * 0.06;
+    p.vy += (p.homeY - p.y) * 0.06;
+    p.x += p.vx * dt; p.y += p.vy * dt;
+  }
+}
+```
+
+## 4. ASCII mask as moving obstacle
+
+The "cool demos" money pattern: rasterize an ASCII logo, sprite, or bitmap into a cell buffer, then convert the occupied cells into per-row obstacle spans. Pretext lays the paragraphs around those spans, so the text actually opens around the moving ASCII object instead of being visually overpainted.
+
+See `templates/donut-orbit.html` in this skill for a full implementation. Treat it as an example, not the canonical scene: it shows how to derive spans from an ASCII logo, project a wire shape into obstacle rows, keep text selectable in a DOM layer, and hide tuning controls behind `?dev`. Key structure:
+
+```js
+const CELL_W = 12, CELL_H = 15;
+const cols = Math.ceil(W / CELL_W), rows = Math.ceil(H / CELL_H);
+const asciiMask = new Uint8Array(cols * rows);
+const obstacleRows = Array.from({ length: rows }, () => []);
+
+function rasterizeLogo(time) {
+  asciiMask.fill(0);
+  for (const r of obstacleRows) r.length = 0;
+
+  for (const block of logoBlocks(time)) {
+    const r0 = Math.floor(block.y0 / CELL_H);
+    const r1 = Math.ceil(block.y1 / CELL_H);
+    for (let r = r0; r <= r1; r++) {
+      obstacleRows[r]?.push([block.x0 - 18, block.x1 + 22]);
+      // Fill asciiMask cells here for drawing.
+    }
+  }
+
+  mergeRowSpans(obstacleRows);
+}
+
+function drawParagraphs(prepared) {
+  let cursor = { segmentIndex: 0, graphemeIndex: 0 };
+  for (let y = yStart; y < yEnd; y += LINE_H) {
+    const spans = obstacleRows[Math.floor(y / CELL_H)];
+    for (const [x0, x1] of freeIntervalsAround(spans)) {
+      const range = layoutNextLineRange(prepared, cursor, x1 - x0);
+      if (!range) return;
+      ctx.fillText(materializeLineRange(prepared, range).text, x0, y);
+      cursor = range.end;
+    }
+  }
+}
+```
+
+The important bit is that the ASCII geometry is not decorative only. The same moving spans that draw the logo or draggable object also carve the line intervals passed to `layoutNextLineRange`.
+
+### Measured spans beat magic padding
+
+When a logo or bitmap is rasterized into cells, measure the actual occupied cells per row and then add a small halo. Do not use one giant bounding box. Tight measured spans make the text read as if it is flowing around the letter shapes.
+
+```js
+const rowMin = new Float32Array(rows).fill(Infinity);
+const rowMax = new Float32Array(rows).fill(-Infinity);
+
+for (const cell of visibleCells) {
+  rowMin[cell.row] = Math.min(rowMin[cell.row], cell.x);
+  rowMax[cell.row] = Math.max(rowMax[cell.row], cell.x + CELL_W);
+}
+
+for (let row = 0; row < rows; row++) {
+  if (!Number.isFinite(rowMin[row])) continue;
+  obstacleRows[row].push([rowMin[row] - halo, rowMax[row] + halo]);
+}
+```
+
+For sharp pixel-art letters, smooth adjacent rows before pushing spans. A 1-2 row halo usually prevents code/prose from touching corners without losing the letter silhouette.
+
+### Morphing shapes need morphing obstacles
+
+If the visible object morphs (sphere to cube, logo to particles, etc.), tween the collision field too. A convincing demo uses the same `mix` value for both the rendered buffer and the pretext obstacle rows.
+
+```js
+function pushMorphedRows(aRows, bRows, mix) {
+  for (let row = 0; row < rows; row++) {
+    const a = aRows[row] ?? [centerX, centerX];
+    const b = bRows[row] ?? [centerX, centerX];
+    obstacleRows[row].push([
+      a[0] + (b[0] - a[0]) * mix,
+      a[1] + (b[1] - a[1]) * mix,
+    ]);
+  }
+}
+```
+
+Without this, the artwork may morph while the text still wraps around the old shape, which breaks the pretext effect.
+
+### Separate visual layers from collision
+
+Use separate canvases when visual treatment should not affect layout. For example, fade an ASCII object with CSS opacity on its own canvas layer, but keep its obstacle rows controlled by explicit shape state. Fading glyph intensity or scaling obstacle spans often looks like the object is shrinking instead of fading.
+
+## 5. Editorial multi-column with shared cursor
+
+Classic magazine layout: three columns, text flows from the end of column 1 into the top of column 2, etc. Pretext makes this trivial because the cursor is portable between `layoutNextLineRange` calls.
+
+```js
+const prepared = prepareWithSegments(ARTICLE, FONT);
+let cursor = { segmentIndex: 0, graphemeIndex: 0 };
+
+for (const col of [COL1, COL2, COL3]) {
+  let y = col.y;
+  while (y < col.y + col.h) {
+    const range = layoutNextLineRange(prepared, cursor, col.w);
+    if (!range) return;
+    const line = materializeLineRange(prepared, range);
+    ctx.fillText(line.text, col.x, y);
+    cursor = range.end;
+    y += LINE_H;
+  }
+}
+```
+
+Add pull quotes by treating them as obstacles in the middle column and using pattern #1 around them.
+
+## 6. Multiline shrink-wrap (tightest-fitting card)
+
+Given a max width, find the **smallest** container width that still produces the same line count. Useful for chat bubbles, quote cards, tooltip sizing.
+
+```js
+const prepared = prepareWithSegments(text, FONT);
+const { lineCount, maxLineWidth } = measureLineStats(prepared, MAX_W);
+// card width = maxLineWidth + padding; card height = lineCount * LINE_H + padding
+```
+
+For a demo that *visualizes* this, render the card shrinking from `MAX_W` down to `maxLineWidth` over a second — the line count stays constant but the right edge pulls in.
+
+## 7. Kinetic typography
+
+Animate per-line transforms over time. `layoutWithLines` gives you stable lines; index `i` drives the timing offset.
+
+```js
+const { lines } = layoutWithLines(prepared, W - 80, 40);
+function frame(t) {
+  for (let i = 0; i < lines.length; i++) {
+    const phase = t * 0.001 - i * 0.15;
+    const y = 100 + i * 40 + Math.sin(phase) * 12;
+    const opacity = 0.4 + 0.6 * Math.max(0, Math.sin(phase));
+    ctx.globalAlpha = opacity;
+    ctx.fillText(lines[i].text, 40, y);
+  }
+}
+```
+
+Variants: Star Wars crawl (perspective skew per line), wave (sine y-offset), bounce (ease-in-out arrival), glitch (per-glyph random offset using `Intl.Segmenter`).
+
+## 8. Font stack patterns
+
+| Vibe | Font string | Palette hint |
+|------|-------------|--------------|
+| Editorial / serious | `17px/1.4 "Iowan Old Style", Georgia, serif` | bone `#e8e6df` on charcoal `#0c0d10` |
+| CRT / terminal | `600 13px "JetBrains Mono", ui-monospace, monospace` | amber `hsl(38 60% 62%)` on `#07070a` |
+| Humanist / modern | `500 17px Inter, ui-sans-serif, system-ui, sans-serif` | off-white `#f3efe6` on deep-navy `#0b1020` |
+| Display / poster | `700 64px "Playfair Display", serif` | hot-red `#ff4130` on cream `#f0ebe0` |
+| Engineering | `14px "IBM Plex Mono", monospace` | neon-green `#7cff7c` on near-black `#0a0a0c` |
+
+Always load the web font explicitly (Google Fonts link tag or `@font-face`) so the canvas measurement matches the CSS render.
@@ -0,0 +1,95 @@
+<!doctype html>
+<html lang="en">
+<head>
+<meta charset="utf-8" />
+<title>pretext hello — text flowing around an orb</title>
+<style>
+  html,body { margin:0; padding:0; height:100%; background:#0c0d10; color:#e8e6df; overflow:hidden; }
+  body { font-family: "Iowan Old Style", Georgia, serif; }
+  canvas { display:block; width:100vw; height:100vh; }
+</style>
+</head>
+<body>
+<canvas id="c"></canvas>
+<script type="module">
+// Minimal pretext starter: long paragraph flows around a moving orb.
+// Uses layoutNextLineRange + variable-width streaming — the "killer app"
+// pattern that only pretext can do cheaply in the browser.
+import {
+  prepareWithSegments,
+  layoutNextLineRange,
+  materializeLineRange,
+} from "https://esm.sh/@chenglou/pretext@0.0.6";
+
+const TEXT = `Pretext measures text without touching the DOM. It returns numbers — widths, line breaks, cursors — and those numbers, arranged with a little imagination, become layouts the browser could never draw on its own. Here, a paragraph flows around a moving orb. Each line is asked for its own width, live. No reflows. No cheats. Just measurement. `.repeat(18);
+
+const FONT = '17px/1.4 "Iowan Old Style", Georgia, serif';
+const LINE_H = 24;
+
+const c = document.getElementById("c");
+const ctx = c.getContext("2d");
+let W, H, DPR;
+function resize() {
+  DPR = Math.min(devicePixelRatio || 1, 2);
+  W = innerWidth; H = innerHeight;
+  c.width = W*DPR; c.height = H*DPR;
+  c.style.width = W+"px"; c.style.height = H+"px";
+  ctx.setTransform(DPR,0,0,DPR,0,0);
+}
+addEventListener("resize", resize); resize();
+
+const prepared = prepareWithSegments(TEXT, FONT);
+
+// Orb starts just left of center and follows the mouse
+const orb = { x: innerWidth*0.45, y: innerHeight*0.5, r: 140 };
+addEventListener("mousemove", e => { orb.x = e.clientX; orb.y = e.clientY; });
+
+function frame(t) {
+  ctx.fillStyle = "#0c0d10"; ctx.fillRect(0,0,W,H);
+
+  // glowing orb
+  const g = ctx.createRadialGradient(orb.x, orb.y, 0, orb.x, orb.y, orb.r);
+  g.addColorStop(0, "rgba(255,200,120,0.35)");
+  g.addColorStop(0.6, "rgba(255,140,80,0.10)");
+  g.addColorStop(1, "rgba(0,0,0,0)");
+  ctx.fillStyle = g; ctx.fillRect(0,0,W,H);
+
+  // flow text as a column, routing around the orb row-by-row
+  const COL_X = 60, COL_W = W - 120;
+  let cursor = { segmentIndex: 0, graphemeIndex: 0 };
+  let y = 72;
+  ctx.fillStyle = "#e8e6df";
+  ctx.font = FONT;
+  ctx.textBaseline = "alphabetic";
+
+  while (y < H - 40) {
+    // does this row intersect the orb band?
+    const dy = y - orb.y;
+    const bandY = Math.abs(dy) < orb.r;
+    // lane = (left, width) skipping over the orb horizontally
+    let x = COL_X, lineMaxW = COL_W;
+    if (bandY) {
+      const half = Math.sqrt(orb.r*orb.r - dy*dy);
+      const orbLeft  = orb.x - half, orbRight = orb.x + half;
+      // choose the wider side, simple heuristic
+      const leftWidth  = Math.max(0, orbLeft  - COL_X);
+      const rightWidth = Math.max(0, COL_X + COL_W - orbRight);
+      if (leftWidth >= rightWidth) { x = COL_X; lineMaxW = leftWidth - 12; }
+      else { x = orbRight + 12; lineMaxW = rightWidth - 12; }
+      if (lineMaxW < 40) { y += LINE_H; continue; }
+    }
+
+    const range = layoutNextLineRange(prepared, cursor, lineMaxW);
+    if (!range) break;
+    const line = materializeLineRange(prepared, range);
+    ctx.fillText(line.text, x, y);
+    cursor = range.end;
+    y += LINE_H;
+  }
+
+  requestAnimationFrame(frame);
+}
+requestAnimationFrame(frame);
+</script>
+</body>
+</html>
@@ -0,0 +1,217 @@
+---
+name: sketch
+description: "Throwaway HTML mockups: 2-3 design variants to compare."
+version: 1.0.0
+author: Hermes Agent (adapted from gsd-build/get-shit-done)
+license: MIT
+metadata:
+  hermes:
+    tags: [sketch, mockup, design, ui, prototype, html, variants, exploration, wireframe, comparison]
+    related_skills: [spike, claude-design, popular-web-designs, excalidraw]
+---
+
+# Sketch
+
+Use this skill when the user wants to **see a design direction before committing** to one — exploring a UI/UX idea as disposable HTML mockups. The point is to generate 2-3 interactive variants so the user can compare visual directions side-by-side, not to produce shippable code.
+
+Load this when the user says things like "sketch this screen", "show me what X could look like", "compare layout A vs B", "give me 2-3 takes on this UI", "let me see some variants", "mockup this before I build".
+
+## When NOT to use this
+
+- User wants a production component — use `claude-design` or build it properly
+- User wants a polished one-off HTML artifact (landing page, deck) — `claude-design`
+- User wants a diagram — `excalidraw`, `architecture-diagram`
+- The design is already locked — just build it
+
+## If the user has the full GSD system installed
+
+If `gsd-sketch` shows up as a sibling skill (installed via `npx get-shit-done-cc --hermes`), prefer **`gsd-sketch`** for the full workflow: persistent `.planning/sketches/` with MANIFEST, frontier mode analysis, consistency audits across past sketches, and integration with the rest of GSD. This skill is the lightweight standalone version — one-off sketching without the state machinery.
+
+## Core method
+
+```
+intake  →  variants  →  head-to-head  →  pick winner (or iterate)
+```
+
+### 1. Intake (skip if the user already gave you enough)
+
+Before generating variants, get three things — one question at a time, not all at once:
+
+1. **Feel.** "What should this feel like? Adjectives, emotions, a vibe." — *"calm, editorial, like Linear"* tells you more than *"minimal"*.
+2. **References.** "What apps, sites, or products capture the feel you're imagining?" — actual references beat abstract descriptions.
+3. **Core action.** "What's the single most important thing a user does on this screen?" — the variants should all serve this well; if they don't, they're just decoration.
+
+Reflect each answer briefly before the next question. If the user already gave you all three upfront, skip straight to variants.
+
+### 2. Variants (2-3, never 1, rarely 4+)
+
+Produce **2-3 variants** in one go. Each variant is a complete, standalone HTML file. Don't describe variants — build them. The point is comparison.
+
+Each variant should take a **different design stance**, not different pixel values. Good variant axes:
+
+- **Density:** compact / airy / ultra-dense (pick two contrasting poles)
+- **Emphasis:** content-first / action-first / tool-first
+- **Aesthetic:** editorial / utilitarian / playful
+- **Layout:** single-column / sidebar / split-pane
+- **Grounding:** card-based / bare-content / document-style
+
+Pick one axis and pull the variants apart along it. Two variants that differ only in accent color are wasted effort — the user can't distinguish them.
+
+**Variant naming:** describe the stance, not the number.
+
+```
+sketches/
+├── 001-calm-editorial/
+│   ├── index.html
+│   └── README.md
+├── 001-utilitarian-dense/
+│   ├── index.html
+│   └── README.md
+└── 001-playful-split/
+    ├── index.html
+    └── README.md
+```
+
+### 3. Make them real HTML
+
+Each variant is a **single self-contained HTML file**:
+
+- Inline `<style>` — no build step, no external CSS
+- System fonts or one Google Font via `<link>`
+- Tailwind via CDN (`<script src="https://cdn.tailwindcss.com"></script>`) is fine
+- Realistic fake content — actual sentences, actual names, not "Lorem ipsum"
+- **Interactive**: links clickable, hovers real, at least one state transition (open/close, filter, toggle). A frozen static image is a worse sketch than a sloppy animated one.
+
+Open it in a browser. If it looks broken, fix it before showing the user.
+
+**Verify variants visually — use Hermes' browser tools.** Don't just write HTML and hope it renders; load each variant and look at it:
+
+```
+browser_navigate(url="file:///absolute/path/to/sketches/001-calm-editorial/index.html")
+browser_vision(question="Does this layout look clean and readable? Any visible bugs (overlapping text, unstyled elements, broken images)?")
+```
+
+`browser_vision` returns an AI description of what's actually on the page plus a screenshot path — catches layout bugs that pure source inspection misses (e.g. a font import that silently failed, a flex container that collapsed). Fix and re-navigate until each variant looks right.
+
+**Default CSS reset + system font stack** for fast starts:
+
+```html
+<style>
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  body {
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
+                 "Helvetica Neue", Arial, sans-serif;
+    -webkit-font-smoothing: antialiased;
+    color: #1a1a1a;
+    background: #fafafa;
+    line-height: 1.5;
+  }
+</style>
+```
+
+### 4. Variant README
+
+Each variant's `README.md` answers:
+
+```markdown
+## Variant: {stance name}
+
+### Design stance
+One sentence on the principle driving this variant.
+
+### Key choices
+- Layout: ...
+- Typography: ...
+- Color: ...
+- Interaction: ...
+
+### Trade-offs
+- Strong at: ...
+- Weak at: ...
+
+### Best for
+- The kind of user or use case this variant actually serves
+```
+
+### 5. Head-to-head
+
+After all variants are built, present them as a comparison. Don't just list — **opinionate**:
+
+```markdown
+## Three takes on the home screen
+
+| Dimension | Calm editorial | Utilitarian dense | Playful split |
+|-----------|----------------|-------------------|---------------|
+| Density   | Low            | High              | Medium        |
+| Primary action visibility | Low | High | Medium |
+| Scan-ability | High | Medium | Low |
+| Feel | Calm, trusted | Sharp, tool-like | Inviting, energetic |
+
+**My take:** Utilitarian dense for power users, calm editorial for content-forward audiences. Playful split is weakest — tries to do both and commits to neither.
+```
+
+Let the user pick a winner, or combine two into a hybrid, or ask for another round.
+
+## Theming (when the project has a visual identity)
+
+If the user has an existing theme (colors, fonts, tokens), put shared tokens in `sketches/themes/tokens.css` and `@import` them in each variant. Keep tokens minimal:
+
+```css
+/* sketches/themes/tokens.css */
+:root {
+  --color-bg: #fafafa;
+  --color-fg: #1a1a1a;
+  --color-accent: #0066ff;
+  --color-muted: #666;
+  --radius: 8px;
+  --font-display: "Inter", sans-serif;
+  --font-body: -apple-system, BlinkMacSystemFont, sans-serif;
+}
+```
+
+Don't over-tokenize a throwaway sketch — three colors and one font is usually enough.
+
+## Interactivity bar
+
+A sketch is interactive enough when the user can:
+
+1. **Click a primary action** and something visible happens (state change, modal, toast, navigation feint)
+2. **See one meaningful state transition** (filter a list, toggle a mode, open/close a panel)
+3. **Hover recognizable affordances** (buttons, rows, tabs)
+
+More than that is over-engineering a throwaway. Less than that is a screenshot.
+
+## Frontier mode (picking what to sketch next)
+
+If sketches already exist and the user says "what should I sketch next?":
+
+- **Consistency gaps** — two winning variants from different sketches made independent choices that haven't been composed together yet
+- **Unsketched screens** — referenced but never explored
+- **State coverage** — happy path sketched, but not empty / loading / error / 1000-items
+- **Responsive gaps** — validated at one viewport; does it hold at mobile / ultrawide?
+- **Interaction patterns** — static layouts exist; transitions, drag, scroll behavior don't
+
+Propose 2-4 named candidates. Let the user pick.
+
+## Output
+
+- Create `sketches/` (or `.planning/sketches/` if the user is using GSD conventions) in the repo root
+- One subdir per variant: `NNN-stance-name/index.html` + `README.md`
+- Tell the user how to open them: `open sketches/001-calm-editorial/index.html` on macOS, `xdg-open` on Linux, `start` on Windows
+- Keep variants disposable — a sketch that you felt the need to preserve should be promoted into real project code, not curated as an asset
+
+**Typical tool sequence for one variant:**
+
+```
+terminal("mkdir -p sketches/001-calm-editorial")
+write_file("sketches/001-calm-editorial/index.html", "<!doctype html>...")
+write_file("sketches/001-calm-editorial/README.md", "## Variant: Calm editorial\n...")
+browser_navigate(url="file:///absolute/path/to/sketches/001-calm-editorial/index.html")
+browser_vision(question="How does this look? Any obvious layout issues?")
+```
+
+Repeat for each variant, then present the comparison table.
+
+## Attribution
+
+Adapted from the GSD (Get Shit Done) project's `/gsd-sketch` workflow — MIT © 2025 Lex Christopherson ([gsd-build/get-shit-done](https://github.com/gsd-build/get-shit-done)). The full GSD system ships persistent sketch state, theme/variant pattern references, and consistency-audit workflows; install with `npx get-shit-done-cc --hermes --global`.
@@ -0,0 +1,196 @@
+---
+name: spike
+description: "Throwaway experiments to validate an idea before build."
+version: 1.0.0
+author: Hermes Agent (adapted from gsd-build/get-shit-done)
+license: MIT
+metadata:
+  hermes:
+    tags: [spike, prototype, experiment, feasibility, throwaway, exploration, research, planning, mvp, proof-of-concept]
+    related_skills: [sketch, writing-plans, subagent-driven-development, plan]
+---
+
+# Spike
+
+Use this skill when the user wants to **feel out an idea** before committing to a real build — validating feasibility, comparing approaches, or surfacing unknowns that no amount of research will answer. Spikes are disposable by design. Throw them away once they've paid their debt.
+
+Load this when the user says things like "let me try this", "I want to see if X works", "spike this out", "before I commit to Y", "quick prototype of Z", "is this even possible?", or "compare A vs B".
+
+## When NOT to use this
+
+- The answer is knowable from docs or reading code — just do research, don't build
+- The work is production path — use `writing-plans` / `plan` instead
+- The idea is already validated — jump straight to implementation
+
+## If the user has the full GSD system installed
+
+If `gsd-spike` shows up as a sibling skill (installed via `npx get-shit-done-cc --hermes`), prefer **`gsd-spike`** when the user wants the full GSD workflow: persistent `.planning/spikes/` state, MANIFEST tracking across sessions, Given/When/Then verdict format, and commit patterns that integrate with the rest of GSD. This skill is the lightweight standalone version for users who don't have (or don't want) the full system.
+
+## Core method
+
+Regardless of scale, every spike follows this loop:
+
+```
+decompose  →  research  →  build  →  verdict
+   ↑__________________________________________↓
+                  iterate on findings
+```
+
+### 1. Decompose
+
+Break the user's idea into **2-5 independent feasibility questions**. Each question is one spike. Present them as a table with Given/When/Then framing:
+
+| # | Spike | Validates (Given/When/Then) | Risk |
+|---|-------|----------------------------|------|
+| 001 | websocket-streaming | Given a WS connection, when LLM streams tokens, then client receives chunks < 100ms | High |
+| 002a | pdf-parse-pdfjs | Given a multi-page PDF, when parsed with pdfjs, then structured text is extractable | Medium |
+| 002b | pdf-parse-camelot | Given a multi-page PDF, when parsed with camelot, then structured text is extractable | Medium |
+
+**Spike types:**
+- **standard** — one approach answering one question
+- **comparison** — same question, different approaches (shared number, letter suffix `a`/`b`/`c`)
+
+**Good spike questions:** specific feasibility with observable output.
+**Bad spike questions:** too broad, no observable output, or just "read the docs about X".
+
+**Order by risk.** The spike most likely to kill the idea runs first. No point prototyping the easy parts if the hard part doesn't work.
+
+**Skip decomposition** only if the user already knows exactly what they want to spike and says so. Then take their idea as a single spike.
+
+### 2. Align (for multi-spike ideas)
+
+Present the spike table. Ask: "Build all in this order, or adjust?" Let the user drop, reorder, or re-frame before you write any code.
+
+### 3. Research (per spike, before building)
+
+Spikes are not research-free — you research enough to pick the right approach, then you build. Per spike:
+
+1. **Brief it.** 2-3 sentences: what this spike is, why it matters, key risk.
+2. **Surface competing approaches** if there's real choice:
+
+   | Approach | Tool/Library | Pros | Cons | Status |
+   |----------|-------------|------|------|--------|
+   | ... | ... | ... | ... | maintained / abandoned / beta |
+
+3. **Pick one.** State why. If 2+ are credible, build quick variants within the spike.
+4. **Skip research** for pure logic with no external dependencies.
+
+Use Hermes tools for the research step:
+
+- `web_search("python websocket streaming libraries 2025")` — find candidates
+- `web_extract(urls=["https://websockets.readthedocs.io/..."])` — read the actual docs (returns markdown)
+- `terminal("pip show websockets | grep Version")` — check what's installed in the project's venv
+
+For libraries without docs pages, clone and read their `README.md` / `examples/` via `read_file`. Context7 MCP (if the user has it configured) is also a good source — `mcp_*_resolve-library-id` then `mcp_*_query-docs`.
+
+### 4. Build
+
+One directory per spike. Keep it standalone.
+
+```
+spikes/
+├── 001-websocket-streaming/
+│   ├── README.md
+│   └── main.py
+├── 002a-pdf-parse-pdfjs/
+│   ├── README.md
+│   └── parse.js
+└── 002b-pdf-parse-camelot/
+    ├── README.md
+    └── parse.py
+```
+
+**Bias toward something the user can interact with.** Spikes fail when the only output is a log line that says "it works." The user wants to *feel* the spike working. Default choices, in order of preference:
+
+1. A runnable CLI that takes input and prints observable output
+2. A minimal HTML page that demonstrates the behavior
+3. A small web server with one endpoint
+4. A unit test that exercises the question with recognizable assertions
+
+**Depth over speed.** Never declare "it works" after one happy-path run. Test edge cases. Follow surprising findings. The verdict is only trustworthy when the investigation was honest.
+
+**Avoid** unless the spike specifically requires it: complex package management, build tools/bundlers, Docker, env files, config systems. Hardcode everything — it's a spike.
+
+**Building one spike** — a typical tool sequence:
+
+```
+terminal("mkdir -p spikes/001-websocket-streaming")
+write_file("spikes/001-websocket-streaming/README.md", "# 001: websocket-streaming\n\n...")
+write_file("spikes/001-websocket-streaming/main.py", "...")
+terminal("cd spikes/001-websocket-streaming && python3 main.py")
+# Observe output, iterate.
+```
+
+**Parallel comparison spikes (002a / 002b) — delegate.** When two approaches can run in parallel and both need real engineering (not 10-line prototypes), fan out with `delegate_task`:
+
+```
+delegate_task(tasks=[
+    {"goal": "Build 002a-pdf-parse-pdfjs: ...", "toolsets": ["terminal", "file", "web"]},
+    {"goal": "Build 002b-pdf-parse-camelot: ...", "toolsets": ["terminal", "file", "web"]},
+])
+```
+
+Each subagent returns its own verdict; you write the head-to-head.
+
+### 5. Verdict
+
+Each spike's `README.md` closes with:
+
+```markdown
+## Verdict: VALIDATED | PARTIAL | INVALIDATED
+
+### What worked
+- ...
+
+### What didn't
+- ...
+
+### Surprises
+- ...
+
+### Recommendation for the real build
+- ...
+```
+
+**VALIDATED** = the core question was answered yes, with evidence.
+**PARTIAL** = it works under constraints X, Y, Z — document them.
+**INVALIDATED** = doesn't work, for this reason. This is a successful spike.
+
+## Comparison spikes
+
+When two approaches answer the same question (002a / 002b), build them **back to back**, then do a head-to-head comparison at the end:
+
+```markdown
+## Head-to-head: pdfjs vs camelot
+
+| Dimension | pdfjs (002a) | camelot (002b) |
+|-----------|--------------|----------------|
+| Extraction quality | 9/10 structured | 7/10 table-only |
+| Setup complexity | npm install, 1 line | pip + ghostscript |
+| Perf on 100-page PDF | 3s | 18s |
+| Handles rotated text | no | yes |
+
+**Winner:** pdfjs for our use case. Camelot if we need table-first extraction later.
+```
+
+## Frontier mode (picking what to spike next)
+
+If spikes already exist and the user says "what should I spike next?", walk the existing directories and look for:
+
+- **Integration risks** — two validated spikes that touch the same resource but were tested independently
+- **Data handoffs** — spike A's output was assumed compatible with spike B's input; never proven
+- **Gaps in the vision** — capabilities assumed but unproven
+- **Alternative approaches** — different angles for PARTIAL or INVALIDATED spikes
+
+Propose 2-4 candidates as Given/When/Then. Let the user pick.
+
+## Output
+
+- Create `spikes/` (or `.planning/spikes/` if the user is using GSD conventions) in the repo root
+- One dir per spike: `NNN-descriptive-name/`
+- `README.md` per spike captures question, approach, results, verdict
+- Keep the code throwaway — a spike that takes 2 days to "clean up for production" was a bad spike
+
+## Attribution
+
+Adapted from the GSD (Get Shit Done) project's `/gsd-spike` workflow — MIT © 2025 Lex Christopherson ([gsd-build/get-shit-done](https://github.com/gsd-build/get-shit-done)). The full GSD system offers persistent spike state, MANIFEST tracking, and integration with a broader spec-driven development pipeline; install with `npx get-shit-done-cc --hermes --global`.
@@ -340,3 +340,12 @@ Catch issues early
 ```

 **Quality is not an accident. It's the result of systematic process.**
+
+## Further reading (load when relevant)
+
+When the orchestration involves significant context usage, long review loops, or complex validation checkpoints, load these references for the specific discipline:
+
+- **`references/context-budget-discipline.md`** — Four-tier context degradation model (PEAK / GOOD / DEGRADING / POOR), read-depth rules that scale with context window size, and early warning signs of silent degradation. Load when a run will clearly consume significant context (multi-phase plans, many subagents, large artifacts).
+- **`references/gates-taxonomy.md`** — The four canonical gate types (Pre-flight, Revision, Escalation, Abort) with behavior, recovery, and examples. Load when designing or reviewing any workflow that has validation checkpoints — use the vocabulary explicitly so each gate has defined entry, failure behavior, and resumption rules.
+
+Both references adapted from gsd-build/get-shit-done (MIT © 2025 Lex Christopherson).
@@ -0,0 +1,53 @@
+# Context Budget Discipline
+
+Practical rules for keeping orchestrator context lean when spawning subagents or reading large artifacts. Use these whenever you're running a multi-step agent loop that will consume significant context — plan execution, subagent orchestration, review pipelines, multi-file refactors.
+
+Adapted from the GSD (Get Shit Done) project's context-budget reference — MIT © 2025 Lex Christopherson ([gsd-build/get-shit-done](https://github.com/gsd-build/get-shit-done)).
+
+## Universal rules
+
+Every workflow that spawns agents or reads significant content must follow these:
+
+1. **Never read agent definition files.** `delegate_task` auto-loads them — you reading them too just doubles the cost.
+2. **Never inline large files into subagent prompts.** Tell the agent to read the file from disk with `read_file` instead. The subagent gets full content; your context stays lean.
+3. **Read depth scales with context window.** See the table below.
+4. **Delegate heavy work to subagents.** The orchestrator routes; it doesn't execute.
+5. **Proactively warn** the user when you've consumed significant context ("Context is getting heavy — consider checkpointing progress before we continue").
+
+## Read depth by context window
+
+Check the model's actual context window (not "it's Claude so 200K"). Some Sonnet deployments are 1M, some are 200K. If you don't know, assume the smaller one — err toward leanness.
+
+| Context window | Subagent output reading | Summary files | Verification files | Plans for other phases |
+|----------------|-------------------------|---------------|--------------------|-----------------------|
+| < 500k (e.g. 200k) | Frontmatter only | Frontmatter only | Frontmatter only | Current phase only |
+| >= 500k (1M models) | Full body permitted | Full body permitted | Full body permitted | Current phase only |
+
+"Frontmatter only" means: read enough to see the final status/verdict/conclusion. If the subagent wrote a 3000-line debug log, read the summary section it produced, not the log.
+
+## Four-tier degradation model
+
+Monitor your context usage and shift behavior as you climb the tiers. The point is to notice *before* you hit the wall, not when responses start truncating.
+
+| Tier | Usage | Behavior |
+|------|-------|----------|
+| **PEAK** | 0 – 30% | Full operations. Read bodies, spawn multiple agents in parallel, inline results freely. |
+| **GOOD** | 30 – 50% | Normal operations. Prefer frontmatter reads. Delegate aggressively. |
+| **DEGRADING** | 50 – 70% | Economize. Frontmatter-only reads, minimal inlining, **warn the user** about budget. |
+| **POOR** | 70%+ | Emergency mode. **Checkpoint progress immediately.** No new reads unless critical. Finish the current task and stop cleanly. |
+
+## Early warning signs (before panic thresholds fire)
+
+Quality degrades *gradually* before hard limits hit. Watch for these:
+
+- **Silent partial completion.** Subagent claims done but implementation is incomplete. Self-checks catch file existence, not semantic completeness. Always verify subagent output against the plan's must-haves, not just "did a file appear?"
+- **Increasing vagueness.** Agent starts using phrases like "appropriate handling" or "standard patterns" instead of specific code. This is context pressure showing up before budget warnings fire.
+- **Skipped protocol steps.** Agent omits steps it would normally follow. If the success criteria list 8 items and the report covers 5, suspect context pressure, not "the agent decided 5 was enough."
+
+When these signs appear, checkpoint the work and either reset context or hand off to a fresh subagent.
+
+## Fundamental limitation
+
+When you orchestrate, you cannot verify semantic correctness of subagent output — only structural completeness ("did the file appear?", "does the test pass?"). Semantic verification requires either running the code yourself or delegating a review pass to another fresh subagent.
+
+**Mitigation:** in every task you delegate, include explicit "must-have" truths the subagent must confirm in its response (e.g., "confirm your test actually tests X, not just that X was imported"). The subagent re-asserting concrete facts is evidence; vague summaries are not.
@@ -0,0 +1,93 @@
+# Gates Taxonomy
+
+Canonical gate types for validation checkpoints across any workflow that spawns subagents, runs review loops, or has human-approval pauses. Every validation checkpoint maps to one of these four types — naming them explicitly makes the workflow legible and prevents "what happens when this check fails?" confusion.
+
+Adapted from the GSD (Get Shit Done) project's gates reference — MIT © 2025 Lex Christopherson ([gsd-build/get-shit-done](https://github.com/gsd-build/get-shit-done)).
+
+## The four gate types
+
+### 1. Pre-flight gate
+
+**Purpose:** Validates preconditions before starting an operation.
+
+**Behavior:** Blocks entry if conditions unmet. No partial work created — bail before anything changes.
+
+**Recovery:** Fix the missing precondition, then retry.
+
+**Examples:**
+- Implementation phase checks that the plan file exists before it starts writing code.
+- Delegated subagent checks that required env vars are set before making API calls.
+- Commit step checks that tests passed before pushing.
+
+### 2. Revision gate
+
+**Purpose:** Evaluates output quality and routes to revision if insufficient.
+
+**Behavior:** Loops back to the producer with specific feedback. Bounded by an iteration cap (typically 3).
+
+**Recovery:** Producer addresses feedback; checker re-evaluates. The loop escalates early if issue count does not decrease between consecutive iterations (stall detection). After max iterations, escalates to the user unconditionally — never loop forever.
+
+**Examples:**
+- Plan reviewer reads a draft plan, returns specific issues, planner revises, reviewer re-reads (max 3 cycles).
+- Code reviewer checks subagent-produced code against must-haves; dispatches fixes back to the implementer if any must-have failed.
+- Test coverage checker validates new tests exercise the new paths; if not, sends back to author.
+
+### 3. Escalation gate
+
+**Purpose:** Surfaces unresolvable issues to the human for a decision.
+
+**Behavior:** Pauses workflow, presents options, waits for human input. Never guesses, never picks a default.
+
+**Recovery:** Human chooses action; workflow resumes on the selected path.
+
+**Examples:**
+- Revision loop exhausted after 3 iterations.
+- Merge conflict during automated worktree cleanup.
+- Ambiguous requirement — two reasonable interpretations and the choice changes the approach.
+- Subagent reports "the plan says X but the codebase actually does Y" — human decides which is right.
+
+### 4. Abort gate
+
+**Purpose:** Terminates the operation to prevent damage or waste.
+
+**Behavior:** Stops immediately, preserves state (checkpoint current progress), reports the specific reason.
+
+**Recovery:** Human investigates root cause, fixes, restarts from checkpoint.
+
+**Examples:**
+- Context window critically low during execution (POOR tier, >70%) — abort cleanly rather than produce truncated output.
+- Critical dependency unavailable mid-run (network down, API key revoked).
+- Unrecoverable filesystem state (disk full, permissions lost).
+- Safety invariant violated (agent attempted an irreversible destructive action outside approved scope).
+
+## How to use this in a skill
+
+When you write an orchestration skill that has validation checkpoints, **name each checkpoint by its gate type explicitly** and answer three questions:
+
+1. **What condition triggers this gate?** (e.g., "plan file missing", "issue count didn't decrease", "context >70%")
+2. **What happens when it fails?** (block / loop back / ask human / abort)
+3. **Who resumes, and from where?** (fix precondition + retry, revise + re-check, human decision, restart from checkpoint)
+
+Answering these three up front means your skill never hits "what do we do now?" at runtime.
+
+## Example — a review loop with all four gate types
+
+```
+[Pre-flight] plan.md exists and is non-empty?   → no: bail, ask user to write a plan first
+                ↓ yes
+[Execute]  subagent implements task
+                ↓
+[Revision] reviewer checks against must-haves  → fail: loop back to subagent (max 3)
+                ↓ pass
+[Pre-flight] tests pass?                       → no: bail, report failing tests
+                ↓ yes
+[Commit]
+                ↓
+(on revision loop exhaustion)
+[Escalation] "3 review cycles failed to converge on issue X — pick: force-merge, rewrite task, abandon"
+                ↓ user picks
+(on any tier-POOR context pressure during loop)
+[Abort] "context at 73%, checkpointing and stopping"
+```
+
+The vocabulary is small on purpose. Every gate in every workflow should fit one of these four. If you find yourself inventing a fifth, it's probably a revision gate with extra branching, or an escalation gate in disguise.
@@ -68,33 +68,6 @@ class TestBuildAnthropicClient:
            assert "fine-grained-tool-streaming-2025-05-14" in betas
            assert "api_key" not in kwargs

-    def test_oauth_does_not_send_claude_code_spoof_headers(self):
-        """OAuth requests identify as Hermes — no claude-cli UA, no x-app: cli.
-
-        Anthropic's OAuth-gated Messages API accepts requests from non-Claude-Code
-        clients as long as auth is correct and the OAuth beta headers are present.
-        See commit that removed fingerprinting for the live-test write-up.
-        """
-        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
-            build_anthropic_client("sk-ant-oat01-" + "x" * 60)
-            headers = mock_sdk.Anthropic.call_args[1]["default_headers"]
-            assert "user-agent" not in {k.lower() for k in headers}
-            assert "x-app" not in {k.lower() for k in headers}
-
-    def test_oauth_strips_context_1m_beta(self):
-        """context-1m-2025-08-07 is incompatible with OAuth auth — must be stripped.
-
-        Anthropic returns HTTP 400 "This authentication style is incompatible
-        with the long context beta header." when OAuth traffic carries it.
-        """
-        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
-            build_anthropic_client("sk-ant-oat01-" + "x" * 60)
-            betas = mock_sdk.Anthropic.call_args[1]["default_headers"]["anthropic-beta"]
-            assert "context-1m-2025-08-07" not in betas
-            # But other common betas still flow through
-            assert "interleaved-thinking-2025-05-14" in betas
-            assert "oauth-2025-04-20" in betas
-
    def test_api_key_uses_api_key(self):
        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
            build_anthropic_client("sk-ant-api03-something")
@@ -113,7 +86,7 @@ class TestBuildAnthropicClient:
            kwargs = mock_sdk.Anthropic.call_args[1]
            assert kwargs["base_url"] == "https://custom.api.com"
            assert kwargs["default_headers"] == {
-                "anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
+                "anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07"
            }

    def test_minimax_anthropic_endpoint_uses_bearer_auth_for_regular_api_keys(self):
@@ -1635,3 +1635,106 @@ class TestCodexAdapterReasoningTranslation:
        )
        assert "reasoning" not in captured

+
+
+class TestVisionAutoSkipsKimiCoding:
+    """_resolve_auto vision branch skips providers that have no vision on
+    their main endpoint (e.g. Kimi Coding Plan /coding) and falls through
+    to the aggregator chain instead of handing back a client that will 404
+    on every request (#17076).
+    """
+
+    def test_kimi_coding_skipped_falls_through_to_openrouter(self, monkeypatch):
+        """kimi-coding as main + vision auto → OpenRouter (not kimi)."""
+        fake_or_client = MagicMock(name="openrouter_client")
+
+        monkeypatch.setattr(
+            "agent.auxiliary_client._read_main_provider", lambda: "kimi-coding",
+        )
+        monkeypatch.setattr(
+            "agent.auxiliary_client._read_main_model", lambda: "kimi-code",
+        )
+        # Guard: if the skip doesn't fire, the auto path would reach
+        # kimi-coding through _resolve_strict_vision_backend or
+        # resolve_provider_client — both are stubbed to fail loudly.
+        rpc_mock = MagicMock(side_effect=AssertionError(
+            "resolve_provider_client should NOT be called for kimi-coding "
+            "on the vision auto path"))
+        monkeypatch.setattr(
+            "agent.auxiliary_client.resolve_provider_client", rpc_mock,
+        )
+
+        def fake_strict(provider, model=None):
+            if provider == "openrouter":
+                return fake_or_client, "google/gemini-3-flash-preview"
+            if provider == "nous":
+                return None, None
+            raise AssertionError(
+                f"strict vision backend should not be called for {provider!r} "
+                "when main provider is kimi-coding"
+            )
+        monkeypatch.setattr(
+            "agent.auxiliary_client._resolve_strict_vision_backend",
+            fake_strict,
+        )
+
+        provider, client, model = resolve_vision_provider_client()
+        assert provider == "openrouter"
+        assert client is fake_or_client
+        assert model == "google/gemini-3-flash-preview"
+
+    def test_kimi_coding_cn_skipped_too(self, monkeypatch):
+        """Same skip applies to the CN variant."""
+        fake_or_client = MagicMock(name="openrouter_client")
+
+        monkeypatch.setattr(
+            "agent.auxiliary_client._read_main_provider", lambda: "kimi-coding-cn",
+        )
+        monkeypatch.setattr(
+            "agent.auxiliary_client._read_main_model", lambda: "kimi-code",
+        )
+        rpc_mock = MagicMock(side_effect=AssertionError(
+            "resolve_provider_client should NOT be called for kimi-coding-cn"))
+        monkeypatch.setattr(
+            "agent.auxiliary_client.resolve_provider_client", rpc_mock,
+        )
+        monkeypatch.setattr(
+            "agent.auxiliary_client._resolve_strict_vision_backend",
+            lambda p, m=None: (fake_or_client, "gemini")
+            if p == "openrouter"
+            else (None, None),
+        )
+
+        provider, client, _ = resolve_vision_provider_client()
+        assert provider == "openrouter"
+        assert client is fake_or_client
+
+    def test_explicit_override_to_kimi_coding_still_honored(self, monkeypatch):
+        """When a user *explicitly* requests kimi-coding for vision (e.g.
+        they know what they're doing, or are running a future build that
+        adds image_in capability to Kimi Code), the explicit path still
+        routes to kimi-coding — only the auto branch applies the skip.
+        """
+        monkeypatch.setattr(
+            "agent.auxiliary_client._read_main_provider", lambda: "openrouter",
+        )
+        fake_kimi_client = MagicMock(name="kimi_client")
+        gcc_mock = MagicMock(return_value=(fake_kimi_client, "kimi-code"))
+        monkeypatch.setattr(
+            "agent.auxiliary_client._get_cached_client", gcc_mock,
+        )
+
+        provider, client, model = resolve_vision_provider_client(
+            provider="kimi-coding",
+        )
+        assert provider == "kimi-coding"
+        assert client is fake_kimi_client
+        gcc_mock.assert_called_once()
+
+    def test_skip_set_covers_exactly_known_entries(self):
+        """Guard against accidental widening of the skip list."""
+        from agent.auxiliary_client import _PROVIDERS_WITHOUT_VISION
+        assert _PROVIDERS_WITHOUT_VISION == frozenset({
+            "kimi-coding",
+            "kimi-coding-cn",
+        })
@@ -0,0 +1,487 @@
+"""Tests for agent/curator.py — orchestrator, idle gating, state transitions.
+
+LLM spawning is never exercised here — `_run_llm_review` is monkeypatched so
+tests run fully offline and the curator module doesn't need real credentials.
+"""
+
+from __future__ import annotations
+
+import importlib
+import json
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def curator_env(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME + freshly reloaded curator + skill_usage modules."""
+    home = tmp_path / ".hermes"
+    (home / "skills").mkdir(parents=True)
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+
+    import tools.skill_usage as usage
+    importlib.reload(usage)
+    import agent.curator as curator
+    importlib.reload(curator)
+
+    # Neutralize the real LLM pass by default — tests opt in per-case.
+    monkeypatch.setattr(curator, "_run_llm_review", lambda prompt: "llm-stub")
+
+    # Default: no config file → curator defaults. Tests can override.
+    monkeypatch.setattr(curator, "_load_config", lambda: {})
+
+    return {"home": home, "curator": curator, "usage": usage}
+
+
+def _write_skill(skills_dir: Path, name: str):
+    d = skills_dir / name
+    d.mkdir(parents=True, exist_ok=True)
+    (d / "SKILL.md").write_text(
+        f"---\nname: {name}\ndescription: x\n---\n", encoding="utf-8",
+    )
+    return d
+
+
+# ---------------------------------------------------------------------------
+# Config gates
+# ---------------------------------------------------------------------------
+
+def test_curator_enabled_default_true(curator_env):
+    assert curator_env["curator"].is_enabled() is True
+
+
+def test_curator_disabled_via_config(curator_env, monkeypatch):
+    c = curator_env["curator"]
+    monkeypatch.setattr(c, "_load_config", lambda: {"enabled": False})
+    assert c.is_enabled() is False
+    assert c.should_run_now() is False
+
+
+def test_curator_defaults(curator_env):
+    c = curator_env["curator"]
+    assert c.get_interval_hours() == 24 * 7  # 7 days
+    assert c.get_min_idle_hours() == 2
+    assert c.get_stale_after_days() == 30
+    assert c.get_archive_after_days() == 90
+
+
+def test_curator_config_overrides(curator_env, monkeypatch):
+    c = curator_env["curator"]
+    monkeypatch.setattr(c, "_load_config", lambda: {
+        "interval_hours": 12,
+        "min_idle_hours": 0.5,
+        "stale_after_days": 7,
+        "archive_after_days": 60,
+    })
+    assert c.get_interval_hours() == 12
+    assert c.get_min_idle_hours() == 0.5
+    assert c.get_stale_after_days() == 7
+    assert c.get_archive_after_days() == 60
+
+
+# ---------------------------------------------------------------------------
+# should_run_now
+# ---------------------------------------------------------------------------
+
+def test_first_run_always_eligible(curator_env):
+    c = curator_env["curator"]
+    assert c.should_run_now() is True
+
+
+def test_recent_run_blocks(curator_env):
+    c = curator_env["curator"]
+    c.save_state({
+        "last_run_at": datetime.now(timezone.utc).isoformat(),
+        "paused": False,
+    })
+    assert c.should_run_now() is False
+
+
+def test_old_run_eligible(curator_env):
+    """A run older than the configured interval should re-trigger. Use a
+    2x-interval cushion so the test doesn't become coupled to the exact
+    default — bumping DEFAULT_INTERVAL_HOURS shouldn't break it."""
+    c = curator_env["curator"]
+    long_ago = datetime.now(timezone.utc) - timedelta(
+        hours=c.get_interval_hours() * 2
+    )
+    c.save_state({"last_run_at": long_ago.isoformat(), "paused": False})
+    assert c.should_run_now() is True
+
+
+def test_paused_blocks_even_if_stale(curator_env):
+    c = curator_env["curator"]
+    long_ago = datetime.now(timezone.utc) - timedelta(hours=c.get_interval_hours() * 2)
+    c.save_state({"last_run_at": long_ago.isoformat(), "paused": True})
+    assert c.should_run_now() is False  # overdue by 2x the interval, so only the pause can block
+
+
+def test_set_paused_roundtrip(curator_env):
+    c = curator_env["curator"]
+    c.set_paused(True)
+    assert c.is_paused() is True
+    c.set_paused(False)
+    assert c.is_paused() is False
+
+
+# ---------------------------------------------------------------------------
+# Automatic state transitions
+# ---------------------------------------------------------------------------
+
+def test_unused_skill_transitions_to_stale(curator_env):
+    c = curator_env["curator"]
+    u = curator_env["usage"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "old-skill")
+
+    # Record last-use well past stale_after_days (30 default)
+    long_ago = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat()
+    data = u.load_usage()
+    data["old-skill"] = u._empty_record()
+    data["old-skill"]["last_used_at"] = long_ago
+    data["old-skill"]["created_at"] = long_ago
+    u.save_usage(data)
+
+    counts = c.apply_automatic_transitions()
+    assert counts["marked_stale"] == 1
+    assert u.get_record("old-skill")["state"] == "stale"
+
+
+def test_very_old_skill_gets_archived(curator_env):
+    c = curator_env["curator"]
+    u = curator_env["usage"]
+    skills_dir = curator_env["home"] / "skills"
+    skill_dir = _write_skill(skills_dir, "ancient")
+
+    super_old = (datetime.now(timezone.utc) - timedelta(days=120)).isoformat()
+    data = u.load_usage()
+    data["ancient"] = u._empty_record()
+    data["ancient"]["last_used_at"] = super_old
+    data["ancient"]["created_at"] = super_old
+    u.save_usage(data)
+
+    counts = c.apply_automatic_transitions()
+    assert counts["archived"] == 1
+    assert not skill_dir.exists()
+    assert (skills_dir / ".archive" / "ancient" / "SKILL.md").exists()
+    assert u.get_record("ancient")["state"] == "archived"
+
+
+def test_pinned_skill_is_never_touched(curator_env):
+    c = curator_env["curator"]
+    u = curator_env["usage"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "precious")
+
+    super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
+    data = u.load_usage()
+    data["precious"] = u._empty_record()
+    data["precious"]["last_used_at"] = super_old
+    data["precious"]["created_at"] = super_old
+    data["precious"]["pinned"] = True
+    u.save_usage(data)
+
+    counts = c.apply_automatic_transitions()
+    assert counts["archived"] == 0
+    assert counts["marked_stale"] == 0
+    rec = u.get_record("precious")
+    assert rec["state"] == "active"  # untouched
+    assert rec["pinned"] is True
+
+
+def test_stale_skill_reactivates_on_recent_use(curator_env):
+    c = curator_env["curator"]
+    u = curator_env["usage"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "revived")
+
+    recent = datetime.now(timezone.utc).isoformat()
+    data = u.load_usage()
+    data["revived"] = u._empty_record()
+    data["revived"]["state"] = "stale"
+    data["revived"]["last_used_at"] = recent
+    data["revived"]["created_at"] = recent
+    u.save_usage(data)
+
+    counts = c.apply_automatic_transitions()
+    assert counts["reactivated"] == 1
+    assert u.get_record("revived")["state"] == "active"
+
+
+def test_new_skill_without_last_used_not_immediately_archived(curator_env):
+    """A freshly-created skill with no use history should not get archived
+    just because last_used_at is None."""
+    c = curator_env["curator"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "fresh")
+
+    # No usage record is written up front, so last_used_at is absent when
+    # the curator runs. Curator should create the record and fall back to
+    # created_at, which is ~now.
+    counts = c.apply_automatic_transitions()
+    assert counts["archived"] == 0
+    assert counts["marked_stale"] == 0
+    assert (skills_dir / "fresh").exists()
+
+
+def test_bundled_skill_not_touched_by_transitions(curator_env):
+    c = curator_env["curator"]
+    u = curator_env["usage"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "bundled")
+    (skills_dir / ".bundled_manifest").write_text(
+        "bundled:abc\n", encoding="utf-8",
+    )
+
+    super_old = (datetime.now(timezone.utc) - timedelta(days=500)).isoformat()
+    data = u.load_usage()
+    data["bundled"] = u._empty_record()
+    data["bundled"]["last_used_at"] = super_old
+    u.save_usage(data)
+
+    counts = c.apply_automatic_transitions()
+    # bundled skills are excluded from the agent-created list entirely
+    assert counts["checked"] == 0
+    assert (skills_dir / "bundled").exists()  # never moved
+
+
+# ---------------------------------------------------------------------------
+# run_curator_review orchestration
+# ---------------------------------------------------------------------------
+
+def test_run_review_records_state(curator_env):
+    c = curator_env["curator"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "a")
+
+    result = c.run_curator_review(synchronous=True)
+    assert "started_at" in result
+    state = c.load_state()
+    assert state["last_run_at"] is not None
+    assert state["run_count"] >= 1
+    assert state["last_run_summary"] is not None
+
+
+def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch):
+    c = curator_env["curator"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "a")
+
+    calls = []  # prompts handed to the stubbed LLM pass
+    def _stub(prompt):
+        calls.append(prompt)
+        return {
+            "final": "stubbed-summary",
+            "summary": "stubbed-summary",
+            "model": "stub-model",
+            "provider": "stub-provider",
+            "tool_calls": [],
+            "error": None,
+        }
+    monkeypatch.setattr(c, "_run_llm_review", _stub)
+
+    captured = []  # summaries delivered through the on_summary callback
+    c.run_curator_review(on_summary=lambda s: captured.append(s), synchronous=True)
+
+    assert len(calls) == 1
+    assert "CURATOR" in calls[0]  # also matches the longer "skill CURATOR" marker
+    assert captured  # on_summary was called
+    assert any("stubbed-summary" in s for s in captured)
+
+
+def test_run_review_skips_llm_when_no_candidates(curator_env, monkeypatch):
+    c = curator_env["curator"]
+    # No skills in the dir → no candidates
+    calls = []
+    monkeypatch.setattr(
+        c, "_run_llm_review",
+        lambda prompt: (calls.append(prompt), "never-called")[1],
+    )
+
+    captured = []
+    c.run_curator_review(on_summary=lambda s: captured.append(s), synchronous=True)
+
+    assert calls == []  # LLM not invoked
+    assert any("skipped" in s for s in captured)
+
+
+def test_maybe_run_curator_respects_disabled(curator_env, monkeypatch):
+    c = curator_env["curator"]
+    monkeypatch.setattr(c, "_load_config", lambda: {"enabled": False})
+    result = c.maybe_run_curator()
+    assert result is None
+
+
+def test_maybe_run_curator_enforces_idle_gate(curator_env, monkeypatch):
+    c = curator_env["curator"]
+    monkeypatch.setattr(c, "_load_config", lambda: {"min_idle_hours": 2})
+    # idle less than the threshold
+    result = c.maybe_run_curator(idle_for_seconds=60.0)
+    assert result is None
+
+
+def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch):
+    c = curator_env["curator"]
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "a")
+    # Force idle over threshold
+    result = c.maybe_run_curator(idle_for_seconds=99999.0)
+    assert result is not None
+    assert "started_at" in result
+
+
+def test_maybe_run_curator_swallows_exceptions(curator_env, monkeypatch):
+    c = curator_env["curator"]
+
+    def explode():
+        raise RuntimeError("boom")
+
+    monkeypatch.setattr(c, "should_run_now", explode)
+    # Must not raise
+    assert c.maybe_run_curator() is None
+
+
+# ---------------------------------------------------------------------------
+# Persistence
+# ---------------------------------------------------------------------------
+
+def test_state_file_survives_corrupt_read(curator_env):
+    c = curator_env["curator"]
+    c._state_file().write_text("not json", encoding="utf-8")
+    # Must fall back to default, not raise
+    assert c.load_state() == c._default_state()
+
+
+def test_state_atomic_write_no_tmp_leftovers(curator_env):
+    c = curator_env["curator"]
+    c.save_state({"paused": True})
+    parent = c._state_file().parent
+    for p in parent.iterdir():
+        assert not p.name.startswith(".curator_state_"), f"tmp leftover: {p.name}"
+
+
+def test_curator_review_prompt_has_invariants():
+    """Core invariants must be in the review prompt text."""
+    from agent.curator import CURATOR_REVIEW_PROMPT
+    assert "MUST NOT" in CURATOR_REVIEW_PROMPT or "DO NOT" in CURATOR_REVIEW_PROMPT
+    assert "bundled" in CURATOR_REVIEW_PROMPT.lower()
+    assert "delete" in CURATOR_REVIEW_PROMPT.lower()
+    assert "pinned" in CURATOR_REVIEW_PROMPT.lower()
+    # Must describe the actions the reviewer can take. The exact vocabulary
+    # has tightened over time (the umbrella-first prompt drops 'keep' as a
+    # first-class decision verb, since passive keep-everything is the
+    # failure mode the prompt is trying to avoid), but the core merge /
+    # archive / patch trio must remain callable.
+    for verb in ("patch", "archive"):
+        assert verb in CURATOR_REVIEW_PROMPT.lower()
+    # Must mention consolidation (possibly via "merge" or "consolidat")
+    assert "consolidat" in CURATOR_REVIEW_PROMPT.lower() or "merge" in CURATOR_REVIEW_PROMPT.lower()
+
+
+def test_curator_review_prompt_points_at_existing_tools_only():
+    """The review prompt must rely on existing tools (skill_manage + terminal)
+    and must NOT reference bespoke curator tools that are not registered
+    model tools."""
+    from agent.curator import CURATOR_REVIEW_PROMPT
+    assert "skill_manage" in CURATOR_REVIEW_PROMPT
+    assert "skills_list" in CURATOR_REVIEW_PROMPT
+    assert "skill_view" in CURATOR_REVIEW_PROMPT
+    assert "terminal" in CURATOR_REVIEW_PROMPT.lower()
+    # These would be nice but aren't actually registered as tools — the
+    # curator uses skill_manage + terminal mv instead.
+    assert "archive_skill" not in CURATOR_REVIEW_PROMPT
+    assert "pin_skill" not in CURATOR_REVIEW_PROMPT
+
+
+def test_curator_does_not_instruct_model_to_pin():
+    """Pinning is a user opt-out, not a model decision. The prompt should
+    not tell the reviewer to pin skills autonomously."""
+    from agent.curator import CURATOR_REVIEW_PROMPT
+    # "pinned" appears in the invariant ("skip pinned skills"), but "pin"
+    # as a decision verb should not.
+    lines = CURATOR_REVIEW_PROMPT.split("\n")
+    decision_block = "\n".join(
+        line for line in lines
+        if line.strip().startswith(("keep", "patch", "archive", "consolidate", "pin "))
+    )
+    # No standalone "pin" action line; decision_block is only failure context
+    assert not any(line.strip().startswith("pin ") for line in lines), (
+        f"Found a pin action line in:\n{decision_block}"
+    )
+
+
+def test_curator_review_prompt_is_umbrella_first():
+    """The curator prompt must push umbrella-building / class-level thinking,
+    not pair-level 'are these two the same?' analysis."""
+    from agent.curator import CURATOR_REVIEW_PROMPT
+    lower = CURATOR_REVIEW_PROMPT.lower()
+    # Must frame the task as active umbrella-building, not a passive audit.
+    assert "umbrella" in lower, (
+        "must use UMBRELLA framing — the class-first abstraction the curator "
+        "is designed to produce"
+    )
+    # Must tell the reviewer not to stop at pair-level distinctness.
+    assert "class" in lower, "must reference class-level thinking"
+    # Must cover the three consolidation methods explicitly
+    assert "references/" in CURATOR_REVIEW_PROMPT, (
+        "must name references/ as a demotion target for session-specific content"
+    )
+    # templates/ and scripts/ make the umbrella a real class-level skill
+    assert "templates/" in CURATOR_REVIEW_PROMPT
+    assert "scripts/" in CURATOR_REVIEW_PROMPT
+    # Must say the counter argument: usage=0 is not a reason to skip
+    assert "use_count" in CURATOR_REVIEW_PROMPT or "counter" in lower, (
+        "must pre-empt the 'usage counters are zero, I can't judge' bailout"
+    )
+
+
+def test_curator_review_prompt_offers_support_file_actions():
+    """Support-file demotion (references/templates/scripts) must be one of
+    the three consolidation methods, alongside merge-into-existing and
+    create-new-umbrella."""
+    from agent.curator import CURATOR_REVIEW_PROMPT
+    # skill_manage action=write_file is how references/ are added to an
+    # existing skill — this is the create-adjacent action the curator needs
+    # to demote narrow siblings without touching their SKILL.md.
+    assert "write_file" in CURATOR_REVIEW_PROMPT
+    # Must offer creating a brand-new umbrella when no existing one fits
+    assert "action=create" in CURATOR_REVIEW_PROMPT or "create a new umbrella" in CURATOR_REVIEW_PROMPT.lower()
+
+
+
+def test_cli_unpin_refuses_bundled_skill(curator_env, capsys):
+    """hermes curator unpin must refuse bundled/hub skills too (matches pin)."""
+    from hermes_cli import curator as cli
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "ship-skill")
+    (skills_dir / ".bundled_manifest").write_text(
+        "ship-skill:abc\n", encoding="utf-8",
+    )
+
+    class _A:
+        skill = "ship-skill"
+
+    rc = cli._cmd_unpin(_A())
+    captured = capsys.readouterr()
+    assert rc == 1
+    assert "bundled" in captured.out.lower() or "hub" in captured.out.lower()
+
+
+def test_cli_pin_refuses_bundled_skill(curator_env, capsys):
+    from hermes_cli import curator as cli
+    skills_dir = curator_env["home"] / "skills"
+    _write_skill(skills_dir, "ship-skill")
+    (skills_dir / ".bundled_manifest").write_text(
+        "ship-skill:abc\n", encoding="utf-8",
+    )
+
+    class _A:
+        skill = "ship-skill"
+
+    rc = cli._cmd_pin(_A())
+    captured = capsys.readouterr()
+    assert rc == 1
+    assert "bundled" in captured.out.lower() or "hub" in captured.out.lower()
@@ -0,0 +1,258 @@
+"""Tests for the curator per-run report writer (run.json + REPORT.md).
+
+Reports live under ``~/.hermes/logs/curator/{YYYYMMDD-HHMMSS}/`` alongside
+the standard log dir, not inside the user's ``skills/`` data directory.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def curator_env(tmp_path, monkeypatch):
+    """Isolated HERMES_HOME with a skills/ dir + reset curator module state."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    (home / "skills").mkdir()
+    (home / "logs").mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+    import importlib
+    import hermes_constants
+    importlib.reload(hermes_constants)
+    from agent import curator
+    importlib.reload(curator)
+    from tools import skill_usage
+    importlib.reload(skill_usage)
+    yield {"home": home, "curator": curator, "skill_usage": skill_usage}
+
+
+def _make_llm_meta(**overrides):
+    base = {
+        "final": "short summary of the pass",
+        "summary": "short summary",
+        "model": "test-model",
+        "provider": "test-provider",
+        "tool_calls": [],
+        "error": None,
+    }
+    base.update(overrides)
+    return base
+
+
+def test_reports_root_is_under_logs_not_skills(curator_env):
+    """Reports live in logs/curator/, not skills/ — operational telemetry
+    belongs with the logs, not with user-authored skill data."""
+    curator = curator_env["curator"]
+    root = curator._reports_root()
+    home = curator_env["home"]
+    # Must be under logs/
+    assert root == home / "logs" / "curator"
+    # Must NOT be under skills/
+    assert "skills" not in root.parts
+
+
+def test_write_run_report_creates_both_files(curator_env):
+    """Each run writes both a run.json (machine) and a REPORT.md (human)."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=12.345,
+        auto_counts={"checked": 5, "marked_stale": 1, "archived": 0, "reactivated": 0},
+        auto_summary="1 marked stale",
+        before_report=[],
+        before_names=set(),
+        after_report=[],
+        llm_meta=_make_llm_meta(),
+    )
+    assert run_dir is not None
+    assert run_dir.is_dir()
+    assert (run_dir / "run.json").exists()
+    assert (run_dir / "REPORT.md").exists()
+
+    # The directory name is a timestamp under logs/curator/
+    assert run_dir.parent == curator._reports_root()
+
+
+def test_run_json_has_expected_shape(curator_env):
+    """run.json must carry the machine-readable fields downstream tooling needs."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    before_report = [
+        {"name": "old-thing", "state": "active", "pinned": False},
+        {"name": "keeper", "state": "active", "pinned": True},
+    ]
+    after_report = [
+        {"name": "keeper", "state": "active", "pinned": True},
+        {"name": "new-umbrella", "state": "active", "pinned": False},
+    ]
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=42.0,
+        auto_counts={"checked": 2, "marked_stale": 0, "archived": 0, "reactivated": 0},
+        auto_summary="no changes",
+        before_report=before_report,
+        before_names={r["name"] for r in before_report},
+        after_report=after_report,
+        llm_meta=_make_llm_meta(
+            final="I consolidated the whole universe.",
+            tool_calls=[
+                {"name": "skills_list", "arguments": "{}"},
+                {"name": "skill_manage", "arguments": '{"action":"create"}'},
+                {"name": "terminal", "arguments": "mv ..."},
+            ],
+        ),
+    )
+    payload = json.loads((run_dir / "run.json").read_text())
+
+    # top-level shape
+    for k in (
+        "started_at", "duration_seconds", "model", "provider",
+        "auto_transitions", "counts", "tool_call_counts",
+        "archived", "added", "state_transitions",
+        "llm_final", "llm_summary", "llm_error", "tool_calls",
+    ):
+        assert k in payload, f"missing key: {k}"
+
+    # Diff logic
+    assert payload["archived"] == ["old-thing"]
+    assert payload["added"] == ["new-umbrella"]
+    # Counts reflect the diff
+    assert payload["counts"]["before"] == 2
+    assert payload["counts"]["after"] == 2
+    assert payload["counts"]["archived_this_run"] == 1
+    assert payload["counts"]["added_this_run"] == 1
+    # Tool call counts are aggregated
+    assert payload["tool_call_counts"]["skills_list"] == 1
+    assert payload["tool_call_counts"]["skill_manage"] == 1
+    assert payload["tool_call_counts"]["terminal"] == 1
+    assert payload["counts"]["tool_calls_total"] == 3
+
+
+def test_report_md_is_human_readable(curator_env):
+    """REPORT.md should be a valid markdown doc with the key sections visible."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=75.0,
+        auto_counts={"checked": 10, "marked_stale": 2, "archived": 1, "reactivated": 0},
+        auto_summary="2 marked stale, 1 archived",
+        before_report=[{"name": "foo", "state": "active", "pinned": False}],
+        before_names={"foo"},
+        after_report=[{"name": "foo-umbrella", "state": "active", "pinned": False}],
+        llm_meta=_make_llm_meta(
+            final="Consolidated foo-like skills into foo-umbrella.",
+            model="claude-opus-4.7",
+            provider="openrouter",
+        ),
+    )
+    md = (run_dir / "REPORT.md").read_text()
+
+    # Structural checks
+    assert "# Curator run" in md
+    assert "Auto-transitions" in md
+    assert "LLM consolidation pass" in md
+    assert "Recovery" in md
+
+    # The model / provider we passed in show up
+    assert "claude-opus-4.7" in md
+    assert "openrouter" in md
+
+    # The added/archived lists are present
+    assert "Skills archived" in md
+    assert "`foo`" in md
+    assert "New skills this run" in md
+    assert "`foo-umbrella`" in md
+
+    # The full LLM final response is included verbatim (no 240-char truncation)
+    assert "Consolidated foo-like skills into foo-umbrella." in md
+
+
+def test_same_second_reruns_get_unique_dirs(curator_env):
+    """If the curator somehow runs twice in the same second, the second
+    report still gets its own directory rather than overwriting the first."""
+    curator = curator_env["curator"]
+    start = datetime(2026, 4, 29, 5, 33, 34, tzinfo=timezone.utc)
+
+    kwargs = dict(
+        started_at=start,
+        elapsed_seconds=1.0,
+        auto_counts={"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0},
+        auto_summary="no changes",
+        before_report=[],
+        before_names=set(),
+        after_report=[],
+        llm_meta=_make_llm_meta(),
+    )
+    a = curator._write_run_report(**kwargs)
+    b = curator._write_run_report(**kwargs)
+    assert a != b
+    assert a is not None and b is not None
+    # Second dir has a numeric disambiguator suffix
+    assert b.name.startswith(a.name)
+
+
+def test_report_captures_llm_error_and_continues(curator_env):
+    """If the LLM pass recorded an error, the report still writes and
+    surfaces the error prominently."""
+    curator = curator_env["curator"]
+    run_dir = curator._write_run_report(
+        started_at=datetime.now(timezone.utc),
+        elapsed_seconds=2.0,
+        auto_counts={"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0},
+        auto_summary="no changes",
+        before_report=[],
+        before_names=set(),
+        after_report=[],
+        llm_meta=_make_llm_meta(
+            error="HTTP 400: No models provided",
+            final="",
+            summary="error",
+        ),
+    )
+    md = (run_dir / "REPORT.md").read_text()
+    assert "HTTP 400" in md
+    payload = json.loads((run_dir / "run.json").read_text())
+    assert payload["llm_error"] == "HTTP 400: No models provided"
+
+
+def test_state_transitions_captured_in_report(curator_env):
+    """When a skill moves active → stale or stale → archived between
+    before/after snapshots, the report records it."""
+    curator = curator_env["curator"]
+    start = datetime.now(timezone.utc)
+
+    before = [{"name": "getting-old", "state": "active", "pinned": False}]
+    after = [{"name": "getting-old", "state": "stale", "pinned": False}]
+
+    run_dir = curator._write_run_report(
+        started_at=start,
+        elapsed_seconds=1.0,
+        auto_counts={"checked": 1, "marked_stale": 1, "archived": 0, "reactivated": 0},
+        auto_summary="1 marked stale",
+        before_report=before,
+        before_names={r["name"] for r in before},
+        after_report=after,
+        llm_meta=_make_llm_meta(),
+    )
+    payload = json.loads((run_dir / "run.json").read_text(encoding="utf-8"))
+    assert payload["state_transitions"] == [
+        {"name": "getting-old", "from": "active", "to": "stale"}
+    ]
+    md = (run_dir / "REPORT.md").read_text(encoding="utf-8")  # non-ASCII arrow asserted below
+    assert "State transitions" in md
+    assert "getting-old" in md
+    assert "active → stale" in md
@@ -0,0 +1,242 @@
+"""Regression guard: preserve thinking blocks on DeepSeek's /anthropic endpoint.
+
+DeepSeek's ``api.deepseek.com/anthropic`` route speaks the Anthropic Messages
+protocol but, when thinking mode is enabled, requires ``thinking`` blocks from
+prior assistant turns to round-trip on subsequent requests.  The generic
+third-party path strips them (signatures are Anthropic-proprietary and other
+proxies cannot validate them), so without a DeepSeek-specific carve-out the
+next tool-call turn fails with HTTP 400::
+
+    The content[].thinking in the thinking mode must be passed back to the
+    API.
+
+DeepSeek's compatibility matrix lists ``thinking`` as supported but
+``redacted_thinking`` and ``cache_control`` on thinking blocks as not
+supported.  Handling is the same as Kimi's ``/coding`` endpoint: strip
+Anthropic-signed blocks (DeepSeek can't validate them) but preserve unsigned
+blocks that Hermes synthesises from ``reasoning_content``.
+
+See hermes-agent#16748.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+class TestDeepSeekAnthropicPreservesThinking:
+    """convert_messages_to_anthropic must replay DeepSeek thinking blocks."""
+
+    @pytest.mark.parametrize(
+        "base_url",
+        [
+            "https://api.deepseek.com/anthropic",
+            "https://api.deepseek.com/anthropic/",
+            "https://api.deepseek.com/anthropic/v1",
+            "https://API.DeepSeek.com/anthropic",
+        ],
+    )
+    def test_unsigned_thinking_block_survives_replay(self, base_url: str) -> None:
+        """Unsigned thinking (synthesised from reasoning_content) must be preserved."""
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "reasoning_content": "planning the tool call",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "skill_view", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url=base_url
+        )
+
+        assistant_msg = next(m for m in converted if m["role"] == "assistant")
+        thinking_blocks = [
+            b for b in assistant_msg["content"]
+            if isinstance(b, dict) and b.get("type") == "thinking"
+        ]
+        assert len(thinking_blocks) == 1, (
+            f"DeepSeek /anthropic ({base_url}) must preserve unsigned thinking "
+            "blocks synthesised from reasoning_content — upstream rejects "
+            "replayed tool-call messages without them."
+        )
+        assert thinking_blocks[0]["thinking"] == "planning the tool call"
+        # Synthesised block — never has a signature
+        assert "signature" not in thinking_blocks[0]
+
+    def test_unsigned_thinking_preserved_on_non_latest_assistant_turn(self) -> None:
+        """DeepSeek validates history across every prior assistant turn, not just last."""
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "q1"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r1",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+            {"role": "user", "content": "q2"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r2",
+                "tool_calls": [
+                    {
+                        "id": "call_2",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_2", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.deepseek.com/anthropic"
+        )
+
+        assistants = [m for m in converted if m["role"] == "assistant"]
+        assert len(assistants) == 2
+        for assistant, expected in zip(assistants, ("r1", "r2")):
+            thinking = [
+                b for b in assistant["content"]
+                if isinstance(b, dict) and b.get("type") == "thinking"
+            ]
+            assert len(thinking) == 1
+            assert thinking[0]["thinking"] == expected
+
+    def test_signed_anthropic_thinking_block_is_stripped(self) -> None:
+        """Anthropic-signed blocks (that leaked through) must still be stripped.
+
+        DeepSeek issues its own signatures and cannot validate Anthropic's —
+        the strip-signed / keep-unsigned split matches the Kimi policy.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "content": [
+                    {
+                        "type": "thinking",
+                        "thinking": "anthropic-signed payload",
+                        "signature": "anthropic-sig-xyz",
+                    },
+                    {"type": "text", "text": "hello"},
+                ],
+            },
+            {"role": "user", "content": "again"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.deepseek.com/anthropic"
+        )
+
+        assistant_msg = next(m for m in converted if m["role"] == "assistant")
+        thinking_blocks = [
+            b for b in assistant_msg["content"]
+            if isinstance(b, dict) and b.get("type") == "thinking"
+        ]
+        assert thinking_blocks == [], (
+            "Signed Anthropic thinking blocks must be stripped on DeepSeek — "
+            "DeepSeek cannot validate Anthropic-proprietary signatures."
+        )
+
+    def test_cache_control_stripped_from_thinking_block(self) -> None:
+        """cache_control must still be stripped even when the block is preserved.
+
+        DeepSeek's compatibility matrix lists cache_control on thinking blocks
+        as ignored — cache markers interfere with signature validation on
+        upstreams that do check them, so Hermes strips them everywhere.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r1",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.deepseek.com/anthropic"
+        )
+        inspected = [
+            block
+            for msg in converted
+            if isinstance(msg.get("content"), list)
+            for block in msg["content"]
+            if isinstance(block, dict) and block.get("type") in ("thinking", "redacted_thinking")
+        ]
+        # Without this guard the loop-style check passes vacuously on zero blocks.
+        assert inspected, "expected the synthesised thinking block to be preserved"
+        assert all("cache_control" not in block for block in inspected)
+
+    def test_openai_compat_deepseek_base_is_not_matched(self) -> None:
+        """The OpenAI-compatible ``api.deepseek.com`` base must NOT trigger the
+        DeepSeek /anthropic branch — it never reaches this adapter, but the
+        detector should still fail closed so an accidental misuse doesn't
+        quietly send signed Anthropic blocks to an OpenAI endpoint.
+        """
+        from agent.anthropic_adapter import _is_deepseek_anthropic_endpoint
+
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com") is False
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/v1") is False
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic") is True
+        assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic/v1") is True
+
+    def test_non_deepseek_third_party_still_strips_all_thinking(self) -> None:
+        """MiniMax and other third-party Anthropic endpoints must keep the
+        generic strip-all behaviour (they reject unsigned blocks outright).
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        messages = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "reasoning_content": "r1",
+                "tool_calls": [
+                    {
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {"name": "f", "arguments": "{}"},
+                    }
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            messages, base_url="https://api.minimax.io/anthropic"
+        )
+        assistant_msg = next(m for m in converted if m["role"] == "assistant")
+        thinking_blocks = [
+            b for b in assistant_msg["content"]
+            if isinstance(b, dict) and b.get("type") == "thinking"
+        ]
+        assert thinking_blocks == [], (
+            "Non-DeepSeek third-party endpoints must keep the generic "
+            "strip-all-thinking behaviour — unsigned blocks get rejected."
+        )
@@ -94,13 +94,16 @@ class TestKimiCodingSkipsAnthropicThinking:
        )
        assert "thinking" in kwargs

-    def test_kimi_root_endpoint_unaffected(self) -> None:
-        """Only the /coding route is special-cased — plain api.kimi.com is not.
+    def test_kimi_root_endpoint_via_anthropic_transport_omits_thinking(self) -> None:
+        """Plain ``api.kimi.com`` hit via the Anthropic transport also omits thinking.

-        ``api.kimi.com`` without ``/coding`` uses the chat_completions transport
-        (see runtime_provider._detect_api_mode_for_url); build_anthropic_kwargs
-        should never see it, but if it somehow does we should not suppress
-        thinking there — that path has different semantics.
+        Auto-detection routes ``api.kimi.com/v1`` to ``chat_completions`` by
+        default, but users can explicitly configure
+        ``api_mode: anthropic_messages`` against any Kimi host.  The upstream
+        validation (reasoning_content required on replayed tool-call
+        messages) is the same regardless of URL path, so the thinking
+        suppression must apply to every Kimi host, not just ``/coding``.
+        See #17057.
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

@@ -112,4 +115,98 @@ class TestKimiCodingSkipsAnthropicThinking:
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url="https://api.kimi.com/v1",
        )
+        assert "thinking" not in kwargs
+
+    # ── #17057: custom / proxied Kimi-compatible endpoints ──────────
+    @pytest.mark.parametrize(
+        "base_url,model",
+        [
+            # Custom/proxied hosts serving a Kimi-family model (the reported case)
+            ("http://my-kimi-proxy.internal", "kimi-2.6"),
+            ("https://llm.example.com/anthropic", "kimi-k2.5"),
+            ("https://llm.example.com/anthropic", "moonshot-v1-8k"),
+            ("https://llm.example.com/anthropic", "kimi_thinking"),
+            ("https://llm.example.com/anthropic", "moonshotai/kimi-k2.5"),
+            # Official Moonshot hosts (previously uncovered)
+            ("https://api.moonshot.ai/anthropic", "moonshot-v1-32k"),
+            ("https://api.moonshot.cn/anthropic", "moonshot-v1-32k"),
+        ],
+    )
+    def test_kimi_family_custom_endpoint_omits_thinking(
+        self, base_url: str, model: str
+    ) -> None:
+        """Kimi-family models on custom/proxy or Moonshot hosts must get a
+        request without ``thinking`` (and without ``output_config``)."""
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        reasoning_on = {"enabled": True, "effort": "medium"}
+        request = build_anthropic_kwargs(
+            model=model, messages=[{"role": "user", "content": "hello"}],
+            tools=None, max_tokens=4096,
+            reasoning_config=reasoning_on, base_url=base_url,
+        )
+        # Reasoning was requested, yet no thinking-related knob may be emitted.
+        assert "thinking" not in request, (
+            f"Kimi-family endpoint ({base_url}, {model}) must not receive "
+            f"Anthropic thinking — upstream validates reasoning_content on "
+            f"replayed tool-call history we don't preserve."
+        )
+        assert "output_config" not in request
+
+    def test_custom_endpoint_non_kimi_model_keeps_thinking(self) -> None:
+        """Negative control: a non-Kimi model on a custom host keeps ``thinking``.
+
+        Guards against over-broad model-family matching — only model names
+        starting with a Kimi/Moonshot prefix should trigger suppression.
+        """
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="MiniMax-M2.7",
+            messages=[{"role": "user", "content": "hello"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "medium"},
+            base_url="https://my-llm-proxy.example.com/anthropic",
+        )
         assert "thinking" in kwargs
+        assert kwargs["thinking"]["type"] == "enabled"
+
+    def test_kimi_family_replay_preserves_unsigned_thinking(self) -> None:
+        """Replaying history to a custom Kimi endpoint keeps unsigned thinking.
+
+        The block synthesised from ``reasoning_content`` has to survive the
+        third-party signature-stripping pass so that the upstream's
+        message-history validation passes.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        reasoning = "planning the tool call"
+        tool_call = {
+            "id": "call_1",
+            "type": "function",
+            "function": {"name": "skill_view", "arguments": "{}"},
+        }
+        history = [
+            {"role": "user", "content": "hi"},
+            {
+                "role": "assistant",
+                "reasoning_content": reasoning,
+                "tool_calls": [tool_call],
+            },
+            {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        ]
+        _system, converted = convert_messages_to_anthropic(
+            history, base_url="http://my-kimi-proxy.internal", model="kimi-2.6"
+        )
+
+        # A plain third-party endpoint would have stripped the block; Kimi's
+        # history validation requires it, so exactly one must remain.
+        first_assistant = next(m for m in converted if m["role"] == "assistant")
+        kept = [
+            block
+            for block in first_assistant["content"]
+            if isinstance(block, dict) and block.get("type") == "thinking"
+        ]
+        assert len(kept) == 1
+        assert kept[0]["thinking"] == reasoning
@@ -0,0 +1,320 @@
+"""Tests for the on_session_switch hook and session_id propagation.
+
+Covers #6672: memory providers must be notified when AIAgent.session_id
+rotates mid-process (via /resume, /branch, /reset, /new, or context
+compression). Without the notification, providers that cache per-session
+state in initialize() (Hindsight, and any plugin that stores session_id
+for scoped writes) keep writing into the old session's record.
+"""
+
+import json
+
+import pytest
+
+from agent.memory_manager import MemoryManager
+from agent.memory_provider import MemoryProvider
+
+
+class _RecordingProvider(MemoryProvider):
+    """Test double that logs each lifecycle call so tests can assert on it.
+
+    Every hook appends one dict describing its arguments to the matching
+    ``*_calls`` list; nothing is forwarded anywhere.
+    """
+
+    def __init__(self, name="rec"):
+        self._name = name
+        # One call log per hook, in invocation order.
+        self.initialize_calls: list[dict] = []
+        self.sync_calls: list[dict] = []
+        self.queue_calls: list[dict] = []
+        self.switch_calls: list[dict] = []
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def is_available(self) -> bool:  # pragma: no cover - unused
+        return True
+
+    def get_tool_schemas(self):
+        return []
+
+    def initialize(self, session_id, **kwargs):
+        entry = {"session_id": session_id}
+        entry.update(kwargs)
+        self.initialize_calls.append(entry)
+
+    def sync_turn(self, user_content, assistant_content, *, session_id=""):
+        entry = {"user": user_content, "asst": assistant_content, "session_id": session_id}
+        self.sync_calls.append(entry)
+
+    def queue_prefetch(self, query, *, session_id=""):
+        entry = {"query": query, "session_id": session_id}
+        self.queue_calls.append(entry)
+
+    def on_session_switch(
+        self, new_session_id, *, parent_session_id="", reset=False, **kwargs
+    ):
+        # Unrecognised keyword arguments (e.g. ``reason``) are kept under "extra".
+        entry = {
+            "new": new_session_id, "parent": parent_session_id,
+            "reset": reset, "extra": kwargs,
+        }
+        self.switch_calls.append(entry)
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider ABC — default on_session_switch is a no-op
+# ---------------------------------------------------------------------------
+
+
+class _MinimalProvider(MemoryProvider):
+    """Bare-bones provider that leaves ``on_session_switch`` to the base class."""
+
+    def is_available(self) -> bool:
+        return True
+
+    def get_tool_schemas(self):
+        return []
+
+    def initialize(self, session_id, **kwargs):  # pragma: no cover - unused
+        return None
+
+    @property
+    def name(self) -> str:
+        return "minimal"
+
+
+def test_abc_default_on_session_switch_is_noop():
+    """The hook inherited from the ABC must accept each call shape silently."""
+    switch = _MinimalProvider().on_session_switch
+    # Four call styles, each adding one more keyword argument than the last.
+    switch("new-id")
+    switch("new-id", parent_session_id="old-id")
+    switch("new-id", parent_session_id="old-id", reset=True)
+    switch("new-id", parent_session_id="old-id", reset=True, reason="new_session")
+
+
+# ---------------------------------------------------------------------------
+# MemoryManager.on_session_switch — fan-out
+# ---------------------------------------------------------------------------
+
+
+def test_manager_fans_out_to_all_providers():
+    """One manager-level switch reaches every registered provider exactly once."""
+    manager = MemoryManager()
+    # Only one external provider is allowed, so the first takes the builtin slot.
+    builtin = _RecordingProvider(name="builtin")
+    external = _RecordingProvider(name="hindsight")
+    for provider in (builtin, external):
+        manager.add_provider(provider)
+
+    manager.on_session_switch("new-sid", parent_session_id="old-sid", reset=False, reason="resume")
+
+    assert [len(p.switch_calls) for p in (builtin, external)] == [1, 1]
+    for call in (builtin.switch_calls[0], external.switch_calls[0]):
+        assert call["new"] == "new-sid"
+        assert call["parent"] == "old-sid"
+        assert call["reset"] is False
+        assert call["extra"] == {"reason": "resume"}
+
+
+def test_manager_ignores_empty_session_id():
+    """A falsy session id ("" or None) must not reach any provider hook.
+
+    Prevents accidental fires during shutdown when self.session_id may be
+    cleared. Providers expect a meaningful id to switch TO.
+    """
+    manager = MemoryManager()
+    recorder = _RecordingProvider()
+    manager.add_provider(recorder)
+    for blank_id in ("", None):
+        manager.on_session_switch(blank_id)  # type: ignore[arg-type]
+    assert recorder.switch_calls == []
+
+
+def test_manager_isolates_provider_failures():
+    """An exception raised by one provider's hook must not starve the others."""
+
+    class _Exploding(_RecordingProvider):
+        def on_session_switch(self, *args, **kwargs):  # type: ignore[override]
+            raise RuntimeError("boom")
+
+    # MemoryManager rejects a second external provider, so the failing one
+    # occupies the builtin slot and the healthy one is the external provider.
+    failing = _Exploding(name="builtin")
+    healthy = _RecordingProvider(name="good")
+    manager = MemoryManager()
+    for provider in (failing, healthy):
+        manager.add_provider(provider)
+
+    # Must not raise — the manager is expected to contain the failure.
+    manager.on_session_switch("new-sid", parent_session_id="old-sid")
+    assert len(healthy.switch_calls) == 1
+    assert healthy.switch_calls[0]["new"] == "new-sid"
+
+
+def test_manager_reset_flag_preserved():
+    """``reset=True`` and extra keywords reach the provider unchanged."""
+    manager, recorder = MemoryManager(), _RecordingProvider()
+    manager.add_provider(recorder)
+    manager.on_session_switch("new-sid", reset=True, reason="new_session")
+    assert recorder.switch_calls[0]["reset"] is True
+    assert recorder.switch_calls[0]["extra"] == {"reason": "new_session"}
+
+
+# ---------------------------------------------------------------------------
+# MemoryManager.sync_all / queue_prefetch_all — session_id propagation
+# ---------------------------------------------------------------------------
+
+
+def test_sync_all_propagates_session_id_to_providers():
+    """``sync_all`` must hand its ``session_id`` keyword to each provider.
+
+    Without this, a provider that updates _session_id defensively in
+    sync_turn (as Hindsight does at hindsight/__init__.py:1199) never
+    sees the new id and keeps writing under the old one.
+    """
+    manager = MemoryManager()
+    recorder = _RecordingProvider()
+    manager.add_provider(recorder)
+    manager.sync_all("hello", "world", session_id="sess-42")
+    # Exactly one recorded turn, carrying the id that was passed in.
+    expected_call = {"user": "hello", "asst": "world", "session_id": "sess-42"}
+    assert recorder.sync_calls == [expected_call]
+
+
+def test_queue_prefetch_all_propagates_session_id_to_providers():
+    """``queue_prefetch_all`` forwards query and ``session_id`` to the provider."""
+    manager, recorder = MemoryManager(), _RecordingProvider()
+    manager.add_provider(recorder)
+    manager.queue_prefetch_all("next query", session_id="sess-42")
+    assert recorder.queue_calls == [{"query": "next query", "session_id": "sess-42"}]
+
+
+# ---------------------------------------------------------------------------
+# Hindsight reference implementation — state-flush semantics
+# ---------------------------------------------------------------------------
+
+
+def _make_hindsight_provider():
+    """Return a HindsightMemoryProvider whose ``__init__`` never ran.
+
+    The instance is created with ``object.__new__`` and then given, by hand,
+    the attributes that ``on_session_switch`` reads or writes, so no optional
+    dependency is imported at class level and no network setup happens.
+    The calling test is skipped when the plugin module cannot be imported.
+    """
+    import queue as _queue
+    import threading
+
+    plugin = pytest.importorskip("plugins.memory.hindsight")
+    provider = object.__new__(plugin.HindsightMemoryProvider)
+    # Pre-switch session state: two buffered turns under the old document.
+    provider._session_id = "old-sid"
+    provider._parent_session_id = ""
+    provider._document_id = "old-sid-20260101_000000_000000"
+    provider._session_turns = ["turn-1", "turn-2"]
+    provider._turn_counter = 2
+    provider._turn_index = 2
+    # Attrs read by _build_metadata / _build_retain_kwargs when the
+    # buffer-flush path on session switch fires. Empty strings keep the
+    # metadata minimal but well-formed.
+    blank_fields = (
+        "_retain_source", "_platform", "_user_id", "_user_name", "_chat_id",
+        "_chat_name", "_chat_type", "_thread_id", "_agent_identity",
+        "_agent_workspace",
+    )
+    for field in blank_fields:
+        setattr(provider, field, "")
+    provider._retain_tags = []
+    provider._retain_context = "test-context"
+    provider._retain_async = False
+    provider._bank_id = "test-bank"
+
+    # Prefetch state the switch path drains/clears.
+    provider._prefetch_thread = None
+    provider._prefetch_lock = threading.Lock()
+    provider._prefetch_result = ""
+    # Sync thread tracking (legacy alias at the writer).
+    provider._sync_thread = None
+
+    # Writer-queue plumbing used by the flush-on-switch path. The two
+    # writer hooks are replaced with no-ops so no real thread is spawned;
+    # flush delivery itself is covered in
+    # tests/plugins/memory/test_hindsight_provider.py.
+    provider._retain_queue = _queue.Queue()
+    provider._shutting_down = threading.Event()
+    provider._atexit_registered = True
+    provider._ensure_writer = lambda: None
+    provider._register_atexit = lambda: None
+    # Network-touching helper: stubbed so a drained flush closure does nothing.
+    provider._run_hindsight_operation = lambda _op: None
+    return provider
+
+
+def test_hindsight_on_session_switch_updates_session_id_and_mints_fresh_doc():
+    """Switching sessions rebinds both ids and replaces the document id."""
+    hindsight = _make_hindsight_provider()
+    stale_document = hindsight._document_id
+
+    switch_kwargs = {"parent_session_id": "old-sid", "reset": False, "reason": "resume"}
+    hindsight.on_session_switch("new-sid", **switch_kwargs)
+
+    assert (hindsight._session_id, hindsight._parent_session_id) == ("new-sid", "old-sid")
+    # Document id MUST be fresh — else next retain overwrites old session doc
+    fresh_document = hindsight._document_id
+    assert fresh_document != stale_document
+    assert fresh_document.startswith("new-sid-")
+
+
+def test_hindsight_on_session_switch_clears_turn_buffers():
+    """Turns buffered for the old session are gone after a switch.
+
+    Hindsight batches turns under a single _document_id. If the buffer
+    isn't cleared on switch, the next retain under the new _document_id
+    flushes turns that belong to the previous session.
+    """
+    hindsight = _make_hindsight_provider()
+    hindsight.on_session_switch("new-sid", parent_session_id="old-sid")
+    assert hindsight._session_turns == []
+    counters = (hindsight._turn_counter, hindsight._turn_index)
+    assert counters == (0, 0)
+
+
+def test_hindsight_on_session_switch_clears_on_reset_true():
+    """A reset-style switch (``reset=True``, as from /new or /reset) also empties the buffers."""
+    hindsight = _make_hindsight_provider()
+    hindsight.on_session_switch("new-sid", reset=True, reason="new_session")
+    observed = (hindsight._session_id, hindsight._session_turns, hindsight._turn_counter)
+    # New id bound, no buffered turns, counter back at zero.
+    assert observed == ("new-sid", [], 0)
+
+
+def test_hindsight_on_session_switch_ignores_empty_id():
+    """A falsy new_session_id ("" or None) must leave provider state untouched."""
+    hindsight = _make_hindsight_provider()
+
+    def snapshot():
+        # The turn list is copied so in-place mutation would be detected.
+        return (
+            hindsight._session_id,
+            hindsight._document_id,
+            list(hindsight._session_turns),
+            hindsight._turn_counter,
+        )
+
+    state_before = snapshot()
+    hindsight.on_session_switch("")
+    hindsight.on_session_switch(None)  # type: ignore[arg-type]
+    state_after = snapshot()
+    assert state_before == state_after
+
+
+def test_hindsight_preserves_parent_across_empty_parent_arg():
+    """A switch without parent_session_id keeps the parent already recorded."""
+    hindsight = _make_hindsight_provider()
+    remembered = hindsight._parent_session_id = "original-parent"
+    hindsight.on_session_switch("new-sid")  # parent_session_id left at its default
+    assert hindsight._parent_session_id == remembered
@@ -205,11 +205,22 @@ class TestDetectOpenclawResidue:


 class TestOpenclawResidueHint:
-    def test_hint_mentions_cleanup_command(self):
+    def test_hint_mentions_migrate_command(self):
+        # `migrate` is the non-destructive path — should lead the banner.
        msg = openclaw_residue_hint_cli()
-        assert "hermes claw cleanup" in msg
+        assert "hermes claw migrate" in msg
        assert "~/.openclaw" in msg

+    def test_hint_mentions_cleanup_command(self):
+        # `cleanup` must still be named: it is the follow-up archive step.
+        assert "hermes claw cleanup" in openclaw_residue_hint_cli()
+
+    def test_hint_warns_cleanup_breaks_openclaw(self):
+        # Archiving the directory breaks OpenClaw for users still running it —
+        # the banner must flag that side effect (matched case-insensitively).
+        msg = openclaw_residue_hint_cli().lower()
+        assert "stop working" in msg
+
    def test_hint_not_empty(self):
        assert openclaw_residue_hint_cli().strip()

@@ -122,21 +122,25 @@ class TestChatCompletionsBuildKwargs:
        )
        assert kw["extra_body"]["think"] is False

-    def test_gemini_without_explicit_reasoning_config_keeps_existing_behavior(self, transport):
+    def test_gemini_native_without_explicit_reasoning_config_keeps_existing_behavior(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gemini-3-flash-preview",
            messages=msgs,
            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta",
        )
        assert "thinking_config" not in kw.get("extra_body", {})
+        assert "google" not in kw.get("extra_body", {})
+        assert "extra_body" not in kw.get("extra_body", {})

-    def test_gemini_flash_reasoning_maps_to_thinking_config(self, transport):
+    def test_gemini_native_flash_reasoning_maps_to_top_level_thinking_config(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gemini-3-flash-preview",
            messages=msgs,
            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta",
            reasoning_config={"enabled": True, "effort": "high"},
        )
        assert kw["extra_body"]["thinking_config"] == {
@@ -144,52 +148,85 @@ class TestChatCompletionsBuildKwargs:
            "thinkingLevel": "high",
        }

-    def test_gemini_25_reasoning_only_enables_visible_thoughts(self, transport):
+    def test_gemini_openai_compat_flash_reasoning_maps_to_nested_google_thinking_config(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-3-flash-preview",
+            messages=msgs,
+            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert "thinking_config" not in kw["extra_body"]
+        assert kw["extra_body"]["extra_body"]["google"]["thinking_config"] == {
+            "include_thoughts": True,
+            "thinking_level": "high",
+        }
+
+    def test_gemini_native_25_reasoning_only_enables_visible_thoughts(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gemini-2.5-flash",
            messages=msgs,
            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta",
            reasoning_config={"enabled": True, "effort": "high"},
        )
        assert kw["extra_body"]["thinking_config"] == {
            "includeThoughts": True,
        }

-    def test_gemini_pro_reasoning_clamps_to_supported_levels(self, transport):
+    def test_gemini_openai_compat_pro_reasoning_clamps_to_supported_levels(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="google/gemini-3.1-pro-preview",
            messages=msgs,
            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai",
            reasoning_config={"enabled": True, "effort": "medium"},
        )
-        assert kw["extra_body"]["thinking_config"] == {
-            "includeThoughts": True,
-            "thinkingLevel": "low",
+        assert kw["extra_body"]["extra_body"]["google"]["thinking_config"] == {
+            "include_thoughts": True,
+            "thinking_level": "low",
        }

-    def test_gemini_disabled_reasoning_hides_thoughts(self, transport):
+    def test_gemini_native_disabled_reasoning_hides_thoughts(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gemini-3-flash-preview",
            messages=msgs,
            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta",
            reasoning_config={"enabled": False},
        )
        assert kw["extra_body"]["thinking_config"] == {
            "includeThoughts": False,
        }

-    def test_gemini_xhigh_clamps_to_high(self, transport):
+    def test_gemini_openai_compat_xhigh_clamps_to_high(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gemini-3-flash-preview",
            messages=msgs,
            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai",
            reasoning_config={"enabled": True, "effort": "xhigh"},
        )
-        assert kw["extra_body"]["thinking_config"]["thinkingLevel"] == "high"
+        assert kw["extra_body"]["extra_body"]["google"]["thinking_config"]["thinking_level"] == "high"
+
+    def test_google_gemini_cli_keeps_top_level_thinking_config(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-3-flash-preview",
+            messages=msgs,
+            provider_name="google-gemini-cli",
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert kw["extra_body"]["thinking_config"] == {
+            "includeThoughts": True,
+            "thinkingLevel": "high",
+        }
+        assert "google" not in kw["extra_body"]

    def test_gemini_flash_minimal_clamps_to_low(self, transport):
        # Gemini 3 Flash documents low/medium/high; "minimal" isn't accepted,
@@ -199,11 +236,12 @@ class TestChatCompletionsBuildKwargs:
            model="gemini-3-flash-preview",
            messages=msgs,
            provider_name="gemini",
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai",
            reasoning_config={"enabled": True, "effort": "minimal"},
        )
-        assert kw["extra_body"]["thinking_config"] == {
-            "includeThoughts": True,
-            "thinkingLevel": "low",
+        assert kw["extra_body"]["extra_body"]["google"]["thinking_config"] == {
+            "include_thoughts": True,
+            "thinking_level": "low",
        }

    def test_max_tokens_with_fn(self, transport):
@@ -192,6 +192,33 @@ class TestBranchCommandCLI:

        assert cli_instance._resumed is True

+    def test_branch_fires_on_session_switch_hook(self, cli_instance, session_db):
+        """The /branch command must notify memory providers of the rotation.
+
+        Without this, providers that cache per-session state in
+        initialize() keep writing under the old session_id. See #6672.
+        """
+        from cli import HermesCLI
+
+        # Wire a real-ish agent object with a MagicMock memory_manager
+        agent = MagicMock()
+        mm = MagicMock()
+        agent._memory_manager = mm
+        cli_instance.agent = agent
+        original_id = cli_instance.session_id
+
+        HermesCLI._handle_branch_command(cli_instance, "/branch")
+
+        # Hook must have been called exactly once with the new session_id,
+        # parent pointing at the branched-from session, reset=False, and
+        # reason="branch" for diagnostics.
+        assert mm.on_session_switch.call_count == 1
+        args, kwargs = mm.on_session_switch.call_args
+        assert args[0] == cli_instance.session_id
+        assert kwargs["parent_session_id"] == original_id
+        assert kwargs["reset"] is False
+        assert kwargs["reason"] == "branch"
+
    def test_fork_alias(self):
        """The /fork alias should resolve to 'branch'."""
        from hermes_cli.commands import resolve_command
@@ -1,9 +1,11 @@
 """Tests for CLI browser CDP auto-launch helpers."""

 import os
+import subprocess
 from unittest.mock import patch

 from cli import HermesCLI
+from hermes_cli.browser_connect import manual_chrome_debug_command


 def _assert_chrome_debug_cmd(cmd, expected_chrome, expected_port):
@@ -26,13 +28,19 @@ class TestChromeDebugLaunch:
            captured["kwargs"] = kwargs
            return object()

-        with patch("cli.shutil.which", side_effect=lambda name: r"C:\Chrome\chrome.exe" if name == "chrome.exe" else None), \
-             patch("cli.os.path.isfile", side_effect=lambda path: path == r"C:\Chrome\chrome.exe"), \
+        with patch("hermes_cli.browser_connect.shutil.which", side_effect=lambda name: r"C:\Chrome\chrome.exe" if name == "chrome.exe" else None), \
+             patch("hermes_cli.browser_connect.os.path.isfile", side_effect=lambda path: path == r"C:\Chrome\chrome.exe"), \
             patch("subprocess.Popen", side_effect=fake_popen):
            assert HermesCLI._try_launch_chrome_debug(9333, "Windows") is True

        _assert_chrome_debug_cmd(captured["cmd"], r"C:\Chrome\chrome.exe", 9333)
-        assert captured["kwargs"]["start_new_session"] is True
+        # Windows uses creationflags (POSIX-only start_new_session would raise).
+        assert "start_new_session" not in captured["kwargs"]
+        flags = captured["kwargs"].get("creationflags", 0)
+        expected = getattr(subprocess, "DETACHED_PROCESS", 0) | getattr(
+            subprocess, "CREATE_NEW_PROCESS_GROUP", 0
+        )
+        assert flags == expected

    def test_windows_launch_falls_back_to_common_install_dirs(self, monkeypatch):
        captured = {}
@@ -49,9 +57,45 @@ class TestChromeDebugLaunch:
        monkeypatch.delenv("ProgramFiles(x86)", raising=False)
        monkeypatch.delenv("LOCALAPPDATA", raising=False)

-        with patch("cli.shutil.which", return_value=None), \
-             patch("cli.os.path.isfile", side_effect=lambda path: path == installed), \
+        with patch("hermes_cli.browser_connect.shutil.which", return_value=None), \
+             patch("hermes_cli.browser_connect.os.path.isfile", side_effect=lambda path: path == installed), \
             patch("subprocess.Popen", side_effect=fake_popen):
            assert HermesCLI._try_launch_chrome_debug(9222, "Windows") is True

        _assert_chrome_debug_cmd(captured["cmd"], installed, 9222)
+
+    def test_manual_command_uses_detected_linux_browser(self):
+        with patch("hermes_cli.browser_connect.shutil.which", side_effect=lambda name: "/usr/bin/chromium" if name == "chromium" else None), \
+             patch("hermes_cli.browser_connect.os.path.isfile", side_effect=lambda path: path == "/usr/bin/chromium"):
+            command = manual_chrome_debug_command(9222, "Linux")
+
+        assert command is not None
+        assert command.startswith("/usr/bin/chromium --remote-debugging-port=9222")
+
+    def test_manual_command_uses_wsl_windows_chrome_when_available(self):
+        chrome = "/mnt/c/Program Files/Google/Chrome/Application/chrome.exe"
+
+        with patch("hermes_cli.browser_connect.shutil.which", return_value=None), \
+             patch("hermes_cli.browser_connect.os.path.isfile", side_effect=lambda path: path == chrome):
+            command = manual_chrome_debug_command(9222, "Linux")
+
+        assert command is not None
+        # Linux/WSL uses POSIX shell quoting (single quotes around paths with spaces).
+        assert command.startswith(f"'{chrome}' --remote-debugging-port=9222")
+
+    def test_manual_command_uses_windows_quoting_on_windows(self):
+        chrome = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
+
+        with patch("hermes_cli.browser_connect.shutil.which", side_effect=lambda name: chrome if name == "chrome.exe" else None), \
+             patch("hermes_cli.browser_connect.os.path.isfile", side_effect=lambda path: path == chrome):
+            command = manual_chrome_debug_command(9222, "Windows")
+
+        assert command is not None
+        # Windows uses cmd.exe-compatible quoting via subprocess.list2cmdline.
+        assert command.startswith(f'"{chrome}" --remote-debugging-port=9222')
+        assert "'" not in command
+
+    def test_manual_command_returns_none_when_linux_browser_missing(self):
+        with patch("hermes_cli.browser_connect.shutil.which", return_value=None), \
+             patch("hermes_cli.browser_connect.os.path.isfile", return_value=False):
+            assert manual_chrome_debug_command(9222, "Linux") is None
@@ -296,6 +296,30 @@ class TestRootLevelProviderOverride:
        # Root-level "opencode-go" must NOT leak through
        assert cfg["model"]["provider"] != "opencode-go"

+    def test_terminal_vercel_runtime_bridged_to_env(self, tmp_path, monkeypatch):
+        """Classic CLI must expose terminal.vercel_runtime to terminal_tool.py."""
+        import yaml
+
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("TERMINAL_VERCEL_RUNTIME", raising=False)
+
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(yaml.safe_dump({
+            "terminal": {
+                "backend": "vercel_sandbox",
+                "vercel_runtime": "python3.13",
+            },
+        }))
+
+        import cli
+        monkeypatch.setattr(cli, "_hermes_home", hermes_home)
+        cfg = cli.load_cli_config()
+
+        assert cfg["terminal"]["vercel_runtime"] == "python3.13"
+        assert os.environ["TERMINAL_VERCEL_RUNTIME"] == "python3.13"
+
    def test_normalize_root_model_keys_moves_to_model(self):
        """_normalize_root_model_keys migrates root keys into model section."""
        from hermes_cli.config import _normalize_root_model_keys
@@ -0,0 +1,119 @@
+"""Tests for `/exit --delete` and `/quit --delete` session deletion.
+
+Ports the behavior from google-gemini/gemini-cli#19332: running `/exit` or
+`/quit` with the `--delete` flag arms a one-shot `_delete_session_on_exit`
+flag that the CLI shutdown path uses to remove the current session from
+SQLite + on-disk transcripts before exit.
+"""
+
+from unittest.mock import MagicMock
+
+
+def _make_cli():
+    """Bare HermesCLI suitable for process_command() tests.
+
+    Uses ``__new__`` to skip the heavy __init__; only sets the attributes
+    the /exit branch touches.
+    """
+    from cli import HermesCLI
+    cli = HermesCLI.__new__(HermesCLI)
+    cli.config = {}
+    cli.console = MagicMock()
+    cli.agent = None
+    cli.conversation_history = []
+    cli.session_id = "test-session"
+    cli._delete_session_on_exit = False
+    return cli
+
+
+class TestExitDeleteFlag:
+    def test_plain_exit_does_not_arm_delete(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit")
+        assert result is False
+        assert cli._delete_session_on_exit is False
+
+    def test_plain_quit_does_not_arm_delete(self):
+        cli = _make_cli()
+        result = cli.process_command("/quit")
+        assert result is False
+        assert cli._delete_session_on_exit is False
+
+    def test_exit_delete_arms_flag(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit --delete")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_quit_delete_arms_flag(self):
+        cli = _make_cli()
+        result = cli.process_command("/quit --delete")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_exit_delete_short_form(self):
+        """`-d` is a convenience alias for `--delete`."""
+        cli = _make_cli()
+        result = cli.process_command("/exit -d")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_quit_alias_q_is_not_quit(self):
+        """`/q` is the alias for `/queue`, not `/quit`, so `/q --delete` would
+        dispatch to /queue instead of arming session deletion. Only the bare
+        `/q` form is exercised below."""
+        cli = _make_cli()
+        cli._pending_input = __import__("queue").Queue()
+        # /q with no args shows a usage error and keeps the CLI running.
+        result = cli.process_command("/q")
+        assert result is not False  # queue command doesn't exit
+        assert cli._delete_session_on_exit is False
+
+    def test_delete_flag_is_case_insensitive(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit --DELETE")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_delete_flag_trims_whitespace(self):
+        cli = _make_cli()
+        result = cli.process_command("/exit   --delete   ")
+        assert result is False
+        assert cli._delete_session_on_exit is True
+
+    def test_unknown_exit_argument_does_not_exit(self):
+        """Unrecognised args should NOT exit the CLI — they surface an
+        error message and stay in the session. This prevents accidental
+        session destruction from typos like `/exit -delete`."""
+        cli = _make_cli()
+        result = cli.process_command("/exit --delte")
+        # process_command returns True = keep running
+        assert result is True
+        assert cli._delete_session_on_exit is False
+
+    def test_unknown_exit_argument_prints_help(self):
+        cli = _make_cli()
+        # _cprint goes through module-level print, and patching it here
+        # would need import juggling, so the printed hint itself is not
+        # checked. The assertions below only prove that the unknown-arg
+        # branch is reached (result True + flag False).
+        result = cli.process_command("/exit garbage")
+        assert result is True
+        assert cli._delete_session_on_exit is False
+
+
+class TestCommandRegistry:
+    def test_quit_command_advertises_delete_flag(self):
+        """The CommandDef args_hint should surface `--delete` in /help and
+        CLI autocomplete."""
+        from hermes_cli.commands import resolve_command
+        cmd = resolve_command("quit")
+        assert cmd is not None
+        assert cmd.args_hint == "[--delete]"
+
+    def test_exit_alias_resolves_to_quit_with_hint(self):
+        from hermes_cli.commands import resolve_command
+        cmd = resolve_command("exit")
+        assert cmd is not None
+        assert cmd.name == "quit"
+        assert cmd.args_hint == "[--delete]"
@@ -0,0 +1,87 @@
+"""Test that compute_next_run uses last_run_at for cron jobs.
+
+Regression test for: cron jobs computing next_run_at from _hermes_now()
+instead of from last_run_at, making them inconsistent with interval jobs.
+"""
+import pytest
+from datetime import datetime
+from zoneinfo import ZoneInfo
+
+pytest.importorskip("croniter")
+
+from cron.jobs import compute_next_run
+
+
+class TestCronComputeNextRunUsesLastRunAt:
+    """compute_next_run MUST use last_run_at as the croniter base for cron jobs,
+    consistent with how interval jobs work."""
+
+    def test_cron_uses_last_run_at_for_every_6h_schedule(self, monkeypatch):
+        """For a schedule like 'every 6 hours', the base time matters.
+        If last_run_at is Apr 6 14:10, next should be Apr 6 18:00.
+        If now is Apr 10 22:00, next should be Apr 11 00:00.
+        compute_next_run must use last_run_at, not now."""
+        morocco = ZoneInfo("Africa/Casablanca")
+
+        # Job last ran April 6 at 14:10
+        last_run = datetime(2026, 4, 6, 14, 10, 0, tzinfo=morocco)
+
+        # But now it's April 10 at 22:00 (e.g., gateway restarted)
+        now = datetime(2026, 4, 10, 22, 0, 0, tzinfo=morocco)
+        monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
+
+        schedule = {"kind": "cron", "expr": "0 */6 * * *"}  # every 6 hours
+
+        result = compute_next_run(schedule, last_run_at=last_run.isoformat())
+        assert result is not None
+        next_dt = datetime.fromisoformat(result)
+
+        # With last_run_at as base (Apr 6 14:10), next is Apr 6 18:00.
+        # With now as base (Apr 10 22:00), next is Apr 11 00:00.
+        # The fix should use last_run_at, returning Apr 6 18:00
+        # (stale detection in get_due_jobs() fast-forwards from there).
+        assert next_dt.date().isoformat() == "2026-04-06", (
+            f"Expected next run on Apr 6 (from last_run_at), got {next_dt}"
+        )
+        assert next_dt.hour == 18
+
+    def test_cron_without_last_run_at_uses_now(self, monkeypatch):
+        """When last_run_at is NOT provided, compute_next_run falls back to
+        _hermes_now() as the croniter base (existing behavior)."""
+        morocco = ZoneInfo("Africa/Casablanca")
+
+        now = datetime(2026, 4, 10, 22, 0, 0, tzinfo=morocco)
+        monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
+
+        schedule = {"kind": "cron", "expr": "0 */6 * * *"}
+
+        result = compute_next_run(schedule)
+        assert result is not None
+        next_dt = datetime.fromisoformat(result)
+
+        # Without last_run_at, should compute from now -> Apr 11 00:00
+        assert next_dt.date().isoformat() == "2026-04-11", (
+            f"Expected next run on Apr 11 (from now), got {next_dt}"
+        )
+        assert next_dt.hour == 0
+
+    def test_cron_weekly_consistent_with_interval(self, monkeypatch):
+        """Both cron and interval jobs should anchor to last_run_at when
+        provided, producing consistent behavior after a crash/restart."""
+        morocco = ZoneInfo("Africa/Casablanca")
+
+        last_run = datetime(2026, 4, 6, 14, 10, 0, tzinfo=morocco)
+        now = datetime(2026, 4, 10, 22, 0, 0, tzinfo=morocco)
+        monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
+
+        cron_schedule = {"kind": "cron", "expr": "0 14 * * 1"}
+        interval_schedule = {"kind": "interval", "minutes": 7 * 24 * 60}
+
+        cron_result = compute_next_run(cron_schedule, last_run_at=last_run.isoformat())
+        interval_result = compute_next_run(interval_schedule, last_run_at=last_run.isoformat())
+
+        # Both should be after last_run_at
+        cron_dt = datetime.fromisoformat(cron_result)
+        interval_dt = datetime.fromisoformat(interval_result)
+        assert cron_dt > last_run, f"Cron next {cron_dt} should be after last_run {last_run}"
+        assert interval_dt > last_run, f"Interval next {interval_dt} should be after last_run {last_run}"
@@ -169,10 +169,20 @@ class TestInactivityTimeout:

        assert result["final_response"] == "Done"

+    def _parse_cron_timeout(self, raw_value):
+        """Test-side mirror of the defensive timeout parsing in cron/scheduler.py run_job()."""
+        if not raw_value:
+            return 600.0
+        try:
+            return float(raw_value)
+        except (ValueError, TypeError):
+            return 600.0
+
    def test_timeout_env_var_parsing(self, monkeypatch):
        """HERMES_CRON_TIMEOUT env var is respected."""
        monkeypatch.setenv("HERMES_CRON_TIMEOUT", "1200")
-        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
+        raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        _cron_timeout = self._parse_cron_timeout(raw)
        assert _cron_timeout == 1200.0

        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
@@ -181,10 +191,27 @@ class TestInactivityTimeout:
    def test_timeout_zero_means_unlimited(self, monkeypatch):
        """HERMES_CRON_TIMEOUT=0 yields None (unlimited)."""
        monkeypatch.setenv("HERMES_CRON_TIMEOUT", "0")
-        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
+        raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        _cron_timeout = self._parse_cron_timeout(raw)
        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
        assert _cron_inactivity_limit is None

+    def test_timeout_invalid_value_falls_back_to_default(self, monkeypatch):
+        """HERMES_CRON_TIMEOUT=abc should fall back to 600s, not raise ValueError."""
+        monkeypatch.setenv("HERMES_CRON_TIMEOUT", "abc")
+        raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        _cron_timeout = self._parse_cron_timeout(raw)
+        assert _cron_timeout == 600.0
+        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
+        assert _cron_inactivity_limit == 600.0
+
+    def test_timeout_empty_string_uses_default(self, monkeypatch):
+        """HERMES_CRON_TIMEOUT='' (empty) should use the 600s default."""
+        monkeypatch.setenv("HERMES_CRON_TIMEOUT", "")
+        raw = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        _cron_timeout = self._parse_cron_timeout(raw)
+        assert _cron_timeout == 600.0
+
    def test_timeout_error_includes_diagnostics(self):
        """The TimeoutError message should include last activity info."""
        agent = SlowFakeAgent(
@@ -265,6 +265,7 @@ class TestRunJobTerminalCwd:
        class FakeAgent:
            def __init__(self, **kwargs):
                observed["skip_context_files"] = kwargs.get("skip_context_files")
+                observed["load_soul_identity"] = kwargs.get("load_soul_identity")
                observed["terminal_cwd_during_init"] = os.environ.get(
                    "TERMINAL_CWD", "_UNSET_"
                )
@@ -335,6 +336,7 @@ class TestRunJobTerminalCwd:

        # AIAgent was built with skip_context_files=False (feature ON).
        assert observed["skip_context_files"] is False
+        assert observed["load_soul_identity"] is True
        # TERMINAL_CWD was pointing at the job workdir while the agent ran.
        assert observed["terminal_cwd_during_init"] == str(tmp_path.resolve())
        assert observed["terminal_cwd_during_run"] == str(tmp_path.resolve())
@@ -373,6 +375,8 @@ class TestRunJobTerminalCwd:

        # Feature is OFF — skip_context_files stays True.
        assert observed["skip_context_files"] is True
+        # Cron still forces SOUL.md identity even when cwd context files stay off.
+        assert observed["load_soul_identity"] is True
        # TERMINAL_CWD saw the same value during init as it had before.
        assert observed["terminal_cwd_during_init"] == before
        # And after run_job completes, it's still the sentinel (nothing
@@ -279,6 +279,44 @@ class TestResolveDeliveryTarget:
            "thread_id": None,
        }

+    def test_list_form_deliver_is_normalized(self, monkeypatch):
+        """deliver=['telegram'] (Python list) should resolve like 'telegram' string.
+
+        Regression test for #17139: MCP clients / scripts that pass the deliver
+        field as an array-shaped value used to fail with "no delivery target
+        resolved for deliver=['telegram']" because ``str(['telegram'])`` was
+        passed through to ``split(',')`` verbatim.
+        """
+        monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "-4004")
+        job = {
+            "deliver": ["telegram"],
+            "origin": None,
+        }
+
+        assert _resolve_delivery_target(job) == {
+            "platform": "telegram",
+            "chat_id": "-4004",
+            "thread_id": None,
+        }
+
+    def test_list_form_multiple_platforms_normalized(self, monkeypatch):
+        """deliver=['telegram', 'discord'] resolves to multiple targets."""
+        from cron.scheduler import _resolve_delivery_targets
+
+        monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "-111")
+        monkeypatch.setenv("DISCORD_HOME_CHANNEL", "-222")
+        job = {"deliver": ["telegram", "discord"], "origin": None}
+
+        targets = _resolve_delivery_targets(job)
+        platforms = sorted(t["platform"] for t in targets)
+        assert platforms == ["discord", "telegram"]
+
+    def test_empty_list_form_deliver_resolves_to_local(self):
+        """deliver=[] is treated as local (no delivery)."""
+        from cron.scheduler import _resolve_delivery_targets
+
+        assert _resolve_delivery_targets({"deliver": []}) == []
+

 class TestDeliverResultWrapping:
    """Verify that cron deliveries are wrapped with header/footer and no longer mirrored."""
@@ -314,6 +314,7 @@ def _create_app(adapter: APIServerAdapter) -> web.Application:
    app.router.add_get("/health/detailed", adapter._handle_health_detailed)
    app.router.add_get("/v1/health", adapter._handle_health)
    app.router.add_get("/v1/models", adapter._handle_models)
+    app.router.add_get("/v1/capabilities", adapter._handle_capabilities)
    app.router.add_post("/v1/chat/completions", adapter._handle_chat_completions)
    app.router.add_post("/v1/responses", adapter._handle_responses)
    app.router.add_get("/v1/responses/{response_id}", adapter._handle_get_response)
@@ -491,6 +492,46 @@ class TestModelsEndpoint:
            assert resp.status == 200


+# ---------------------------------------------------------------------------
+# /v1/capabilities endpoint
+# ---------------------------------------------------------------------------
+
+
+class TestCapabilitiesEndpoint:
+    @pytest.mark.asyncio
+    async def test_capabilities_advertises_plugin_safe_contract(self, adapter):
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.get("/v1/capabilities")
+            assert resp.status == 200
+            data = await resp.json()
+            assert data["object"] == "hermes.api_server.capabilities"
+            assert data["platform"] == "hermes-agent"
+            assert data["model"] == "hermes-agent"
+            assert data["auth"]["type"] == "bearer"
+            assert data["auth"]["required"] is False
+            assert data["features"]["chat_completions"] is True
+            assert data["features"]["run_status"] is True
+            assert data["features"]["run_events_sse"] is True
+            assert data["features"]["session_continuity_header"] == "X-Hermes-Session-Id"
+            assert data["endpoints"]["run_status"]["path"] == "/v1/runs/{run_id}"
+
+    @pytest.mark.asyncio
+    async def test_capabilities_requires_auth_when_key_configured(self, auth_adapter):
+        app = _create_app(auth_adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.get("/v1/capabilities")
+            assert resp.status == 401
+
+            authed = await cli.get(
+                "/v1/capabilities",
+                headers={"Authorization": "Bearer sk-secret"},
+            )
+            assert authed.status == 200
+            data = await authed.json()
+            assert data["auth"]["required"] is True
+
+
 # ---------------------------------------------------------------------------
 # /v1/chat/completions endpoint
 # ---------------------------------------------------------------------------
@@ -647,17 +688,17 @@ class TestChatCompletionsEndpoint:

    @pytest.mark.asyncio
    async def test_stream_includes_tool_progress(self, adapter):
-        """tool_progress_callback fires → progress appears as custom SSE event, not in delta.content."""
+        """tool_start_callback fires → progress appears as custom SSE event, not in delta.content."""
        import asyncio

        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            async def _mock_run_agent(**kwargs):
                cb = kwargs.get("stream_delta_callback")
-                tp_cb = kwargs.get("tool_progress_callback")
-                # Simulate tool progress before streaming content
-                if tp_cb:
-                    tp_cb("tool.started", "terminal", "ls -la", {"command": "ls -la"})
+                ts_cb = kwargs.get("tool_start_callback")
+                # Simulate the structured tool start the gateway now consumes.
+                if ts_cb:
+                    ts_cb("call_terminal_1", "terminal", {"command": "ls -la"})
                if cb:
                    await asyncio.sleep(0.05)
                    cb("Here are the files.")
@@ -683,7 +724,10 @@ class TestChatCompletionsEndpoint:
                # markers instead of calling tools (#6972).
                assert "event: hermes.tool.progress" in body
                assert '"tool": "terminal"' in body
-                assert '"label": "ls -la"' in body
+                # ``label`` is now derived by ``build_tool_preview`` from the
+                # tool args rather than passed by the caller, so we assert
+                # only that *some* label exists rather than a literal value.
+                assert '"label":' in body
                # The progress marker must NOT appear inside any
                # chat.completion.chunk delta.content field.
                import json as _json
@@ -703,17 +747,17 @@ class TestChatCompletionsEndpoint:

    @pytest.mark.asyncio
    async def test_stream_tool_progress_skips_internal_events(self, adapter):
-        """Internal events (name starting with _) are not streamed."""
+        """Internal tool calls (name starting with ``_``) are not streamed."""
        import asyncio

        app = _create_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            async def _mock_run_agent(**kwargs):
                cb = kwargs.get("stream_delta_callback")
-                tp_cb = kwargs.get("tool_progress_callback")
-                if tp_cb:
-                    tp_cb("tool.started", "_thinking", "some internal state", {})
-                    tp_cb("tool.started", "web_search", "Python docs", {"query": "Python docs"})
+                ts_cb = kwargs.get("tool_start_callback")
+                if ts_cb:
+                    ts_cb("call_internal_1", "_thinking", {"text": "some internal state"})
+                    ts_cb("call_search_1", "web_search", {"query": "Python docs"})
                if cb:
                    await asyncio.sleep(0.05)
                    cb("Found it.")
@@ -735,10 +779,142 @@ class TestChatCompletionsEndpoint:
                body = await resp.text()
                # Internal _thinking event should NOT appear anywhere
                assert "some internal state" not in body
+                assert "call_internal_1" not in body
                # Real tool progress should appear as custom SSE event
                assert "event: hermes.tool.progress" in body
                assert '"tool": "web_search"' in body
-                assert '"label": "Python docs"' in body
+                # Label is derived from the args dict by build_tool_preview;
+                # asserting on the structural fact (label exists, call id
+                # is correlated) rather than a literal preview string keeps
+                # the test robust against preview-formatter tweaks.
+                assert '"label":' in body
+                assert '"toolCallId": "call_search_1"' in body
+
+    @pytest.mark.asyncio
+    async def test_stream_emits_tool_lifecycle_with_call_id(self, adapter):
+        """Regression for #16588.
+
+        ``/v1/chat/completions`` streaming previously emitted only a
+        ``tool.started``-style ``hermes.tool.progress`` event; clients
+        rendering tool lifecycle UI had no way to mark a tool as finished
+        because no matching ``status: completed`` event was emitted, and
+        no ``toolCallId`` was carried for correlation.
+
+        The fix adds ``tool_start_callback`` / ``tool_complete_callback``
+        to the chat completions agent invocation and writes both halves
+        of the lifecycle pair on the same ``event: hermes.tool.progress``
+        SSE line, with stable ``toolCallId`` and ``status``.
+        """
+        import asyncio
+        import json as _json
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            async def _mock_run_agent(**kwargs):
+                cb = kwargs.get("stream_delta_callback")
+                ts_cb = kwargs.get("tool_start_callback")
+                tc_cb = kwargs.get("tool_complete_callback")
+                # The structured callbacks own the chat-completions SSE
+                # channel now; ``tool_progress_callback`` is intentionally
+                # not wired so each tool start emits exactly one event.
+                if ts_cb:
+                    ts_cb("call_terminal_1", "terminal", {"command": "ls -la"})
+                if tc_cb:
+                    tc_cb("call_terminal_1", "terminal", {"command": "ls -la"}, "ok")
+                if cb:
+                    await asyncio.sleep(0.05)
+                    cb("done.")
+                return (
+                    {"final_response": "done.", "messages": [], "api_calls": 1},
+                    {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
+                )
+
+            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    json={
+                        "model": "test",
+                        "messages": [{"role": "user", "content": "list"}],
+                        "stream": True,
+                    },
+                )
+                assert resp.status == 200
+                body = await resp.text()
+
+            # Walk the SSE body and collect *(status, toolCallId)* pairs
+            # per event so the assertions verify per-event correlation —
+            # an event missing ``toolCallId`` would not pass even if a
+            # different event happens to carry the right id.
+            pairs: list[tuple[str | None, str | None]] = []
+            lines = body.splitlines()
+            for i, line in enumerate(lines):
+                if line.strip() != "event: hermes.tool.progress":
+                    continue
+                for follow in lines[i + 1: i + 4]:
+                    if follow.startswith("data: "):
+                        try:
+                            payload = _json.loads(follow[len("data: "):])
+                        except _json.JSONDecodeError:
+                            break
+                        pairs.append((payload.get("status"), payload.get("toolCallId")))
+                        break
+
+            # Each tool start must emit exactly one event (no duplicate
+            # legacy + new emit), and each lifecycle pair must carry the
+            # same toolCallId on every event — not just somewhere in the
+            # aggregate.
+            assert len(pairs) == 2, f"expected 2 events (running+completed), got {pairs}"
+            assert pairs[0] == ("running", "call_terminal_1"), pairs
+            assert pairs[1] == ("completed", "call_terminal_1"), pairs
+
+    @pytest.mark.asyncio
+    async def test_stream_tool_lifecycle_skips_internal_and_orphan_completes(self, adapter):
+        """Internal tools (``_thinking``-style) and ``completed`` events
+        without a prior matching ``running`` must produce no lifecycle
+        events on the wire — otherwise clients would see orphaned
+        ``status: completed`` updates they cannot correlate."""
+        import asyncio
+
+        app = _create_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            async def _mock_run_agent(**kwargs):
+                cb = kwargs.get("stream_delta_callback")
+                ts_cb = kwargs.get("tool_start_callback")
+                tc_cb = kwargs.get("tool_complete_callback")
+                # Internal tool — must be filtered.
+                if ts_cb:
+                    ts_cb("call_internal_1", "_thinking", {})
+                if tc_cb:
+                    tc_cb("call_internal_1", "_thinking", {}, "")
+                # Completion without start — orphan, must be dropped.
+                if tc_cb:
+                    tc_cb("call_orphan_1", "web_search", {}, "ok")
+                if cb:
+                    await asyncio.sleep(0.05)
+                    cb("ok.")
+                return (
+                    {"final_response": "ok.", "messages": [], "api_calls": 1},
+                    {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
+                )
+
+            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
+                resp = await cli.post(
+                    "/v1/chat/completions",
+                    json={
+                        "model": "test",
+                        "messages": [{"role": "user", "content": "ok"}],
+                        "stream": True,
+                    },
+                )
+                assert resp.status == 200
+                body = await resp.text()
+
+            # Neither the internal call_id nor the orphan call_id should
+            # surface as a lifecycle payload on the wire.
+            assert "call_internal_1" not in body
+            assert "call_orphan_1" not in body
+            assert '"status": "running"' not in body
+            assert '"status": "completed"' not in body

    @pytest.mark.asyncio
    async def test_no_user_message_returns_400(self, adapter):
@@ -1,7 +1,8 @@
-"""Tests for /v1/runs endpoints: start, events, and stop.
+"""Tests for /v1/runs endpoints: start, status, events, and stop.

 Covers:
 - POST /v1/runs — start a run (202)
+- GET /v1/runs/{run_id} — poll run status
 - GET /v1/runs/{run_id}/events — SSE event stream
 - POST /v1/runs/{run_id}/stop — interrupt a running agent
 - Auth, error handling, and cleanup
@@ -46,6 +47,7 @@ def _create_runs_app(adapter: APIServerAdapter) -> web.Application:
    app = web.Application(middlewares=mws)
    app["api_server_adapter"] = adapter
    app.router.add_post("/v1/runs", adapter._handle_runs)
+    app.router.add_get("/v1/runs/{run_id}", adapter._handle_get_run)
    app.router.add_get("/v1/runs/{run_id}/events", adapter._handle_run_events)
    app.router.add_post("/v1/runs/{run_id}/stop", adapter._handle_stop_run)
    return app
@@ -116,6 +118,13 @@ class TestStartRun:
                assert data["status"] == "started"
                assert data["run_id"].startswith("run_")

+                status_resp = await cli.get(f"/v1/runs/{data['run_id']}")
+                assert status_resp.status == 200
+                status = await status_resp.json()
+                assert status["run_id"] == data["run_id"]
+                assert status["status"] in {"queued", "running", "completed"}
+                assert status["object"] == "hermes.run"
+
    @pytest.mark.asyncio
    async def test_start_invalid_json_returns_400(self, adapter):
        app = _create_runs_app(adapter)
@@ -143,6 +152,18 @@ class TestStartRun:
            resp = await cli.post("/v1/runs", json={"input": ""})
        assert resp.status == 400

+    @pytest.mark.asyncio
+    async def test_start_invalid_history_does_not_allocate_run(self, adapter):
+        app = _create_runs_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.post(
+                "/v1/runs",
+                json={"input": "hello", "conversation_history": {"role": "user"}},
+            )
+        assert resp.status == 400
+        assert adapter._run_streams == {}
+        assert adapter._run_statuses == {}
+
    @pytest.mark.asyncio
    async def test_start_requires_auth(self, auth_adapter):
        app = _create_runs_app(auth_adapter)
@@ -170,6 +191,89 @@ class TestStartRun:
                assert resp.status == 202


+# ---------------------------------------------------------------------------
+# GET /v1/runs/{run_id} — poll run status
+# ---------------------------------------------------------------------------
+
+
+class TestRunStatus:
+    @pytest.mark.asyncio
+    async def test_status_completed_run_includes_output_and_usage(self, adapter):
+        app = _create_runs_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_create_agent") as mock_create:
+                mock_agent = MagicMock()
+                mock_agent.run_conversation.return_value = {"final_response": "done"}
+                mock_agent.session_prompt_tokens = 4
+                mock_agent.session_completion_tokens = 2
+                mock_agent.session_total_tokens = 6
+                mock_create.return_value = mock_agent
+
+                resp = await cli.post("/v1/runs", json={"input": "hello"})
+                data = await resp.json()
+                run_id = data["run_id"]
+
+                for _ in range(20):
+                    status_resp = await cli.get(f"/v1/runs/{run_id}")
+                    assert status_resp.status == 200
+                    status = await status_resp.json()
+                    if status["status"] == "completed":
+                        break
+                    await asyncio.sleep(0.05)
+
+                assert status["status"] == "completed"
+                assert status["output"] == "done"
+                assert status["usage"]["total_tokens"] == 6
+                assert status["last_event"] == "run.completed"
+
+    @pytest.mark.asyncio
+    async def test_status_reflects_explicit_session_id(self, adapter):
+        app = _create_runs_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            with patch.object(adapter, "_create_agent") as mock_create:
+                mock_agent = MagicMock()
+                mock_agent.run_conversation.return_value = {"final_response": "done"}
+                mock_agent.session_prompt_tokens = 0
+                mock_agent.session_completion_tokens = 0
+                mock_agent.session_total_tokens = 0
+                mock_create.return_value = mock_agent
+
+                resp = await cli.post(
+                    "/v1/runs",
+                    json={"input": "hello", "session_id": "space-session"},
+                )
+                data = await resp.json()
+                run_id = data["run_id"]
+
+                for _ in range(20):
+                    status_resp = await cli.get(f"/v1/runs/{run_id}")
+                    status = await status_resp.json()
+                    if status["status"] == "completed":
+                        break
+                    await asyncio.sleep(0.05)
+
+                mock_agent.run_conversation.assert_called_once()
+                # task_id stays "default" so the Runs API shares one sandbox
+                # container with CLI/gateway; session_id is surfaced in status
+                # for external UIs to correlate runs with their own session IDs.
+                assert mock_agent.run_conversation.call_args.kwargs["task_id"] == "default"
+                assert status["session_id"] == "space-session"
+
+    @pytest.mark.asyncio
+    async def test_status_not_found_returns_404(self, adapter):
+        app = _create_runs_app(adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.get("/v1/runs/run_nonexistent")
+        assert resp.status == 404
+
+    @pytest.mark.asyncio
+    async def test_status_requires_auth(self, auth_adapter):
+        app = _create_runs_app(auth_adapter)
+        async with TestClient(TestServer(app)) as cli:
+            resp = await cli.get("/v1/runs/run_any")
+        assert resp.status == 401
+
+
 # ---------------------------------------------------------------------------
 # GET /v1/runs/{run_id}/events — SSE event stream
 # ---------------------------------------------------------------------------
@@ -257,6 +361,11 @@ class TestStopRun:
                # Agent interrupt should have been called
                mock_agent.interrupt.assert_called_once_with("Stop requested via API")

+                status_resp = await cli.get(f"/v1/runs/{run_id}")
+                assert status_resp.status == 200
+                status_data = await status_resp.json()
+                assert status_data["status"] in {"stopping", "cancelled"}
+
                # Refs should be cleaned up
                await asyncio.sleep(0.5)
                assert run_id not in adapter._active_run_agents
@@ -33,6 +33,11 @@ def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None):
            "backend": "TERMINAL_ENV",
            "cwd": "TERMINAL_CWD",
            "timeout": "TERMINAL_TIMEOUT",
+            "vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
+            "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
+            "container_cpu": "TERMINAL_CONTAINER_CPU",
+            "container_memory": "TERMINAL_CONTAINER_MEMORY",
+            "container_disk": "TERMINAL_CONTAINER_DISK",
        }
        for cfg_key, env_var in terminal_env_map.items():
            if cfg_key in terminal_cfg:
@@ -41,6 +46,10 @@ def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None):
                # TERMINAL_CWD.  Mirrors the fix in gateway/run.py.
                if cfg_key == "cwd" and str(val) in (".", "auto", "cwd"):
                    continue
+                # Expand a leading "~" because subprocess.Popen performs no shell
+                # expansion; a literal "~/" cwd would raise FileNotFoundError.
+                if cfg_key == "cwd" and isinstance(val, str):
+                    val = os.path.expanduser(val)
                if isinstance(val, list):
                    env[env_var] = json.dumps(val)
                else:
@@ -55,6 +64,8 @@ def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None):
        if alias_env not in env:
            alias_val = cfg.get(alias_key)
            if isinstance(alias_val, str) and alias_val.strip():
+                if alias_key == "cwd":
+                    alias_val = os.path.expanduser(alias_val)
                env[alias_env] = alias_val.strip()

    # --- Replicate lines 144-147: MESSAGING_CWD fallback ---
@@ -205,3 +216,53 @@ class TestNestedTerminalCwdPlaceholderSkip:
        assert result["TERMINAL_ENV"] == "docker"
        assert result["TERMINAL_TIMEOUT"] == "300"
        assert result["TERMINAL_CWD"] == "/from/env"
+
+
+class TestTildeExpansion:
+    """terminal.cwd values containing shell tilde must be expanded.
+
+    subprocess.Popen does not expand shell syntax, so a literal "~/"
+    causes FileNotFoundError.  Regression test for commit 3c42064e.
+    """
+
+    def test_terminal_cwd_tilde_expanded(self):
+        """terminal.cwd: '~/projects' should expand to <home dir>/projects."""
+        cfg = {"terminal": {"cwd": "~/projects"}}
+        result = _simulate_config_bridge(cfg)
+        assert result["TERMINAL_CWD"] == os.path.expanduser("~/projects")
+
+    def test_top_level_cwd_tilde_expanded(self):
+        """top-level cwd: '~/' should expand to user's home directory."""
+        cfg = {"cwd": "~/"}
+        result = _simulate_config_bridge(cfg)
+        assert result["TERMINAL_CWD"] == os.path.expanduser("~/")
+
+    def test_tilde_with_nested_precedence(self):
+        """Nested terminal.cwd should win over top-level cwd and be tilde-expanded."""
+        cfg = {
+            "cwd": "~/top",
+            "terminal": {"cwd": "~/nested"},
+        }
+        result = _simulate_config_bridge(cfg)
+        assert result["TERMINAL_CWD"] == os.path.expanduser("~/nested")
+
+
+class TestVercelTerminalBridge:
+    def test_vercel_terminal_settings_bridge(self):
+        cfg = {
+            "terminal": {
+                "backend": "vercel_sandbox",
+                "vercel_runtime": "python3.13",
+                "container_persistent": True,
+                "container_cpu": 2,
+                "container_memory": 4096,
+                "container_disk": 51200,
+            }
+        }
+        result = _simulate_config_bridge(cfg, {"MESSAGING_CWD": "/from/env"})
+        assert result["TERMINAL_ENV"] == "vercel_sandbox"
+        assert result["TERMINAL_VERCEL_RUNTIME"] == "python3.13"
+        assert result["TERMINAL_CONTAINER_PERSISTENT"] == "True"
+        assert result["TERMINAL_CONTAINER_CPU"] == "2"
+        assert result["TERMINAL_CONTAINER_MEMORY"] == "4096"
+        assert result["TERMINAL_CONTAINER_DISK"] == "51200"
@@ -14,8 +14,15 @@ from gateway.run import GatewayRunner
 class StubAdapter(BasePlatformAdapter):
    """Adapter whose connect() result can be controlled."""

-    def __init__(self, *, succeed=True, fatal_error=None, fatal_retryable=True):
-        super().__init__(PlatformConfig(enabled=True, token="test"), Platform.TELEGRAM)
+    def __init__(
+        self,
+        *,
+        platform=Platform.TELEGRAM,
+        succeed=True,
+        fatal_error=None,
+        fatal_retryable=True,
+    ):
+        super().__init__(PlatformConfig(enabled=True, token="test"), platform)
        self._succeed = succeed
        self._fatal_error = fatal_error
        self._fatal_retryable = fatal_retryable
@@ -65,6 +72,85 @@ def _make_runner():

 # --- Startup queueing ---

+class TestStartupPlatformIsolation:
+    """Verify one blocked platform cannot prevent later platforms from starting."""
+
+    @pytest.mark.asyncio
+    async def test_start_continues_after_platform_connect_timeout(self, tmp_path):
+        """A timeout on Telegram should queue it and still connect Feishu."""
+        runner = _make_runner()
+        runner.config = GatewayConfig(
+            platforms={
+                Platform.TELEGRAM: PlatformConfig(enabled=True, token="test"),
+                Platform.FEISHU: PlatformConfig(enabled=True, token="test"),
+            },
+            sessions_dir=tmp_path,
+        )
+        runner.hooks = MagicMock()
+        runner.hooks.loaded_hooks = []
+        runner.hooks.emit = AsyncMock()
+        runner._suspend_stuck_loop_sessions = MagicMock(return_value=0)
+        runner._update_runtime_status = MagicMock()
+        runner._update_platform_runtime_status = MagicMock()
+        runner._sync_voice_mode_state_to_adapter = MagicMock()
+        runner._send_update_notification = AsyncMock(return_value=True)
+        runner._send_restart_notification = AsyncMock()
+
+        adapters = {
+            Platform.TELEGRAM: StubAdapter(platform=Platform.TELEGRAM),
+            Platform.FEISHU: StubAdapter(platform=Platform.FEISHU),
+        }
+        runner._create_adapter = MagicMock(
+            side_effect=lambda platform, _config: adapters[platform]
+        )
+        runner._connect_adapter_with_timeout = AsyncMock(
+            side_effect=[
+                TimeoutError("telegram connect timed out after 30s"),
+                True,
+            ]
+        )
+
+        def fake_create_task(coro):
+            coro.close()
+            return MagicMock()
+
+        with patch("gateway.status.write_runtime_status"):
+            with patch("hermes_cli.plugins.discover_plugins"):
+                with patch("hermes_cli.config.load_config", return_value={}):
+                    with patch("agent.shell_hooks.register_from_config"):
+                        with patch(
+                            "tools.process_registry.process_registry.recover_from_checkpoint",
+                            return_value=0,
+                        ):
+                            with patch(
+                                "gateway.channel_directory.build_channel_directory",
+                                new=AsyncMock(return_value={"platforms": {}}),
+                            ):
+                                with patch("gateway.run.asyncio.create_task", side_effect=fake_create_task):
+                                    assert await runner.start() is True
+
+        assert Platform.TELEGRAM in runner._failed_platforms
+        assert Platform.FEISHU in runner.adapters
+        assert Platform.TELEGRAM not in runner.adapters
+        assert runner._create_adapter.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_connect_adapter_timeout_raises_retryable_exception(self, monkeypatch):
+        """The timeout helper turns a hanging connect into a raised TimeoutError."""
+        runner = _make_runner()
+        adapter = StubAdapter()
+
+        async def hang():
+            await asyncio.sleep(60)
+            return True
+
+        adapter.connect = hang
+        monkeypatch.setenv("HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT", "0.001")
+
+        with pytest.raises(TimeoutError, match="telegram connect timed out"):
+            await runner._connect_adapter_with_timeout(adapter, Platform.TELEGRAM)
+
+
 class TestStartupFailureQueuing:
    """Verify that failed platforms are queued during startup."""

@@ -230,3 +230,30 @@ class TestHandleResumeCommand:

        assert real_key not in runner._running_agents
        db.close()
+
+    @pytest.mark.asyncio
+    async def test_resume_evicts_cached_agent(self, tmp_path):
+        """Gateway /resume evicts the cached AIAgent so the next message
+        rebuilds with the correct session_id end-to-end — mirrors /branch
+        and /reset. Without this, the cached agent's memory provider keeps
+        writing into the wrong session. See #6672.
+        """
+        import threading
+        from hermes_state import SessionDB
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("old_session", "telegram")
+        db.set_session_title("old_session", "Old Work")
+        db.create_session("current_session_001", "telegram")
+
+        event = _make_event(text="/resume Old Work")
+        runner = _make_runner(session_db=db, current_session_id="current_session_001",
+                              event=event)
+        # Seed the cache with a fake agent
+        real_key = _session_key_for_event(event)
+        runner._agent_cache = {real_key: (MagicMock(), object())}
+        runner._agent_cache_lock = threading.RLock()
+
+        await runner._handle_resume_command(event)
+
+        assert real_key not in runner._agent_cache
+        db.close()
@@ -800,15 +800,23 @@ class TestSignalSendDocumentViaHelper:


 # ---------------------------------------------------------------------------
-# send() returns message_id from timestamp (#4647)
+# Signal streaming edit capability / message_id behavior
 # ---------------------------------------------------------------------------

+class TestSignalStreamingCapabilities:
+    """Signal must opt out of edit-based streaming behavior."""
+
+    def test_signal_declares_no_message_editing(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch)
+
+        assert adapter.SUPPORTS_MESSAGE_EDITING is False
+
+
 class TestSignalSendReturnsMessageId:
-    """Signal send() must return a timestamp-based message_id so the stream
-    consumer can follow its edit→fallback path correctly."""
+    """Signal send() should not pretend sent messages are editable."""

    @pytest.mark.asyncio
-    async def test_send_returns_timestamp_as_message_id(self, monkeypatch):
+    async def test_send_returns_none_message_id_even_with_timestamp(self, monkeypatch):
        adapter = _make_signal_adapter(monkeypatch)
        mock_rpc, _ = _stub_rpc({"timestamp": 1712345678000})
        adapter._rpc = mock_rpc
@@ -817,7 +825,7 @@ class TestSignalSendReturnsMessageId:
        result = await adapter.send(chat_id="+155****4567", content="hello")

        assert result.success is True
-        assert result.message_id == "1712345678000"
+        assert result.message_id is None

    @pytest.mark.asyncio
    async def test_send_returns_none_message_id_when_no_timestamp(self, monkeypatch):
@@ -997,3 +1005,100 @@ class TestSignalTypingBackoff:

        assert "+155****4567" not in adapter._typing_failures
        assert "+155****4567" not in adapter._typing_skip_until
+
+
+# ---------------------------------------------------------------------------
+# Reply quote extraction
+# ---------------------------------------------------------------------------
+
+class TestSignalQuoteExtraction:
+    """Verify Signal reply quote fields are propagated to MessageEvent."""
+
+    @pytest.mark.asyncio
+    async def test_handle_envelope_sets_reply_context_from_quote(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch)
+        captured = {}
+
+        async def fake_handle(event):
+            captured["event"] = event
+
+        adapter.handle_message = fake_handle
+
+        await adapter._handle_envelope({
+            "envelope": {
+                "sourceNumber": "+15550001111",
+                "sourceUuid": "uuid-sender",
+                "sourceName": "Tester",
+                "timestamp": 1000000000,
+                "dataMessage": {
+                    "message": "yes I agree",
+                    "quote": {
+                        "id": 99,
+                        "text": "want to grab lunch?",
+                        "author": "+15550002222",
+                    },
+                },
+            }
+        })
+
+        event = captured["event"]
+        assert event.text == "yes I agree"
+        assert event.reply_to_message_id == "99"
+        assert event.reply_to_text == "want to grab lunch?"
+
+    @pytest.mark.asyncio
+    async def test_handle_envelope_without_quote_leaves_reply_fields_none(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch)
+        captured = {}
+
+        async def fake_handle(event):
+            captured["event"] = event
+
+        adapter.handle_message = fake_handle
+
+        await adapter._handle_envelope({
+            "envelope": {
+                "sourceNumber": "+15550001111",
+                "sourceUuid": "uuid-sender",
+                "sourceName": "Tester",
+                "timestamp": 1000000000,
+                "dataMessage": {
+                    "message": "plain message",
+                },
+            }
+        })
+
+        event = captured["event"]
+        assert event.text == "plain message"
+        assert event.reply_to_message_id is None
+        assert event.reply_to_text is None
+
+    @pytest.mark.asyncio
+    async def test_handle_envelope_quote_without_text_sets_only_reply_id(self, monkeypatch):
+        adapter = _make_signal_adapter(monkeypatch)
+        captured = {}
+
+        async def fake_handle(event):
+            captured["event"] = event
+
+        adapter.handle_message = fake_handle
+
+        await adapter._handle_envelope({
+            "envelope": {
+                "sourceNumber": "+15550001111",
+                "sourceUuid": "uuid-sender",
+                "sourceName": "Tester",
+                "timestamp": 1000000000,
+                "dataMessage": {
+                    "message": "reply without quote text",
+                    "quote": {
+                        "id": 123,
+                        "author": "+15550002222",
+                    },
+                },
+            }
+        })
+
+        event = captured["event"]
+        assert event.reply_to_message_id == "123"
+        assert event.reply_to_text is None
@@ -0,0 +1,452 @@
+"""Tests for Signal _markdown_to_signal() formatting.
+
+Covers the markdown-to-bodyRanges conversion pipeline: bold, italic,
+strikethrough, monospace, code blocks, headings, and — critically — the
+false-positive regressions that caused spurious italics in production.
+
+Also covers the adapter-level streaming properties:
+SUPPORTS_MESSAGE_EDITING and the message_id returned by send().
+"""
+
+import pytest
+
+from gateway.config import PlatformConfig
+from gateway.platforms.signal import SignalAdapter
+
+
+# ---------------------------------------------------------------------------
+# Helper
+# ---------------------------------------------------------------------------
+
+def _m2s(text: str):
+    """Convert *text* via ``SignalAdapter._markdown_to_signal``; callers unpack (plain_text, styles)."""
+    converted = SignalAdapter._markdown_to_signal(text)
+    return converted
+
+
+def _style_types(styles: list[str]) -> list[str]:
+    """Return the trailing STYLE name of each 'start:length:STYLE' entry, in order."""
+    kinds: list[str] = []
+    for entry in styles:
+        # Split once from the right: everything after the last colon is the style name.
+        kinds.append(entry.rsplit(":", 1)[1])
+    return kinds
+
+
+def _find_style(styles: list[str], style_type: str) -> list[str]:
+    """Keep only the entries of *styles* whose STYLE suffix equals *style_type*."""
+    suffix = f":{style_type}"
+    return list(filter(lambda entry: entry.endswith(suffix), styles))
+
+
+# ===========================================================================
+# Basic formatting
+# ===========================================================================
+
+class TestMarkdownToSignalBasic:
+    """Each supported markdown construct yields the expected plain text and style."""
+
+    def test_bold_double_asterisk(self):
+        """``**x**`` is stripped and reported as a single BOLD range."""
+        plain, ranges = _m2s("hello **world**")
+        assert plain == "hello world"
+        assert _style_types(ranges) == ["BOLD"]
+
+    def test_bold_double_underscore(self):
+        """``__x__`` is stripped and reported as a single BOLD range."""
+        plain, ranges = _m2s("hello __world__")
+        assert plain == "hello world"
+        assert _style_types(ranges) == ["BOLD"]
+
+    def test_italic_single_asterisk(self):
+        """``*x*`` is stripped and reported as a single ITALIC range."""
+        plain, ranges = _m2s("hello *world*")
+        assert plain == "hello world"
+        assert _style_types(ranges) == ["ITALIC"]
+
+    def test_italic_single_underscore(self):
+        """``_x_`` is stripped and reported as a single ITALIC range."""
+        plain, ranges = _m2s("hello _world_")
+        assert plain == "hello world"
+        assert _style_types(ranges) == ["ITALIC"]
+
+    def test_strikethrough(self):
+        """``~~x~~`` is stripped and reported as a single STRIKETHROUGH range."""
+        plain, ranges = _m2s("hello ~~world~~")
+        assert plain == "hello world"
+        assert _style_types(ranges) == ["STRIKETHROUGH"]
+
+    def test_inline_monospace(self):
+        """Backtick spans are stripped and reported as a single MONOSPACE range."""
+        plain, ranges = _m2s("run `ls -la` now")
+        assert plain == "run ls -la now"
+        assert _style_types(ranges) == ["MONOSPACE"]
+
+    def test_fenced_code_block(self):
+        """Fences disappear, the code stays, and at least one MONOSPACE range exists."""
+        plain, ranges = _m2s("before\n```\ncode here\n```\nafter")
+        assert "code here" in plain
+        assert "```" not in plain
+        assert _find_style(ranges, "MONOSPACE") != []
+
+    def test_heading_becomes_bold(self):
+        """A ``##`` heading loses its marker and becomes a single BOLD range."""
+        plain, ranges = _m2s("## Section Title")
+        assert plain == "Section Title"
+        assert _style_types(ranges) == ["BOLD"]
+
+    def test_multiple_styles(self):
+        """Bold and italic in one message are both reported."""
+        plain, ranges = _m2s("**bold** and *italic*")
+        assert plain == "bold and italic"
+        assert {"BOLD", "ITALIC"} <= set(_style_types(ranges))
+
+    def test_plain_text_no_styles(self):
+        """Text without markup passes through untouched with no styles."""
+        plain, ranges = _m2s("just plain text")
+        assert plain == "just plain text"
+        assert ranges == []
+
+    def test_empty_string(self):
+        """The empty string converts to the empty string with no styles."""
+        plain, ranges = _m2s("")
+        assert plain == ""
+        assert ranges == []
+
+
+# ===========================================================================
+# Italic false-positive regressions
+# ===========================================================================
+
+class TestItalicFalsePositives:
+    """Regressions from signal-italic-false-positive-fix.md and
+    signal-italic-bullet-list-fix.md.
+
+    Every "must NOT" case asserts that no ITALIC style is emitted; the
+    final group confirms that genuine italic spans are still produced.
+    """
+
+    # --- snake_case (original fix) ---
+
+    def test_snake_case_not_italic(self):
+        """snake_case identifiers must NOT be italicized."""
+        text, styles = _m2s("the config_file is ready")
+        assert text == "the config_file is ready"
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_multiple_snake_case(self):
+        """Several UPPER_SNAKE names on one line must not pair up as italic."""
+        _, styles = _m2s("set OPENAI_API_KEY and ANTHROPIC_API_KEY")
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_snake_case_path(self):
+        """An underscore inside a file path must not open an italic span."""
+        _, styles = _m2s("/tools/delegate_tool.py")
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_snake_case_between_words(self):
+        """file_path and error_code — underscores between words."""
+        _, styles = _m2s("file_path and error_code")
+        assert _find_style(styles, "ITALIC") == []
+
+    # --- Bullet lists (second fix) ---
+
+    def test_bullet_list_not_italic(self):
+        """* item lines must NOT be treated as italic delimiters."""
+        md = "* item one\n* item two\n* item three"
+        _, styles = _m2s(md)
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_bullet_list_with_content_before(self):
+        """A bullet list preceded by a paragraph must not produce italics."""
+        md = "Here are things:\n\n* first thing\n* second thing"
+        _, styles = _m2s(md)
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_bullet_list_file_paths(self):
+        """Real-world case that triggered the bug."""
+        md = (
+            "* tools/delegate_tool.py — delegation\n"
+            "* tools/file_tools.py — file operations\n"
+            "* tools/web_tools.py — web operations"
+        )
+        _, styles = _m2s(md)
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_bullet_with_italic_inside(self):
+        """Italic *inside* a bullet item should still work."""
+        md = "* this has *emphasis* inside\n* plain item"
+        text, styles = _m2s(md)
+        italic_styles = _find_style(styles, "ITALIC")
+        assert len(italic_styles) == 1
+        assert "emphasis" in text
+        # The italic must cover exactly "emphasis", not the whole bullet.
+        # start/length are UTF-16 code units, so slice the UTF-16-LE bytes
+        # (two bytes per unit) rather than indexing the str directly.
+        start, length = (int(n) for n in italic_styles[0].split(":")[:2])
+        units = text.encode("utf-16-le")
+        covered = units[start * 2 : (start + length) * 2].decode("utf-16-le")
+        assert covered == "emphasis"
+
+    # --- Cross-line spans (DOTALL removal) ---
+
+    def test_star_italic_no_cross_line(self):
+        """*foo\\nbar* must NOT match as italic (no DOTALL)."""
+        _, styles = _m2s("*foo\nbar*")
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_underscore_italic_no_cross_line(self):
+        """_foo\\nbar_ must NOT match as italic (no DOTALL)."""
+        _, styles = _m2s("_foo\nbar_")
+        assert _find_style(styles, "ITALIC") == []
+
+    def test_star_italic_multiline_response(self):
+        """Multi-paragraph response with * should not false-positive."""
+        md = (
+            "I checked the following files:\n\n"
+            "* tools/delegate_tool.py — sub-agent delegation\n"
+            "* tools/file_tools.py — file read/write/search\n"
+            "* tools/web_tools.py — web search/extract\n\n"
+            "Everything looks good."
+        )
+        _, styles = _m2s(md)
+        assert _find_style(styles, "ITALIC") == []
+
+    # --- Legitimate italic still works ---
+
+    def test_star_italic_still_works(self):
+        """A well-formed *span* in running text is still italicized."""
+        text, styles = _m2s("this is *italic* text")
+        assert text == "this is italic text"
+        assert len(_find_style(styles, "ITALIC")) == 1
+
+    def test_underscore_italic_still_works(self):
+        """A well-formed _span_ in running text is still italicized."""
+        text, styles = _m2s("this is _italic_ text")
+        assert text == "this is italic text"
+        assert len(_find_style(styles, "ITALIC")) == 1
+
+    def test_multiple_italic_same_line(self):
+        """Two separate italic spans on one line yield two ITALIC styles."""
+        text, styles = _m2s("*foo* and *bar* ok")
+        assert text == "foo and bar ok"
+        assert len(_find_style(styles, "ITALIC")) == 2
+
+    def test_italic_single_word(self):
+        """A message that is only one italic word is handled."""
+        text, styles = _m2s("*word*")
+        assert text == "word"
+        assert len(_find_style(styles, "ITALIC")) == 1
+
+    def test_italic_multi_word(self):
+        """An italic span may contain spaces."""
+        text, styles = _m2s("*several words here*")
+        assert text == "several words here"
+        assert len(_find_style(styles, "ITALIC")) == 1
+
+
+# ===========================================================================
+# Style position accuracy
+# ===========================================================================
+
+class TestStylePositions:
+    """Check that each emitted start:length pair selects the intended substring."""
+
+    def _extract(self, text: str, style_str: str) -> str:
+        """Return the part of *text* addressed by a 'start:length:STYLE' string.
+
+        Offsets are treated as UTF-16 code units, so the text is encoded as
+        UTF-16-LE (two bytes per unit), sliced by byte, and decoded again.
+        """
+        fields = style_str.split(":")
+        begin = int(fields[0])
+        count = int(fields[1])
+        end = begin + count
+        utf16 = text.encode("utf-16-le")
+        return utf16[2 * begin : 2 * end].decode("utf-16-le")
+
+    def test_bold_position(self):
+        """The lone BOLD range addresses exactly the bolded word."""
+        plain, ranges = _m2s("hello **world** end")
+        assert len(ranges) == 1
+        (only,) = ranges
+        assert self._extract(plain, only) == "world"
+
+    def test_italic_position(self):
+        """The lone ITALIC range addresses exactly the italicized word."""
+        plain, ranges = _m2s("hello *world* end")
+        assert len(ranges) == 1
+        (only,) = ranges
+        assert self._extract(plain, only) == "world"
+
+    def test_multiple_styles_positions(self):
+        """With two ranges, each one addresses its own word."""
+        plain, ranges = _m2s("**bold** then *italic*")
+        assert len(ranges) == 2
+        covered = set(map(lambda entry: self._extract(plain, entry), ranges))
+        assert covered == {"bold", "italic"}
+
+    def test_emoji_utf16_offset(self):
+        """Emoji (multi-byte UTF-16) before a styled span."""
+        plain, ranges = _m2s("👋 **hello**")
+        assert plain == "👋 hello"
+        assert len(ranges) == 1
+        (only,) = ranges
+        assert self._extract(plain, only) == "hello"
+
+
+# ===========================================================================
+# Edge cases
+# ===========================================================================
+
+class TestEdgeCases:
+    """Awkward inputs that have broken the converter before or plausibly could."""
+
+    def test_bold_inside_bullet(self):
+        """Bold inside a bullet list item."""
+        source = "* **important** item\n* normal item"
+        _, ranges = _m2s(source)
+        bold = _find_style(ranges, "BOLD")
+        italic = _find_style(ranges, "ITALIC")
+        assert len(bold) == 1
+        assert italic == []
+
+    def test_code_span_with_underscores(self):
+        """`snake_case_var` — backtick takes priority over underscore."""
+        plain, ranges = _m2s("use `my_var_name` here")
+        assert plain == "use my_var_name here"
+        kinds = _style_types(ranges)
+        assert "MONOSPACE" in kinds
+        assert "ITALIC" not in kinds
+
+    def test_bold_and_italic_nested(self):
+        """***word*** is ambiguous; this only checks the word survives conversion."""
+        # Whether ** wraps *word* or *** is parsed some other way is left
+        # open here; no assertion is made about the resulting styles.
+        plain, _ = _m2s("***word***")
+        assert "word" in plain
+
+    def test_lone_asterisk(self):
+        """A single * with no pair should not cause issues."""
+        plain, _ = _m2s("5 * 3 = 15")
+        # Smoke test: conversion must not crash and the operands must survive.
+        assert "5" in plain
+        assert "15" in plain
+
+    def test_lone_underscore(self):
+        """A single _ with no pair."""
+        plain, _ = _m2s("this _ that")
+        assert plain == "this _ that"
+
+    def test_consecutive_underscored_words(self):
+        """_foo and _bar (leading underscores, no closers)."""
+        _, ranges = _m2s("call _init and _setup")
+        assert _find_style(ranges, "ITALIC") == []
+
+    def test_mixed_formatting_no_bleed(self):
+        """Multiple format types don't bleed into each other."""
+        source = "**bold** and `code` and *italic* and ~~strike~~"
+        plain, ranges = _m2s(source)
+        assert plain == "bold and code and italic and strike"
+        expected_kinds = ["BOLD", "ITALIC", "MONOSPACE", "STRIKETHROUGH"]
+        assert sorted(_style_types(ranges)) == expected_kinds
+
+
+# ===========================================================================
+# signal-markdown-strip-patch: core conversion pipeline
+# ===========================================================================
+
+class TestMarkdownStripPatch:
+    """Tests for the original signal-markdown-strip-patch.
+
+    Exercises fenced code blocks (with and without a language tag), link
+    pass-through, heading-to-bold conversion, UTF-16 offsets after
+    surrogate-pair characters, newline collapsing, and marker stripping.
+    """
+
+    def test_fenced_code_block_with_language_tag(self):
+        """A fence with a language tag drops fence and tag and styles the code MONOSPACE."""
+        plain, ranges = _m2s("```python\nprint('hello')\n```")
+        assert "```" not in plain
+        assert "python" not in plain  # language tag stripped
+        assert "print('hello')" in plain
+        assert _find_style(ranges, "MONOSPACE") != []
+
+    def test_fenced_code_block_multiline(self):
+        """Multi-line code blocks preserve all lines."""
+        source = "```\nline1\nline2\nline3\n```"
+        plain, _ = _m2s(source)
+        for line in ("line1", "line2", "line3"):
+            assert line in plain
+        assert "```" not in plain
+
+    def test_links_preserved(self):
+        """The URL of a [text](url) link survives conversion."""
+        source = "Check [this link](https://example.com) for details"
+        plain, _ = _m2s(source)
+        # Only the URL is checked; the surrounding link syntax may or may not remain.
+        assert "https://example.com" in plain
+
+    def test_heading_h1(self):
+        """# H1 becomes bold text."""
+        plain, ranges = _m2s("# Main Title")
+        assert plain == "Main Title"
+        assert _style_types(ranges) == ["BOLD"]
+
+    def test_heading_h3(self):
+        """### H3 becomes bold text."""
+        plain, ranges = _m2s("### Sub Section")
+        assert plain == "Sub Section"
+        assert _style_types(ranges) == ["BOLD"]
+
+    def test_multiple_headings(self):
+        """Multiple headings each become separate bold spans."""
+        source = "## First\n\nSome text\n\n## Second"
+        plain, ranges = _m2s(source)
+        assert "First" in plain
+        assert "Second" in plain
+        assert "##" not in plain
+        assert len(_find_style(ranges, "BOLD")) == 2
+
+    def test_no_raw_markdown_markers_in_output(self):
+        """Paired style markers are removed from the plain-text output."""
+        source = "**bold** and *italic* and ~~struck~~ and `code` and ## heading"
+        plain, _ = _m2s(source)
+        # The trailing "##" is not at line start, so it is allowed to remain;
+        # only the paired style markers are checked.
+        for marker in ("**", "~~", "`"):
+            assert marker not in plain
+
+    def test_utf16_surrogate_pair_emoji(self):
+        """Emoji requiring UTF-16 surrogate pairs don't corrupt offsets."""
+        # 🎉 is U+1F389 — requires surrogate pair (2 UTF-16 code units)
+        plain, ranges = _m2s("🎉🎉 **test**")
+        assert "test" in plain
+        assert len(ranges) == 1
+        fields = ranges[0].split(":")
+        offset = int(fields[0])
+        span = int(fields[1])
+        # 🎉🎉 = 4 UTF-16 code units + space = 5, then "test" = 4
+        assert offset == 5
+        assert span == 4
+
+    def test_consecutive_newlines_collapsed(self):
+        """3+ consecutive newlines are collapsed to 2."""
+        plain, _ = _m2s("first\n\n\n\n\nsecond")
+        assert "\n\n\n" not in plain
+        assert "first" in plain
+        assert "second" in plain
+
+    def test_empty_bold_not_crash(self):
+        """**** (empty bold) should not crash."""
+        plain, _ = _m2s("before **** after")
+        # Smoke test: the call must not raise; the exact output is not pinned.
+        assert "before" in plain
+
+
+# ===========================================================================
+# signal-streaming-patch: SUPPORTS_MESSAGE_EDITING and send() behavior
+# ===========================================================================
+
+class TestSignalStreamingPatch:
+    """Tests for signal-streaming-patch: cursor suppression and edit support.
+
+    These verify the adapter-level properties that prevent the streaming
+    cursor from leaking into Signal messages.
+
+    SignalAdapter and PlatformConfig come from the module-level imports.
+    """
+
+    def test_signal_does_not_support_editing(self, monkeypatch):
+        """SignalAdapter.SUPPORTS_MESSAGE_EDITING must be False."""
+        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")
+        assert SignalAdapter.SUPPORTS_MESSAGE_EDITING is False
+
+    @pytest.mark.asyncio
+    async def test_send_returns_no_message_id(self, monkeypatch):
+        """send() returns message_id=None so stream consumer uses no-edit path."""
+        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")
+
+        config = PlatformConfig(enabled=True)
+        config.extra = {
+            "http_url": "http://localhost:8080",
+            "account": "+15551234567",
+        }
+        adapter = SignalAdapter(config)
+
+        # Stub the RPC call with a canned response that carries a timestamp.
+        async def mock_rpc(method, params, rpc_id=None):
+            return {"timestamp": 1234567890}
+
+        adapter._rpc = mock_rpc
+
+        result = await adapter.send(
+            chat_id="+15559876543",
+            content="Hello",
+        )
+        assert result.message_id is None
--- a/Show More
+++ b/Show More