fix(error_classifier): retry mid-stream SSL/TLS alert errors as transport

Mid-stream SSL alerts (bad_record_mac, tls_alert_internal_error, handshake failures) previously fell through the classifier pipeline to the 'unknown' bucket because: - ssl.SSLError type names weren't in _TRANSPORT_ERROR_TYPES (the isinstance(OSError) catch picks up some but not all SDK-wrapped forms) - the message-pattern list had no SSL alert substrings The 'unknown' bucket is still retryable, but: (a) logs tell the user 'unknown' instead of identifying the cause, (b) it bypasses the transport-specific backoff/fallback logic, and (c) if the SSL error happens on a large session with a generic 'connection closed' wrapper, the existing disconnect-on-large-session heuristic would incorrectly trigger context compression — expensive, and never fixes a transport hiccup. Changes: - Add ssl.SSLError and its subclass type names to _TRANSPORT_ERROR_TYPES - New _SSL_TRANSIENT_PATTERNS list (separate from _SERVER_DISCONNECT_PATTERNS so SSL alerts route to timeout, not context_overflow+compress) - New step 5 in the classifier pipeline: SSL pattern check runs BEFORE the disconnect check to pre-empt the large-session-compress path Patterns cover both space-separated ('ssl alert', 'bad record mac') and underscore-separated ('ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC') forms. This is load-bearing because OpenSSL 3.x changed the error-code separator from underscore to slash (e.g. SSLV3_ALERT_BAD_RECORD_MAC → SSL/TLS_ALERT_BAD_RECORD_MAC) and will likely churn again — matching on stable alert reason substrings survives future format changes. Tests (8 new): - BAD_RECORD_MAC in Python ssl.c format - OpenSSL 3.x underscore format - TLSV1_ALERT_INTERNAL_ERROR - ssl handshake failure - [SSL: ...] prefix fallback - Real ssl.SSLError instance - REGRESSION GUARD: SSL on large session does NOT compress - REGRESSION GUARD: plain disconnect on large session STILL compresses
fix(gateway): also unlink stale PID + lock files on cleanup
2026-04-22 17:07:12 -07:00 · 2026-04-22 16:33:46 -07:00 · 2026-04-22 16:33:46 -07:00 · 2026-04-22 16:33:36 -07:00 · 2026-04-22 16:33:25 -07:00 · 2026-04-22 16:33:25 -07:00
215 changed files with 22136 additions and 5822 deletions
@@ -266,6 +266,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    return True  # Any other endpoint is a third-party proxy


+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -323,9 +331,18 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, returns 403.
+        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
+        }
+    elif _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer even for regular API keys. Route those endpoints
+        # Authorization: Bearer *** for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1066,6 +1083,31 @@ def convert_messages_to_anthropic(
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
+            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+            # tool-call messages to carry reasoning_content when thinking is
+            # enabled server-side.  Preserve it as a thinking block so Kimi
+            # can validate the message history.  See hermes-agent#13848.
+            #
+            # Accept empty string "" — _copy_reasoning_content_for_api()
+            # injects "" as a tier-3 fallback for Kimi tool-call messages
+            # that had no reasoning.  Kimi requires the field to exist, even
+            # if empty.
+            #
+            # Prepend (not append): Anthropic protocol requires thinking
+            # blocks before text and tool_use blocks.
+            #
+            # Guard: only add when reasoning_details didn't already contribute
+            # thinking blocks.  On native Anthropic, reasoning_details produces
+            # signed thinking blocks — adding another unsigned one from
+            # reasoning_content would create a duplicate (same text) that gets
+            # downgraded to a spurious text block on the last assistant message.
+            reasoning_content = m.get("reasoning_content")
+            _already_has_thinking = any(
+                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
+                for b in blocks
+            )
+            if isinstance(reasoning_content, str) and not _already_has_thinking:
+                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
@@ -1221,6 +1263,7 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
+    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1232,7 +1275,25 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_third_party or idx != last_assistant_idx:
+        if _is_kimi:
+            # Kimi's /coding endpoint enables thinking server-side and
+            # requires unsigned thinking blocks on replayed assistant
+            # tool-call messages.  Strip signed Anthropic blocks (Kimi
+            # can't validate signatures) but preserve the unsigned ones
+            # we synthesised from reasoning_content above.
+            new_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    new_content.append(b)
+                    continue
+                if b.get("signature") or b.get("data"):
+                    # Anthropic-signed block — Kimi can't validate, strip
+                    continue
+                # Unsigned thinking (synthesised from reasoning_content) —
+                # keep it: Kimi needs it for message-history validation.
+                new_content.append(b)
+            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
+        elif _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1409,11 +1470,25 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N".  Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host.  (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    if reasoning_config and isinstance(reasoning_config, dict):
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
@@ -134,6 +134,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
@@ -182,8 +183,6 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
-_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
-_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -845,7 +844,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
            extra = {}
            if base_url_host_matches(base_url, "api.kimi.com"):
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers

@@ -871,7 +870,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                return GeminiNativeClient(api_key=api_key, base_url=base_url), model
        extra = {}
        if base_url_host_matches(base_url, "api.kimi.com"):
-            extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

@@ -927,22 +926,35 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-    if nous.get("source") == "pool":
-        model = "gemini-3-flash"
-    else:
-        model = _NOUS_MODEL
-    # Free-tier users can't use paid auxiliary models — use the free
-    # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
-    # Paid accounts keep their tier-appropriate models: gemini-3-flash-preview
-    # for both text and vision tasks.
+
+    # Ask the Portal which model it currently recommends for this task type.
+    # The /api/nous/recommended-models endpoint is the authoritative source:
+    # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
+    # auto-detects the caller's tier via check_nous_free_tier().  Fall back to
+    # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
+    # or returns a null recommendation for this task type.
+    model = _NOUS_MODEL
    try:
-        from hermes_cli.models import check_nous_free_tier
-        if check_nous_free_tier():
-            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
-            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
-                         model, "vision" if vision else "text")
-    except Exception:
-        pass
+        from hermes_cli.models import get_nous_recommended_aux_model
+        recommended = get_nous_recommended_aux_model(vision=vision)
+        if recommended:
+            model = recommended
+            logger.debug(
+                "Auxiliary/%s: using Portal-recommended model %s",
+                "vision" if vision else "text", model,
+            )
+        else:
+            logger.debug(
+                "Auxiliary/%s: no Portal recommendation, falling back to %s",
+                "vision" if vision else "text", model,
+            )
+    except Exception as exc:
+        logger.debug(
+            "Auxiliary/%s: recommended-models lookup failed (%s); "
+            "falling back to %s",
+            "vision" if vision else "text", exc, model,
+        )
+
    if runtime is not None:
        api_key, base_url = runtime
    else:
@@ -1487,7 +1499,7 @@ def _to_async_client(sync_client, model: str):

        async_kwargs["default_headers"] = copilot_default_headers()
    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
-        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model


@@ -1674,7 +1686,7 @@ def resolve_provider_client(
            )
            extra = {}
            if base_url_host_matches(custom_base, "api.kimi.com"):
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers
                extra["default_headers"] = copilot_default_headers()
@@ -1781,7 +1793,7 @@ def resolve_provider_client(
        # Provider-specific headers
        headers = {}
        if base_url_host_matches(base_url, "api.kimi.com"):
-            headers["User-Agent"] = "KimiCLI/1.30.0"
+            headers["User-Agent"] = "claude-code/0.1.0"
        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

@@ -64,6 +64,47 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_text_for_contains(content: Any) -> str:
+    """Return a best-effort text view of message content.
+
+    Used only for substring checks when we need to know whether we've already
+    appended a note to a message. Keeps multimodal lists intact elsewhere.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "\n".join(part for part in parts if part)
+    return str(content)
+
+
+def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
+    """Append or prepend plain text to message content safely.
+
+    Compression sometimes needs to add a note or merge a summary into an
+    existing message. Message content may be plain text or a multimodal list of
+    blocks, so direct string concatenation is not always safe.
+    """
+    if content is None:
+        return text
+    if isinstance(content, str):
+        return text + content if prepend else content + text
+    if isinstance(content, list):
+        text_block = {"type": "text", "text": text}
+        return [text_block, *content] if prepend else [*content, text_block]
+    rendered = str(content)
+    return text + rendered if prepend else rendered + text
+
+
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -1144,10 +1185,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content") or ""
+                existing = msg.get("content")
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in existing:
-                    msg["content"] = existing + "\n\n" + _compression_note
+                if _compression_note not in _content_text_for_contains(existing):
+                    msg["content"] = _append_text_to_content(
+                        existing,
+                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
+                    )
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1191,12 +1235,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                original = msg.get("content") or ""
-                msg["content"] = (
+                merged_prefix = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                    + original
+                )
+                msg["content"] = _append_text_to_content(
+                    msg.get("content"),
+                    merged_prefix,
+                    prepend=True,
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -220,12 +220,25 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
+    # SSL/TLS transport errors — transient mid-stream handshake/record
+    # failures that should retry rather than surface as a stalled session.
+    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
+    # the type names here so provider-wrapped SSL errors (e.g. when the
+    # SDK re-raises without preserving the exception chain) still classify
+    # as transport rather than falling through to the unknown bucket.
+    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
+    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level)
+# Server disconnect patterns (no status code, but transport-level).
+# These are the "ambiguous" patterns — a plain connection close could be
+# transient transport hiccup OR server-side context overflow rejection
+# (common when the API gateway disconnects instead of returning an HTTP
+# error for oversized requests).  A large session + one of these patterns
+# triggers the context-overflow-with-compression recovery path.
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -236,6 +249,40 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

+# SSL/TLS transient failure patterns — intentionally distinct from
+# _SERVER_DISCONNECT_PATTERNS above.
+#
+# An SSL alert mid-stream is almost always a transport-layer hiccup
+# (flaky network, mid-session TLS renegotiation failure, load balancer
+# dropping the connection) — NOT a server-side context overflow signal.
+# So we want the retry path but NOT the compression path; lumping these
+# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
+# expensive) context compression on any large-session SSL hiccup.
+#
+# The OpenSSL library constructs error codes by prepending a format string
+# to the uppercased alert reason; OpenSSL 3.x changed the separator
+# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+# which silently stopped matching anything explicit.  Matching on the
+# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
+# survives future OpenSSL format churn without code changes.
+_SSL_TRANSIENT_PATTERNS = [
+    # Space-separated (human-readable form, Python ssl module, most SDKs)
+    "bad record mac",
+    "ssl alert",
+    "tls alert",
+    "ssl handshake failure",
+    "tlsv1 alert",
+    "sslv3 alert",
+    # Underscore-separated (OpenSSL error code tokens, e.g.
+    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
+    "bad_record_mac",
+    "ssl_alert",
+    "tls_alert",
+    "tls_alert_internal_error",
+    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
+    "[ssl:",
+]
+

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -255,9 +302,10 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. Transport error heuristics
+      5. SSL/TLS transient alert patterns → retry as timeout
      6. Server disconnect + large session → context overflow
-      7. Fallback: unknown (retryable with backoff)
+      7. Transport error heuristics
+      8. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -388,7 +436,18 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. Server disconnect + large session → context overflow ─────
+    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
+    # SSL alerts mid-stream are transport hiccups, not server-side context
+    # overflow signals.  Classify before the disconnect check so a large
+    # session doesn't incorrectly trigger context compression when the real
+    # cause is a flaky TLS handshake.  Also matches when the error is
+    # wrapped in a generic exception whose message string carries the SSL
+    # alert text but the type isn't ssl.SSLError (happens with some SDKs
+    # that re-raise without chaining).
+    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
+        return _result(FailoverReason.timeout, retryable=True)
+
+    # ── 6. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -405,12 +464,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 6. Transport / timeout heuristics ───────────────────────────
+    # ── 7. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Fallback: unknown ────────────────────────────────────────
+    # ── 8. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -470,11 +529,16 @@ def _classify_by_status(
                retryable=False,
                should_fallback=True,
            )
-        # Generic 404 — could be model or endpoint
+        # Generic 404 with no "model not found" signal — could be a wrong
+        # endpoint path (common with local llama.cpp / Ollama / vLLM when
+        # the URL is slightly misconfigured), a proxy routing glitch, or
+        # a transient backend issue.  Classifying these as model_not_found
+        # silently falls back to a different provider and tells the model
+        # the model is missing, which is wrong and wastes a turn.  Treat
+        # as unknown so the retry loop surfaces the real error instead.
        return result_fn(
-            FailoverReason.model_not_found,
-            retryable=False,
-            should_fallback=True,
+            FailoverReason.unknown,
+            retryable=True,
        )

    if status_code == 413:
@@ -0,0 +1,242 @@
+"""
+Image Generation Provider ABC
+=============================
+
+Defines the pluggable-backend interface for image generation. Providers register
+instances via ``PluginContext.register_image_gen_provider()``; the active one
+(selected via ``image_gen.provider`` in ``config.yaml``) services every
+``image_generate`` tool call.
+
+Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
+as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
+via ``plugins.enabled``).
+
+Response shape
+--------------
+All providers return a dict that :func:`success_response` / :func:`error_response`
+produce. The tool wrapper JSON-serializes it. Keys:
+
+    success        bool
+    image          str | None       URL or absolute file path
+    model          str              provider-specific model identifier
+    prompt         str              echoed prompt
+    aspect_ratio   str              "landscape" | "square" | "portrait"
+    provider       str              provider name (for diagnostics)
+    error          str              only when success=False
+    error_type     str              only when success=False
+"""
+
+from __future__ import annotations
+
+import abc
+import base64
+import datetime
+import logging
+import uuid
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
+DEFAULT_ASPECT_RATIO = "landscape"
+
+
+# ---------------------------------------------------------------------------
+# ABC
+# ---------------------------------------------------------------------------
+
+
+class ImageGenProvider(abc.ABC):
+    """Abstract base class for an image generation backend.
+
+    Subclasses must implement :meth:`generate`. Everything else has sane
+    defaults — override only what your provider needs.
+    """
+
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """Stable short identifier used in ``image_gen.provider`` config.
+
+        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
+        """
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
+        return self.name.title()
+
+    def is_available(self) -> bool:
+        """Return True when this provider can service calls.
+
+        Typically checks for a required API key. Default: True
+        (providers with no external dependencies are always available).
+        """
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        """Return catalog entries for ``hermes tools`` model picker.
+
+        Each entry::
+
+            {
+                "id": "gpt-image-1.5",               # required
+                "display": "GPT Image 1.5",          # optional; defaults to id
+                "speed": "~10s",                     # optional
+                "strengths": "...",                  # optional
+                "price": "$...",                     # optional
+            }
+
+        Default: empty list (provider has no user-selectable models).
+        """
+        return []
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return provider metadata for the ``hermes tools`` picker.
+
+        Used by ``tools_config.py`` to inject this provider as a row in
+        the Image Generation provider list. Shape::
+
+            {
+                "name": "OpenAI",                     # picker label
+                "badge": "paid",                      # optional short tag
+                "tag": "One-line description...",     # optional subtitle
+                "env_vars": [                         # keys to prompt for
+                    {"key": "OPENAI_API_KEY",
+                     "prompt": "OpenAI API key",
+                     "url": "https://platform.openai.com/api-keys"},
+                ],
+            }
+
+        Default: minimal entry derived from ``display_name``. Override to
+        expose API key prompts and custom badges.
+        """
+        return {
+            "name": self.display_name,
+            "badge": "",
+            "tag": "",
+            "env_vars": [],
+        }
+
+    def default_model(self) -> Optional[str]:
+        """Return the default model id, or None if not applicable."""
+        models = self.list_models()
+        if models:
+            return models[0].get("id")
+        return None
+
+    @abc.abstractmethod
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate an image.
+
+        Implementations should return the dict from :func:`success_response`
+        or :func:`error_response`. ``kwargs`` may contain forward-compat
+        parameters future versions of the schema will expose — implementations
+        should ignore unknown keys.
+        """
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def resolve_aspect_ratio(value: Optional[str]) -> str:
+    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
+
+    Invalid values are coerced rather than rejected so the tool surface is
+    forgiving of agent mistakes.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_ASPECT_RATIO
+    v = value.strip().lower()
+    if v in VALID_ASPECT_RATIOS:
+        return v
+    return DEFAULT_ASPECT_RATIO
+
+
+def _images_cache_dir() -> Path:
+    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
+    from hermes_constants import get_hermes_home
+
+    path = get_hermes_home() / "cache" / "images"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def save_b64_image(
+    b64_data: str,
+    *,
+    prefix: str = "image",
+    extension: str = "png",
+) -> Path:
+    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
+
+    Returns the absolute :class:`Path` to the saved file.
+
+    Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
+    """
+    raw = base64.b64decode(b64_data)
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    short = uuid.uuid4().hex[:8]
+    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
+    path.write_bytes(raw)
+    return path
+
+
+def success_response(
+    *,
+    image: str,
+    model: str,
+    prompt: str,
+    aspect_ratio: str,
+    provider: str,
+    extra: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Build a uniform success response dict.
+
+    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
+    """
+    payload: Dict[str, Any] = {
+        "success": True,
+        "image": image,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
+    if extra:
+        for k, v in extra.items():
+            payload.setdefault(k, v)
+    return payload
+
+
+def error_response(
+    *,
+    error: str,
+    error_type: str = "provider_error",
+    provider: str = "",
+    model: str = "",
+    prompt: str = "",
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+) -> Dict[str, Any]:
+    """Build a uniform error response dict."""
+    return {
+        "success": False,
+        "image": None,
+        "error": error,
+        "error_type": error_type,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
@@ -0,0 +1,120 @@
+"""
+Image Generation Provider Registry
+==================================
+
+Central map of registered providers. Populated by plugins at import-time via
+``PluginContext.register_image_gen_provider()``; consumed by the
+``image_generate`` tool to dispatch each call to the active backend.
+
+Active selection
+----------------
+The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
+If unset, :func:`get_active_provider` applies fallback logic:
+
+1. If exactly one provider is registered, use it.
+2. Otherwise if a provider named ``fal`` is registered, use it (legacy
+   default — matches pre-plugin behavior).
+3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
+   the user at ``hermes tools``).
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import Dict, List, Optional
+
+from agent.image_gen_provider import ImageGenProvider
+
+logger = logging.getLogger(__name__)
+
+
+_providers: Dict[str, ImageGenProvider] = {}
+_lock = threading.Lock()
+
+
+def register_provider(provider: ImageGenProvider) -> None:
+    """Register an image generation provider.
+
+    Re-registration (same ``name``) overwrites the previous entry and logs
+    a debug message — this makes hot-reload scenarios (tests, dev loops)
+    behave predictably.
+    """
+    if not isinstance(provider, ImageGenProvider):
+        raise TypeError(
+            f"register_provider() expects an ImageGenProvider instance, "
+            f"got {type(provider).__name__}"
+        )
+    name = provider.name
+    if not isinstance(name, str) or not name.strip():
+        raise ValueError("Image gen provider .name must be a non-empty string")
+    with _lock:
+        existing = _providers.get(name)
+        _providers[name] = provider
+    if existing is not None:
+        logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
+    else:
+        logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
+
+
+def list_providers() -> List[ImageGenProvider]:
+    """Return all registered providers, sorted by name."""
+    with _lock:
+        items = list(_providers.values())
+    return sorted(items, key=lambda p: p.name)
+
+
+def get_provider(name: str) -> Optional[ImageGenProvider]:
+    """Return the provider registered under *name*, or None."""
+    if not isinstance(name, str):
+        return None
+    with _lock:
+        return _providers.get(name.strip())
+
+
+def get_active_provider() -> Optional[ImageGenProvider]:
+    """Resolve the currently-active provider.
+
+    Reads ``image_gen.provider`` from config.yaml; falls back per the
+    module docstring.
+    """
+    configured: Optional[str] = None
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        if isinstance(section, dict):
+            raw = section.get("provider")
+            if isinstance(raw, str) and raw.strip():
+                configured = raw.strip()
+    except Exception as exc:
+        logger.debug("Could not read image_gen.provider from config: %s", exc)
+
+    with _lock:
+        snapshot = dict(_providers)
+
+    if configured:
+        provider = snapshot.get(configured)
+        if provider is not None:
+            return provider
+        logger.debug(
+            "image_gen.provider='%s' configured but not registered; falling back",
+            configured,
+        )
+
+    # Fallback: single-provider case
+    if len(snapshot) == 1:
+        return next(iter(snapshot.values()))
+
+    # Fallback: prefer legacy FAL for backward compat
+    if "fal" in snapshot:
+        return snapshot["fal"]
+
+    return None
+
+
+def _reset_for_tests() -> None:
+    """Clear the registry. **Test-only.**"""
+    with _lock:
+        _providers.clear()
@@ -4,6 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

+import ipaddress
 import logging
 import re
 import time
@@ -25,7 +26,7 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -36,7 +37,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
    "ollama",
-    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
+    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "xai", "x-ai", "x.ai", "grok",
@@ -51,6 +52,13 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


+# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
+# block, so without an explicit check Ollama reached over Tailscale (e.g.
+# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
+# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
+_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
+
+
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -125,6 +133,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
+    "gemma-4": 256000,  # Gemma 4 family
+    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
@@ -177,6 +187,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "mimo-v2-pro": 1000000,
    "mimo-v2-omni": 256000,
    "mimo-v2-flash": 256000,
+    "mimo-v2.5-pro": 1000000,
+    "mimo-v2.5": 1000000,
    "zai-org/GLM-5": 202752,
 }

@@ -237,6 +249,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
+    "api.stepfun.ai": "stepfun",
+    "api.stepfun.com": "stepfun",
    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
@@ -281,7 +295,15 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
+    """Return True if base_url points to a local machine.
+
+    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
+    container-internal DNS names (``host.docker.internal`` et al.),
+    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
+    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
+    is included so remote-but-trusted Ollama boxes reached over a
+    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
+    """
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -296,14 +318,17 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges and link-local
-    import ipaddress
+    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
    try:
        addr = ipaddress.ip_address(host)
-        return addr.is_private or addr.is_loopback or addr.is_link_local
+        if addr.is_private or addr.is_loopback or addr.is_link_local:
+            return True
+        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
+            return True
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
+    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -314,6 +339,8 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
+            if first == 100 and 64 <= second <= 127:
+                return True
        except ValueError:
            pass
    return False
@@ -146,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
+    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
@@ -350,7 +350,13 @@ PLATFORM_HINTS = {
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
-        "renderable inside a terminal."
+        "renderable inside a terminal. "
+        "File delivery: there is no attachment channel — the user reads your "
+        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
+        "(those are only intercepted on messaging platforms like Telegram, "
+        "Discord, Slack, etc.; on the CLI they render as literal text). "
+        "When referring to a file you created or changed, just state its "
+        "absolute path in plain text; the user can open it from there."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
@@ -37,3 +37,15 @@ def _discover_transports() -> None:
        import agent.transports.anthropic  # noqa: F401
    except ImportError:
        pass
+    try:
+        import agent.transports.codex  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.chat_completions  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.bedrock  # noqa: F401
+    except ImportError:
+        pass
@@ -87,14 +87,20 @@ class AnthropicTransport(ProviderTransport):
        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)

    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid."""
+        """Check Anthropic response structure is valid.
+
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
+        """
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return False
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
@@ -0,0 +1,154 @@
+"""AWS Bedrock Converse API transport.
+
+Delegates to the existing adapter functions in agent/bedrock_adapter.py.
+Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
+owns format conversion and normalization, while client construction and
+boto3 calls stay on AIAgent.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class BedrockTransport(ProviderTransport):
+    """Transport for api_mode='bedrock_converse'."""
+
+    @property
+    def api_mode(self) -> str:
+        return "bedrock_converse"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI messages to Bedrock Converse format."""
+        from agent.bedrock_adapter import convert_messages_to_converse
+        return convert_messages_to_converse(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
+        from agent.bedrock_adapter import convert_tools_to_converse
+        return convert_tools_to_converse(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Bedrock converse() kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            max_tokens: int — output token limit (default 4096)
+            temperature: float | None
+            guardrail_config: dict | None — Bedrock guardrails
+            region: str — AWS region (default 'us-east-1')
+        """
+        from agent.bedrock_adapter import build_converse_kwargs
+
+        region = params.get("region", "us-east-1")
+        guardrail = params.get("guardrail_config")
+
+        kwargs = build_converse_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 4096),
+            temperature=params.get("temperature"),
+            guardrail_config=guardrail,
+        )
+        # Sentinel keys for dispatch — agent pops these before the boto3 call
+        kwargs["__bedrock_converse__"] = True
+        kwargs["__bedrock_region__"] = region
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Bedrock response to NormalizedResponse.
+
+        Handles two shapes:
+        1. Raw boto3 dict (from direct converse() calls)
+        2. Already-normalized SimpleNamespace with .choices (from dispatch site)
+        """
+        from agent.bedrock_adapter import normalize_converse_response
+
+        # Normalize to OpenAI-compatible SimpleNamespace
+        if hasattr(response, "choices") and response.choices:
+            # Already normalized at dispatch site
+            ns = response
+        else:
+            # Raw boto3 dict
+            ns = normalize_converse_response(response)
+
+        choice = ns.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = [
+                ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                )
+                for tc in msg.tool_calls
+            ]
+
+        usage = None
+        if hasattr(ns, "usage") and ns.usage:
+            u = ns.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Bedrock response structure.
+
+        After normalize_converse_response, the response has OpenAI-compatible
+        .choices — same check as chat_completions.
+        """
+        if response is None:
+            return False
+        # Raw Bedrock dict response — check for 'output' key
+        if isinstance(response, dict):
+            return "output" in response
+        # Already-normalized SimpleNamespace
+        if hasattr(response, "choices"):
+            return bool(response.choices)
+        return False
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Bedrock stop reason to OpenAI finish_reason.
+
+        The adapter already does this mapping inside normalize_converse_response,
+        so this is only used for direct access to raw responses.
+        """
+        _MAP = {
+            "end_turn": "stop",
+            "tool_use": "tool_calls",
+            "max_tokens": "length",
+            "stop_sequence": "stop",
+            "guardrail_intervened": "content_filter",
+            "content_filtered": "content_filter",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("bedrock_converse", BedrockTransport)
@@ -0,0 +1,387 @@
+"""OpenAI Chat Completions transport.
+
+Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
+providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
+
+Messages and tools are already in OpenAI format — convert_messages and
+convert_tools are near-identity.  The complexity lives in build_kwargs
+which has provider-specific conditionals for max_tokens defaults,
+reasoning configuration, temperature handling, and extra_body assembly.
+"""
+
+import copy
+from typing import Any, Dict, List, Optional
+
+from agent.prompt_builder import DEVELOPER_ROLE_MODELS
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ChatCompletionsTransport(ProviderTransport):
+    """Transport for api_mode='chat_completions'.
+
+    The default path for OpenAI-compatible providers.
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "chat_completions"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
+        """Messages are already in OpenAI format — sanitize Codex leaks only.
+
+        Strips Codex Responses API fields (``codex_reasoning_items`` on the
+        message, ``call_id``/``response_item_id`` on tool_calls) that strict
+        chat-completions providers reject with 400/422.
+        """
+        needs_sanitize = False
+        for msg in messages:
+            if not isinstance(msg, dict):
+                continue
+            if "codex_reasoning_items" in msg:
+                needs_sanitize = True
+                break
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
+                        needs_sanitize = True
+                        break
+                if needs_sanitize:
+                    break
+
+        if not needs_sanitize:
+            return messages
+
+        sanitized = copy.deepcopy(messages)
+        for msg in sanitized:
+            if not isinstance(msg, dict):
+                continue
+            msg.pop("codex_reasoning_items", None)
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict):
+                        tc.pop("call_id", None)
+                        tc.pop("response_item_id", None)
+        return sanitized
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Tools are already in OpenAI format — identity."""
+        return tools
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build chat.completions.create() kwargs.
+
+        This is the most complex transport method — it handles ~16 providers
+        via params rather than subclasses.
+
+        params:
+            timeout: float — API call timeout
+            max_tokens: int | None — user-configured max tokens
+            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
+            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
+            reasoning_config: dict | None
+            request_overrides: dict | None
+            session_id: str | None
+            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
+            model_lower: str — lowercase model name for pattern matching
+            # Provider detection flags (all optional, default False)
+            is_openrouter: bool
+            is_nous: bool
+            is_qwen_portal: bool
+            is_github_models: bool
+            is_nvidia_nim: bool
+            is_kimi: bool
+            is_custom_provider: bool
+            ollama_num_ctx: int | None
+            # Provider routing
+            provider_preferences: dict | None
+            # Qwen-specific
+            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
+            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
+            # Temperature
+            fixed_temperature: Any — from _fixed_temperature_for_model()
+            omit_temperature: bool
+            # Reasoning
+            supports_reasoning: bool
+            github_reasoning_extra: dict | None
+            # Claude on OpenRouter/Nous max output
+            anthropic_max_output: int | None
+            # Extra
+            extra_body_additions: dict | None — pre-built extra_body entries
+        """
+        # Codex sanitization: drop reasoning_items / call_id / response_item_id
+        sanitized = self.convert_messages(messages)
+
+        # Qwen portal prep AFTER codex sanitization.  If sanitize already
+        # deepcopied, reuse that copy via the in-place variant to avoid a
+        # second deepcopy.
+        is_qwen = params.get("is_qwen_portal", False)
+        if is_qwen:
+            qwen_prep = params.get("qwen_prepare_fn")
+            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
+            if sanitized is messages:
+                if qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+            else:
+                # Already deepcopied — transform in place
+                if qwen_prep_inplace is not None:
+                    qwen_prep_inplace(sanitized)
+                elif qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+
+        # Developer role swap for GPT-5/Codex models
+        model_lower = params.get("model_lower", (model or "").lower())
+        if (
+            sanitized
+            and isinstance(sanitized[0], dict)
+            and sanitized[0].get("role") == "system"
+            and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            sanitized = list(sanitized)
+            sanitized[0] = {**sanitized[0], "role": "developer"}
+
+        api_kwargs: Dict[str, Any] = {
+            "model": model,
+            "messages": sanitized,
+        }
+
+        timeout = params.get("timeout")
+        if timeout is not None:
+            api_kwargs["timeout"] = timeout
+
+        # Temperature
+        fixed_temp = params.get("fixed_temperature")
+        omit_temp = params.get("omit_temperature", False)
+        if omit_temp:
+            api_kwargs.pop("temperature", None)
+        elif fixed_temp is not None:
+            api_kwargs["temperature"] = fixed_temp
+
+        # Qwen metadata (caller precomputes {sessionId, promptId})
+        qwen_meta = params.get("qwen_session_metadata")
+        if qwen_meta and is_qwen:
+            api_kwargs["metadata"] = qwen_meta
+
+        # Tools
+        if tools:
+            api_kwargs["tools"] = tools
+
+        # max_tokens resolution — priority: ephemeral > user > provider default
+        max_tokens_fn = params.get("max_tokens_param_fn")
+        ephemeral = params.get("ephemeral_max_output_tokens")
+        max_tokens = params.get("max_tokens")
+        anthropic_max_out = params.get("anthropic_max_output")
+        is_nvidia_nim = params.get("is_nvidia_nim", False)
+        is_kimi = params.get("is_kimi", False)
+        reasoning_config = params.get("reasoning_config")
+
+        if ephemeral is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(ephemeral))
+        elif max_tokens is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(max_tokens))
+        elif is_nvidia_nim and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(16384))
+        elif is_qwen and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(65536))
+        elif is_kimi and max_tokens_fn:
+            # Kimi/Moonshot: 32000 matches Kimi CLI's default
+            api_kwargs.update(max_tokens_fn(32000))
+        elif anthropic_max_out is not None:
+            api_kwargs["max_tokens"] = anthropic_max_out
+
+        # Kimi: top-level reasoning_effort (unless thinking disabled)
+        if is_kimi:
+            _kimi_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _kimi_thinking_off:
+                _kimi_effort = "medium"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _kimi_effort = _e
+                api_kwargs["reasoning_effort"] = _kimi_effort
+
+        # extra_body assembly
+        extra_body: Dict[str, Any] = {}
+
+        is_openrouter = params.get("is_openrouter", False)
+        is_nous = params.get("is_nous", False)
+        is_github_models = params.get("is_github_models", False)
+
+        provider_prefs = params.get("provider_preferences")
+        if provider_prefs and is_openrouter:
+            extra_body["provider"] = provider_prefs
+
+        # Kimi extra_body.thinking
+        if is_kimi:
+            _kimi_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _kimi_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _kimi_thinking_enabled else "disabled",
+            }
+
+        # Reasoning
+        if params.get("supports_reasoning", False):
+            if is_github_models:
+                gh_reasoning = params.get("github_reasoning_extra")
+                if gh_reasoning is not None:
+                    extra_body["reasoning"] = gh_reasoning
+            else:
+                if reasoning_config is not None:
+                    rc = dict(reasoning_config)
+                    if is_nous and rc.get("enabled") is False:
+                        pass  # omit for Nous when disabled
+                    else:
+                        extra_body["reasoning"] = rc
+                else:
+                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+
+        if is_nous:
+            extra_body["tags"] = ["product=hermes-agent"]
+
+        # Ollama num_ctx
+        ollama_ctx = params.get("ollama_num_ctx")
+        if ollama_ctx:
+            options = extra_body.get("options", {})
+            options["num_ctx"] = ollama_ctx
+            extra_body["options"] = options
+
+        # Ollama/custom think=false
+        if params.get("is_custom_provider", False):
+            if reasoning_config and isinstance(reasoning_config, dict):
+                _effort = (reasoning_config.get("effort") or "").strip().lower()
+                _enabled = reasoning_config.get("enabled", True)
+                if _effort == "none" or _enabled is False:
+                    extra_body["think"] = False
+
+        if is_qwen:
+            extra_body["vl_high_resolution_images"] = True
+
+        # Merge any pre-built extra_body additions
+        additions = params.get("extra_body_additions")
+        if additions:
+            extra_body.update(additions)
+
+        if extra_body:
+            api_kwargs["extra_body"] = extra_body
+
+        # Request overrides last (service_tier etc.)
+        overrides = params.get("request_overrides")
+        if overrides:
+            api_kwargs.update(overrides)
+
+        return api_kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize OpenAI ChatCompletion to NormalizedResponse.
+
+        For chat_completions, this is near-identity — the response is already
+        in OpenAI format.  extra_content on tool_calls (Gemini thought_signature)
+        is preserved via ToolCall.provider_data.  reasoning_details (OpenRouter
+        unified format) and reasoning_content (DeepSeek/Moonshot) are also
+        preserved for downstream replay.
+        """
+        choice = response.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                # Preserve provider-specific extras on the tool call.
+                # Gemini 3 thinking models attach extra_content with
+                # thought_signature — without replay on the next turn the API
+                # rejects the request with 400.
+                tc_provider_data: Dict[str, Any] = {}
+                extra = getattr(tc, "extra_content", None)
+                if extra is None and hasattr(tc, "model_extra"):
+                    extra = (tc.model_extra or {}).get("extra_content")
+                if extra is not None:
+                    if hasattr(extra, "model_dump"):
+                        try:
+                            extra = extra.model_dump()
+                        except Exception:
+                            pass
+                    tc_provider_data["extra_content"] = extra
+                tool_calls.append(ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                    provider_data=tc_provider_data or None,
+                ))
+
+        usage = None
+        if hasattr(response, "usage") and response.usage:
+            u = response.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        # Preserve reasoning fields separately.  DeepSeek/Moonshot use
+        # ``reasoning_content``; others use ``reasoning``.  Downstream code
+        # (_extract_reasoning, thinking-prefill retry) reads both distinctly,
+        # so keep them apart in provider_data rather than merging.
+        reasoning = getattr(msg, "reasoning", None)
+        reasoning_content = getattr(msg, "reasoning_content", None)
+
+        provider_data: Dict[str, Any] = {}
+        if reasoning_content:
+            provider_data["reasoning_content"] = reasoning_content
+        rd = getattr(msg, "reasoning_details", None)
+        if rd:
+            provider_data["reasoning_details"] = rd
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check that response has valid choices."""
+        if response is None:
+            return False
+        if not hasattr(response, "choices") or response.choices is None:
+            return False
+        if not response.choices:
+            return False
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        details = getattr(usage, "prompt_tokens_details", None)
+        if details is None:
+            return None
+        cached = getattr(details, "cached_tokens", 0) or 0
+        written = getattr(details, "cache_write_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("chat_completions", ChatCompletionsTransport)
@@ -0,0 +1,217 @@
+"""OpenAI Responses API (Codex) transport.
+
+Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle,
+streaming, or the _run_codex_stream() call path.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ResponsesApiTransport(ProviderTransport):
+    """Transport for api_mode='codex_responses'.
+
+    Wraps the functions extracted into codex_responses_adapter.py (PR 1).
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "codex_responses"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI chat messages to Responses API input items."""
+        from agent.codex_responses_adapter import _chat_messages_to_responses_input
+        return _chat_messages_to_responses_input(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Responses API function definitions."""
+        from agent.codex_responses_adapter import _responses_tools
+        return _responses_tools(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Responses API kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            instructions: str — system prompt (extracted from messages[0] if not given)
+            reasoning_config: dict | None — {effort, enabled}
+            session_id: str | None — used for prompt_cache_key + xAI conv header
+            max_tokens: int | None — max_output_tokens
+            request_overrides: dict | None — extra kwargs merged in
+            provider: str | None — provider name for backend-specific logic
+            base_url: str | None — endpoint URL
+            base_url_hostname: str | None — hostname for backend detection
+            is_github_responses: bool — Copilot/GitHub models backend
+            is_codex_backend: bool — chatgpt.com/backend-api/codex
+            is_xai_responses: bool — xAI/Grok backend
+            github_reasoning_extra: dict | None — Copilot reasoning params
+        """
+        from agent.codex_responses_adapter import (
+            _chat_messages_to_responses_input,
+            _responses_tools,
+        )
+
+        from run_agent import DEFAULT_AGENT_IDENTITY
+
+        instructions = params.get("instructions", "")
+        payload_messages = messages
+        if not instructions:
+            if messages and messages[0].get("role") == "system":
+                instructions = str(messages[0].get("content") or "").strip()
+                payload_messages = messages[1:]
+        if not instructions:
+            instructions = DEFAULT_AGENT_IDENTITY
+
+        is_github_responses = params.get("is_github_responses", False)
+        is_codex_backend = params.get("is_codex_backend", False)
+        is_xai_responses = params.get("is_xai_responses", False)
+
+        # Resolve reasoning effort
+        reasoning_effort = "medium"
+        reasoning_enabled = True
+        reasoning_config = params.get("reasoning_config")
+        if reasoning_config and isinstance(reasoning_config, dict):
+            if reasoning_config.get("enabled") is False:
+                reasoning_enabled = False
+            elif reasoning_config.get("effort"):
+                reasoning_effort = reasoning_config["effort"]
+
+        _effort_clamp = {"minimal": "low"}
+        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
+
+        kwargs = {
+            "model": model,
+            "instructions": instructions,
+            "input": _chat_messages_to_responses_input(payload_messages),
+            "tools": _responses_tools(tools),
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
+            "store": False,
+        }
+
+        session_id = params.get("session_id")
+        if not is_github_responses and session_id:
+            kwargs["prompt_cache_key"] = session_id
+
+        if reasoning_enabled and is_xai_responses:
+            kwargs["include"] = ["reasoning.encrypted_content"]
+        elif reasoning_enabled:
+            if is_github_responses:
+                github_reasoning = params.get("github_reasoning_extra")
+                if github_reasoning is not None:
+                    kwargs["reasoning"] = github_reasoning
+            else:
+                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+                kwargs["include"] = ["reasoning.encrypted_content"]
+        elif not is_github_responses and not is_xai_responses:
+            kwargs["include"] = []
+
+        request_overrides = params.get("request_overrides")
+        if request_overrides:
+            kwargs.update(request_overrides)
+
+        max_tokens = params.get("max_tokens")
+        if max_tokens is not None and not is_codex_backend:
+            kwargs["max_output_tokens"] = max_tokens
+
+        if is_xai_responses and session_id:
+            kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
+
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Codex Responses API response to NormalizedResponse."""
+        from agent.codex_responses_adapter import (
+            _normalize_codex_response,
+            _extract_responses_message_text,
+            _extract_responses_reasoning_text,
+        )
+
+        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
+        msg, finish_reason = _normalize_codex_response(response)
+
+        tool_calls = None
+        if msg and msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                provider_data = {}
+                if hasattr(tc, "call_id") and tc.call_id:
+                    provider_data["call_id"] = tc.call_id
+                if hasattr(tc, "response_item_id") and tc.response_item_id:
+                    provider_data["response_item_id"] = tc.response_item_id
+                tool_calls.append(ToolCall(
+                    id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
+                    name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
+                    arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
+                    provider_data=provider_data or None,
+                ))
+
+        # Extract reasoning items for provider_data
+        provider_data = {}
+        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
+            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
+        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
+            provider_data["reasoning_details"] = msg.reasoning_details
+
+        return NormalizedResponse(
+            content=msg.content if msg else None,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason or "stop",
+            reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
+            usage=None,  # Codex usage is extracted separately in normalize_usage()
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Codex Responses API response has valid output structure.
+
+        Returns True only if response.output is a non-empty list.
+        Does NOT check output_text fallback — the caller handles that
+        with diagnostic logging for stream backfill recovery.
+        """
+        if response is None:
+            return False
+        output = getattr(response, "output", None)
+        if not isinstance(output, list) or not output:
+            return False
+        return True
+
+    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
+        """Validate and sanitize Codex API kwargs before the call.
+
+        Normalizes input items, strips unsupported fields, validates structure.
+        """
+        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
+        return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Codex response.status to OpenAI finish_reason.
+
+        Codex uses response.status ('completed', 'incomplete') +
+        response.incomplete_details.reason for granular mapping.
+        This method handles the simple status string; the caller
+        should check incomplete_details separately for 'max_output_tokens'.
+        """
+        _MAP = {
+            "completed": "stop",
+            "incomplete": "length",
+            "failed": "stop",
+            "cancelled": "stop",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("codex_responses", ResponsesApiTransport)
@@ -914,6 +914,32 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None:
    print(f"\033[32m✓ Worktree cleaned up: {wt_path}\033[0m")


+def _run_state_db_auto_maintenance(session_db) -> None:
+    """Call ``SessionDB.maybe_auto_prune_and_vacuum`` using current config.
+
+    Reads the ``sessions:`` section from config.yaml via
+    :func:`hermes_cli.config.load_config` (the authoritative loader that
+    deep-merges DEFAULT_CONFIG, so unmigrated configs still get default
+    values). Honours ``auto_prune`` / ``retention_days`` /
+    ``vacuum_after_prune`` / ``min_interval_hours``, and delegates to the
+    DB. Never raises — maintenance must never block interactive startup.
+    """
+    if session_db is None:
+        return
+    try:
+        from hermes_cli.config import load_config as _load_full_config
+        cfg = (_load_full_config().get("sessions") or {})
+        if not cfg.get("auto_prune", False):
+            return
+        session_db.maybe_auto_prune_and_vacuum(
+            retention_days=int(cfg.get("retention_days", 90)),
+            min_interval_hours=int(cfg.get("min_interval_hours", 24)),
+            vacuum=bool(cfg.get("vacuum_after_prune", True)),
+        )
+    except Exception as exc:
+        logger.debug("state.db auto-maintenance skipped: %s", exc)
+
+
 def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None:
    """Remove stale worktrees and orphaned branches on startup.

@@ -1961,7 +1987,13 @@ class HermesCLI:
            self._session_db = SessionDB()
        except Exception as e:
            logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e)
-        
+
+        # Opportunistic state.db maintenance — runs at most once per
+        # min_interval_hours, tracked via state_meta in state.db itself so
+        # it's shared across all Hermes processes for this HERMES_HOME.
+        # Never blocks startup on failure.
+        _run_state_db_auto_maintenance(self._session_db)
+
        # Deferred title: stored in memory until the session is created in the DB
        self._pending_title: Optional[str] = None
        
@@ -616,6 +616,8 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
+                if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
+                    os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()

            # Discord settings → env vars (env vars take precedence)
            discord_cfg = yaml_cfg.get("discord", {})
@@ -135,9 +135,22 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

+    def _resolve_handlers(self, event_type: str) -> List[Callable]:
+        """Return all handlers that should fire for ``event_type``.
+
+        Exact matches fire first, followed by wildcard matches (e.g.
+        ``command:*`` matches ``command:reset``).
+        """
+        handlers = list(self._handlers.get(event_type, []))
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+        return handlers
+
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event.
+        Fire all handlers registered for an event, discarding return values.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -151,16 +164,7 @@ class HookRegistry:
        if context is None:
            context = {}

-        # Collect handlers: exact match + wildcard match
-        handlers = list(self._handlers.get(event_type, []))
-
-        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-
-        for fn in handlers:
+        for fn in self._resolve_handlers(event_type):
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -168,3 +172,32 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+
+    async def emit_collect(
+        self,
+        event_type: str,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> List[Any]:
+        """Fire handlers and return their non-None return values in order.
+
+        Like :meth:`emit` but captures each handler's return value. Used for
+        decision-style hooks (e.g. ``command:<name>`` policies that want to
+        allow/deny/rewrite the command before normal dispatch).
+
+        Exceptions from individual handlers are logged but do not abort the
+        remaining handlers.
+        """
+        if context is None:
+            context = {}
+
+        results: List[Any] = []
+        for fn in self._resolve_handlers(event_type):
+            try:
+                result = fn(event_type, context)
+                if asyncio.iscoroutine(result):
+                    result = await result
+                if result is not None:
+                    results.append(result)
+            except Exception as e:
+                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+        return results
@@ -2129,10 +2129,42 @@ class DiscordAdapter(BasePlatformAdapter):
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
+        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
+            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
+            discord_name = _name.lower()[:32]
+            desc = (_description or f"Run /{_name}")[:100]
+            has_args = bool(_args_hint)
+
+            if has_args:
+                def _make_args_handler(__name: str, __hint: str):
+                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
+                    async def _handler(interaction: discord.Interaction, args: str = ""):
+                        await self._run_simple_slash(
+                            interaction, f"/{__name} {args}".strip()
+                        )
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_args_handler(_name, _args_hint)
+            else:
+                def _make_simple_handler(__name: str):
+                    async def _handler(interaction: discord.Interaction):
+                        await self._run_simple_slash(interaction, f"/{__name}")
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_simple_handler(_name)
+
+            return discord.app_commands.Command(
+                name=discord_name,
+                description=desc,
+                callback=handler,
+            )
+
+        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

-            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2147,38 +2179,10 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                # Skip aliases that overlap with already-registered names
-                # (aliases for explicitly registered commands are handled above).
-                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
-                has_args = bool(cmd_def.args_hint)
-
-                if has_args:
-                    # Command takes optional arguments — create handler with
-                    # an optional ``args`` string parameter.
-                    def _make_args_handler(_name: str, _hint: str):
-                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
-                        async def _handler(interaction: discord.Interaction, args: str = ""):
-                            await self._run_simple_slash(
-                                interaction, f"/{_name} {args}".strip()
-                            )
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
-                else:
-                    # Parameterless command.
-                    def _make_simple_handler(_name: str):
-                        async def _handler(interaction: discord.Interaction):
-                            await self._run_simple_slash(interaction, f"/{_name}")
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_simple_handler(cmd_def.name)
-
-                auto_cmd = discord.app_commands.Command(
-                    name=discord_name,
-                    description=desc,
-                    callback=handler,
+                auto_cmd = _build_auto_slash_command(
+                    cmd_def.name,
+                    cmd_def.description,
+                    cmd_def.args_hint,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2195,6 +2199,35 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

+        # ── Plugin-registered slash commands ──
+        # Plugins register via PluginContext.register_command(); we mirror
+        # those into Discord's native slash picker so users get the same
+        # autocomplete UX as for built-in commands. No per-platform plugin
+        # API needed — plugin commands are platform-agnostic.
+        try:
+            from hermes_cli.commands import _iter_plugin_command_entries
+
+            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
+                discord_name = plugin_name.lower()[:32]
+                if discord_name in already_registered:
+                    continue
+                auto_cmd = _build_auto_slash_command(
+                    plugin_name,
+                    plugin_desc,
+                    plugin_args_hint,
+                )
+                try:
+                    tree.add_command(auto_cmd)
+                    already_registered.add(discord_name)
+                except Exception:
+                    # Silently skip commands that fail registration (e.g.
+                    # name conflict with a subcommand group).
+                    pass
+        except Exception as e:
+            logger.warning(
+                "Discord auto-register from plugin commands failed: %s", e
+            )
+
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -35,7 +35,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -73,7 +73,9 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
+    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
+    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -234,6 +236,8 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
+_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
+_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -277,12 +281,36 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


+@dataclass(frozen=True)
+class FeishuMentionRef:
+    name: str = ""
+    open_id: str = ""
+    is_all: bool = False
+    is_self: bool = False
+
+
+@dataclass(frozen=True)
+class _FeishuBotIdentity:
+    open_id: str = ""
+    user_id: str = ""
+    name: str = ""
+
+    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
+        # Precedence: open_id > user_id > name. IDs are authoritative when both
+        # sides have them; the next tier is only considered when either side
+        # lacks the current one.
+        if open_id and self.open_id:
+            return open_id == self.open_id
+        if user_id and self.user_id:
+            return user_id == self.user_id
+        return bool(self.name) and name == self.name
+
+
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -292,7 +320,7 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)
+    mentions: List[FeishuMentionRef] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)

@@ -505,14 +533,17 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
+def parse_feishu_post_payload(
+    payload: Any,
+    *,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
-    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -523,7 +554,10 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
+            "".join(
+                _render_post_element(item, image_keys, media_refs, mentions_map)
+                for item in row
+            )
        )
        if row_text:
            parts.append(row_text)
@@ -532,7 +566,6 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
-        mentioned_ids=mentioned_ids,
    )


@@ -584,7 +617,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(element, str):
        return element
@@ -602,19 +635,21 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        mentioned_id = (
-            str(element.get("open_id", "")).strip()
-            or str(element.get("user_id", "")).strip()
-        )
-        if mentioned_id and mentioned_id not in mentioned_ids:
-            mentioned_ids.append(mentioned_id)
-        display_name = (
-            str(element.get("user_name", "")).strip()
-            or str(element.get("name", "")).strip()
-            or str(element.get("text", "")).strip()
-            or mentioned_id
-        )
-        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
+        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
+        # the real ref in mentions_map for the display name.
+        placeholder = str(element.get("user_id", "")).strip()
+        if placeholder == "@_all":
+            # Feishu SDK sometimes omits @_all from the top-level mentions
+            # payload; record it here so the caller's mention list stays complete.
+            if mentions_map is not None and "@_all" not in mentions_map:
+                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
+            return "@all"
+        ref = (mentions_map or {}).get(placeholder)
+        if ref is not None:
+            display_name = ref.name or ref.open_id or "user"
+        else:
+            display_name = str(element.get("user_name", "")).strip() or "user"
+        return f"@{_escape_markdown_text(display_name)}"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -652,8 +687,7 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        value = element.get(key)
-        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
+        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -663,7 +697,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -671,17 +705,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
+        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    return ""
@@ -692,31 +726,48 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
+def normalize_feishu_message(
+    *,
+    message_type: str,
+    raw_content: str,
+    mentions: Optional[Sequence[Any]] = None,
+    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
+) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
+    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
+        text = str(payload.get("text", "") or "")
+        # Feishu SDK sometimes omits @_all from the mentions payload even when
+        # the text literal contains it (confirmed via im.v1.message.get).
+        if "@_all" in text and "@_all" not in mentions_map:
+            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
+            text_content=_normalize_feishu_text(text, mentions_map),
+            mentions=list(mentions_map.values()),
        )
    if normalized_type == "post":
-        parsed_post = parse_feishu_post_payload(payload)
+        # The walker writes back to mentions_map if it encounters
+        # <at user_id="@_all">, so reading .values() after parsing is enough.
+        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentioned_ids=list(parsed_post.mentioned_ids),
+            mentions=list(mentions_map.values()),
            relation_kind="post",
        )
+    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT
+            or FALLBACK_IMAGE_TEXT,
+            mentions_map,
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -724,6 +775,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
+            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -735,6 +787,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
+            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1009,8 +1062,20 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(text: str) -> str:
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
+def _normalize_feishu_text(
+    text: str,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> str:
+    def _sub(match: "re.Match[str]") -> str:
+        key = match.group(0)
+        ref = (mentions_map or {}).get(key)
+        if ref is None:
+            return " "
+        name = ref.name or ref.open_id or "user"
+        return f"@{name}"
+
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
+    cleaned = cleaned.replace("@_all", "@all")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1029,6 +1094,117 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


+# ---------------------------------------------------------------------------
+# Mention helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_mention_ids(mention: Any) -> tuple[str, str]:
+    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
+    # plus id_type discriminator; event payloads hand back a nested UserId
+    # object carrying both fields.
+    mention_id = getattr(mention, "id", None)
+    if isinstance(mention_id, str):
+        id_type = str(getattr(mention, "id_type", "") or "").lower()
+        if id_type == "open_id":
+            return mention_id, ""
+        if id_type == "user_id":
+            return "", mention_id
+        return "", ""
+    if mention_id is None:
+        return "", ""
+    return (
+        str(getattr(mention_id, "open_id", "") or ""),
+        str(getattr(mention_id, "user_id", "") or ""),
+    )
+
+
+def _build_mentions_map(
+    mentions: Optional[Sequence[Any]],
+    bot: _FeishuBotIdentity,
+) -> Dict[str, FeishuMentionRef]:
+    result: Dict[str, FeishuMentionRef] = {}
+    for mention in mentions or []:
+        key = str(getattr(mention, "key", "") or "")
+        if not key:
+            continue
+        if key == "@_all":
+            result[key] = FeishuMentionRef(is_all=True)
+            continue
+        open_id, user_id = _extract_mention_ids(mention)
+        name = str(getattr(mention, "name", "") or "").strip()
+        result[key] = FeishuMentionRef(
+            name=name,
+            open_id=open_id,
+            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
+        )
+    return result
+
+
+def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
+    parts: List[str] = []
+    seen: set = set()
+    for ref in mentions:
+        if ref.is_self:
+            continue
+        signature = (ref.is_all, ref.open_id, ref.name)
+        if signature in seen:
+            continue
+        seen.add(signature)
+        if ref.is_all:
+            parts.append("@all")
+        elif ref.open_id:
+            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
+        else:
+            parts.append(ref.name or "unknown")
+    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
+
+
+def _strip_edge_self_mentions(
+    text: str,
+    mentions: Sequence[FeishuMentionRef],
+) -> str:
+    # Leading: strip consecutive self-mentions unconditionally.
+    # Trailing: strip only when followed by whitespace/terminal punct, so
+    # mid-sentence references ("don't @Bot again") stay intact.
+    # Leading word-boundary prevents @Al from eating @Alice.
+    if not text:
+        return text
+    self_names = [
+        f"@{ref.name or ref.open_id or 'user'}"
+        for ref in mentions
+        if ref.is_self
+    ]
+    if not self_names:
+        return text
+
+    remaining = text.lstrip()
+    while True:
+        for nm in self_names:
+            if not remaining.startswith(nm):
+                continue
+            after = remaining[len(nm):]
+            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
+                continue
+            remaining = after.lstrip()
+            break
+        else:
+            break
+
+    while True:
+        i = len(remaining)
+        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
+            i -= 1
+        body = remaining[:i]
+        tail = remaining[i:]
+        for nm in self_names:
+            if body.endswith(nm):
+                remaining = body[: -len(nm)].rstrip() + tail
+                break
+        else:
+            return remaining
+
+
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -2470,13 +2646,22 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
+        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
+
+        if inbound_type == MessageType.TEXT:
+            text = _strip_edge_self_mentions(text, mentions)
+            if text.startswith("/"):
+                inbound_type = MessageType.COMMAND
+
+        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
+            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
            return

-        if inbound_type == MessageType.TEXT and text.startswith("/"):
-            inbound_type = MessageType.COMMAND
+        if inbound_type != MessageType.COMMAND:
+            hint = _build_mention_hint(mentions)
+            if hint:
+                text = f"{hint}\n\n{text}" if text else hint

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -2935,14 +3120,20 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
-        """Extract text and cached media from a normalized Feishu message."""
+    async def _extract_message_content(
+        self, message: Any
+    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
+        normalized = normalize_feishu_message(
+            message_type=raw_type,
+            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
+        )
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -2959,7 +3150,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types
+        return text, inbound_type, media_urls, media_types, list(normalized.mentions)

    async def _download_feishu_message_resources(
        self,
@@ -3308,15 +3499,31 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
+            parent_mentions = getattr(parent, "mentions", None) if parent else None
+            text = self._extract_text_from_raw_content(
+                msg_type=msg_type,
+                raw_content=raw_content,
+                mentions=parent_mentions,
+            )
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
-        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
+    def _extract_text_from_raw_content(
+        self,
+        *,
+        msg_type: str,
+        raw_content: str,
+        mentions: Optional[Sequence[Any]] = None,
+    ) -> Optional[str]:
+        normalized = normalize_feishu_message(
+            message_type=msg_type,
+            raw_content=raw_content,
+            mentions=mentions,
+            bot=self._bot_identity(),
+        )
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3386,10 +3593,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
        )
-        if normalized.mentioned_ids:
-            return self._post_mentions_bot(normalized.mentioned_ids)
-        return False
+        return self._post_mentions_bot(normalized.mentions)

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3409,30 +3616,37 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        """Check whether any mention targets the configured or inferred bot identity."""
+        # IDs trump names: when both sides have open_id (or both user_id),
+        # match requires equal IDs. Name fallback only when either side
+        # lacks an ID.
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = getattr(mention_id, "open_id", None)
-            mention_user_id = getattr(mention_id, "user_id", None)
+            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
+            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if self._bot_open_id and mention_open_id == self._bot_open_id:
-                return True
-            if self._bot_user_id and mention_user_id == self._bot_user_id:
-                return True
+            if mention_open_id and self._bot_open_id:
+                if mention_open_id == self._bot_open_id:
+                    return True
+                continue  # IDs differ — not the bot; skip name fallback.
+            if mention_user_id and self._bot_user_id:
+                if mention_user_id == self._bot_user_id:
+                    return True
+                continue
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
-        if not mentioned_ids:
-            return False
-        if self._bot_open_id and self._bot_open_id in mentioned_ids:
-            return True
-        if self._bot_user_id and self._bot_user_id in mentioned_ids:
-            return True
-        return False
+    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
+        return any(m.is_self for m in mentions)
+
+    def _bot_identity(self) -> _FeishuBotIdentity:
+        return _FeishuBotIdentity(
+            open_id=self._bot_open_id,
+            user_id=self._bot_user_id,
+            name=self._bot_name,
+        )

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3457,14 +3671,15 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                resp = await asyncio.to_thread(
-                    self._client.request,
-                    method="GET",
-                    url="/open-apis/bot/v3/info",
-                    body=None,
-                    raw_response=True,
+                req = (
+                    BaseRequest.builder()
+                    .http_method(HttpMethod.GET)
+                    .uri("/open-apis/bot/v3/info")
+                    .token_types({AccessTokenType.TENANT})
+                    .build()
                )
-                content = getattr(resp, "content", None)
+                resp = await asyncio.to_thread(self._client.request, req)
+                content = getattr(getattr(resp, "raw", None), "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4232,12 +4447,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
+    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("bot_name"),
+        "bot_name": bot.get("app_name") or bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4246,13 +4461,18 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        resp = client.request(
-            method="GET",
-            url="/open-apis/bot/v3/info",
-            body=None,
-            raw_response=True,
+        req = (
+            BaseRequest.builder()
+            .http_method(HttpMethod.GET)
+            .uri("/open-apis/bot/v3/info")
+            .token_types({AccessTokenType.TENANT})
+            .build()
        )
-        return _parse_bot_response(json.loads(resp.content))
+        resp = client.request(req)
+        content = getattr(getattr(resp, "raw", None), "content", None)
+        if content is None:
+            return None
+        return _parse_bot_response(json.loads(content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -26,9 +26,8 @@ from .adapter import (  # noqa: F401
 # -- Onboard (QR-code scan-to-configure) -----------------------------------
 from .onboard import (  # noqa: F401
    BindStatus,
-    create_bind_task,
-    poll_bind_result,
    build_connect_url,
+    qr_register,
 )
 from .crypto import decrypt_secret, generate_bind_key  # noqa: F401

@@ -44,9 +43,8 @@ __all__ = [
    "_ssrf_redirect_guard",
    # onboard
    "BindStatus",
-    "create_bind_task",
-    "poll_bind_result",
    "build_connect_url",
+    "qr_register",
    # crypto
    "decrypt_secret",
    "generate_bind_key",
@@ -1,6 +1,10 @@
 """
 QQBot scan-to-configure (QR code onboard) module.

+Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
+entry-point ``qr_register()`` that handles the full flow (create task →
+display QR code → poll → decrypt credentials).
+
 Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
 generate a QR-code URL and poll for scan completion.  On success the caller
 receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -12,18 +16,20 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
 from __future__ import annotations

 import logging
+import time
 from enum import IntEnum
-from typing import Tuple
+from typing import Optional, Tuple
 from urllib.parse import quote

 from .constants import (
    ONBOARD_API_TIMEOUT,
    ONBOARD_CREATE_PATH,
+    ONBOARD_POLL_INTERVAL,
    ONBOARD_POLL_PATH,
    PORTAL_HOST,
    QR_URL_TEMPLATE,
 )
-from .crypto import generate_bind_key
+from .crypto import decrypt_secret, generate_bind_key
 from .utils import get_api_headers

 logger = logging.getLogger(__name__)
@@ -35,7 +41,7 @@ logger = logging.getLogger(__name__)


 class BindStatus(IntEnum):
-    """Status codes returned by ``poll_bind_result``."""
+    """Status codes returned by ``_poll_bind_result``."""

    NONE = 0
    PENDING = 1
@@ -44,18 +50,40 @@ class BindStatus(IntEnum):


 # ---------------------------------------------------------------------------
-# Public API
+# QR rendering
+# ---------------------------------------------------------------------------
+
+try:
+    import qrcode as _qrcode_mod
+except (ImportError, TypeError):
+    _qrcode_mod = None  # type: ignore[assignment]
+
+
+def _render_qr(url: str) -> bool:
+    """Try to render a QR code in the terminal. Returns True if successful."""
+    if _qrcode_mod is None:
+        return False
+    try:
+        qr = _qrcode_mod.QRCode(
+            error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
+            border=2,
+        )
+        qr.add_data(url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        return True
+    except Exception:
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
 # ---------------------------------------------------------------------------


-async def create_bind_task(
-    timeout: float = ONBOARD_API_TIMEOUT,
-) -> Tuple[str, str]:
+def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    """Create a bind task and return *(task_id, aes_key_base64)*.

-    The AES key is generated locally and sent to the server so it can
-    encrypt the bot credentials before returning them.
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -64,8 +92,8 @@ async def create_bind_task(
    url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
    key = generate_bind_key()

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"key": key}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"key": key}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -80,7 +108,7 @@ async def create_bind_task(
    return task_id, key


-async def poll_bind_result(
+def _poll_bind_result(
    task_id: str,
    timeout: float = ONBOARD_API_TIMEOUT,
 ) -> Tuple[BindStatus, str, str, str]:
@@ -89,12 +117,6 @@ async def poll_bind_result(
    Returns:
        A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.

-        * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
-          :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
-          key from :func:`create_bind_task`.
-        * ``user_openid`` is the OpenID of the person who scanned the code
-          (available when ``status == COMPLETED``).
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -102,8 +124,8 @@ async def poll_bind_result(

    url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -122,3 +144,77 @@ async def poll_bind_result(
 def build_connect_url(task_id: str) -> str:
    """Build the QR-code target URL for a given *task_id*."""
    return QR_URL_TEMPLATE.format(task_id=quote(task_id))
+
+
+# ---------------------------------------------------------------------------
+# Public entry-point
+# ---------------------------------------------------------------------------
+
+_MAX_REFRESHES = 3
+
+
+def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
+    """Run the QQBot scan-to-configure QR registration flow.
+
+    Mirrors ``feishu.qr_register()``: handles create → display → poll →
+    decrypt in one call.  Unexpected errors propagate to the caller.
+
+    :returns:
+        ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
+        success, or ``None`` on failure / expiry / cancellation.
+    """
+    deadline = time.monotonic() + timeout_seconds
+
+    for refresh_count in range(_MAX_REFRESHES + 1):
+        # ── Create bind task ──
+        try:
+            task_id, aes_key = _create_bind_task()
+        except Exception as exc:
+            logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
+            return None
+
+        url = build_connect_url(task_id)
+
+        # ── Display QR code + URL ──
+        print()
+        if _render_qr(url):
+            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
+        else:
+            print(f"  Open this URL in QQ on your phone:\n  {url}")
+            print("  Tip: pip install qrcode  to display a scannable QR code here")
+        print()
+
+        # ── Poll loop ──
+        while time.monotonic() < deadline:
+            try:
+                status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
+            except Exception:
+                time.sleep(ONBOARD_POLL_INTERVAL)
+                continue
+
+            if status == BindStatus.COMPLETED:
+                client_secret = decrypt_secret(encrypted_secret, aes_key)
+                print()
+                print(f"  QR scan complete! (App ID: {app_id})")
+                if user_openid:
+                    print(f"  Scanner's OpenID: {user_openid}")
+                return {
+                    "app_id": app_id,
+                    "client_secret": client_secret,
+                    "user_openid": user_openid,
+                }
+
+            if status == BindStatus.EXPIRED:
+                if refresh_count >= _MAX_REFRESHES:
+                    logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
+                    return None
+                print(f"\n  QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
+                break  # next for-loop iteration creates a new task
+
+            time.sleep(ONBOARD_POLL_INTERVAL)
+        else:
+            # deadline reached without completing
+            logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
+            return None
+
+    return None
@@ -38,6 +38,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
    safe_url_for_log,
@@ -113,6 +114,11 @@ class SlackAdapter(BasePlatformAdapter):
        # Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
        self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
        self._THREAD_CACHE_TTL = 60.0
+        # Track message IDs that should get reaction lifecycle (DMs / @mentions).
+        self._reacting_message_ids: set = set()
+        # Track active assistant thread status indicators so stop_typing can
+        # clear them (chat_id → thread_ts).
+        self._active_status_threads: Dict[str, str] = {}

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -362,6 +368,7 @@ class SlackAdapter(BasePlatformAdapter):
        if not thread_ts:
            return  # Can only set status in a thread context

+        self._active_status_threads[chat_id] = thread_ts
        try:
            await self._get_client(chat_id).assistant_threads_setStatus(
                channel_id=chat_id,
@@ -373,6 +380,22 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

+    async def stop_typing(self, chat_id: str) -> None:
+        """Clear the assistant thread status indicator."""
+        if not self._app:
+            return
+        thread_ts = self._active_status_threads.pop(chat_id, None)
+        if not thread_ts:
+            return
+        try:
+            await self._get_client(chat_id).assistant_threads_setStatus(
+                channel_id=chat_id,
+                thread_ts=thread_ts,
+                status="",
+            )
+        except Exception as e:
+            logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
+
    def _dm_top_level_threads_as_sessions(self) -> bool:
        """Whether top-level Slack DMs get per-message session threads.

@@ -584,6 +607,38 @@ class SlackAdapter(BasePlatformAdapter):
            logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
            return False

+    def _reactions_enabled(self) -> bool:
+        """Check if message reactions are enabled via config/env."""
+        return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """Add an in-progress reaction when message processing begins."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        channel_id = getattr(event.source, "chat_id", None)
+        if channel_id:
+            await self._add_reaction(channel_id, ts, "eyes")
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
+        """Swap the in-progress reaction for a final success/failure reaction."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        self._reacting_message_ids.discard(ts)
+        channel_id = getattr(event.source, "chat_id", None)
+        if not channel_id:
+            return
+        await self._remove_reaction(channel_id, ts, "eyes")
+        if outcome == ProcessingOutcome.SUCCESS:
+            await self._add_reaction(channel_id, ts, "white_check_mark")
+        elif outcome == ProcessingOutcome.FAILURE:
+            await self._add_reaction(channel_id, ts, "x")
+
    # ----- User identity resolution -----

    async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
@@ -1213,17 +1268,12 @@ class SlackAdapter(BasePlatformAdapter):
        # Only react when bot is directly addressed (DM or @mention).
        # In listen-all channels (require_mention=false), reacting to every
        # casual message would be noisy.
-        _should_react = is_dm or is_mentioned
-
+        _should_react = (is_dm or is_mentioned) and self._reactions_enabled()
        if _should_react:
-            await self._add_reaction(channel_id, ts, "eyes")
+            self._reacting_message_ids.add(ts)

        await self.handle_message(msg_event)

-        if _should_react:
-            await self._remove_reaction(channel_id, ts, "eyes")
-            await self._add_reaction(channel_id, ts, "white_check_mark")
-
    # ----- Approval button support (Block Kit) -----

    async def send_exec_approval(
@@ -1464,3 +1464,134 @@ class WeComAdapter(BasePlatformAdapter):
            "name": chat_id,
            "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
        }
+
+
+# ------------------------------------------------------------------
+# QR code scan flow for obtaining bot credentials
+# ------------------------------------------------------------------
+
+_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
+_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
+_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
+_QR_POLL_INTERVAL = 3  # seconds
+_QR_POLL_TIMEOUT = 300  # 5 minutes
+
+
+def qr_scan_for_bot_info(
+    *,
+    timeout_seconds: int = _QR_POLL_TIMEOUT,
+) -> Optional[Dict[str, str]]:
+    """Run the WeCom QR scan flow to obtain bot_id and secret.
+
+    Fetches a QR code from WeCom, renders it in the terminal, and polls
+    until the user scans it or the timeout expires.
+
+    Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
+    failure or timeout.
+
+    Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
+    used here are not part of WeCom's public developer API — they back the
+    admin-console web UI's bot-creation flow and may change without notice.
+    The same pattern is used by the feishu/dingtalk QR setup wizards.
+    """
+    try:
+        import urllib.request
+        import urllib.parse
+    except ImportError:  # pragma: no cover
+        logger.error("urllib is required for WeCom QR scan")
+        return None
+
+    generate_url = f"{_QR_GENERATE_URL}?source=hermes"
+
+    # ── Step 1: Fetch QR code ──
+    print("  Connecting to WeCom...", end="", flush=True)
+    try:
+        req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            raw = json.loads(resp.read().decode("utf-8"))
+    except Exception as exc:
+        logger.error("WeCom QR: failed to fetch QR code: %s", exc)
+        print(f" failed: {exc}")
+        return None
+
+    data = raw.get("data") or {}
+    scode = str(data.get("scode") or "").strip()
+    auth_url = str(data.get("auth_url") or "").strip()
+
+    if not scode or not auth_url:
+        logger.error("WeCom QR: unexpected response format: %s", raw)
+        print(" failed: unexpected response format")
+        return None
+
+    print(" done.")
+
+    # ── Step 2: Render QR code in terminal ──
+    print()
+    qr_rendered = False
+    try:
+        import qrcode as _qrcode
+        qr = _qrcode.QRCode()
+        qr.add_data(auth_url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        qr_rendered = True
+    except ImportError:
+        pass
+    except Exception:
+        pass
+
+    page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
+    if qr_rendered:
+        print(f"\n  Scan the QR code above, or open this URL directly:\n  {page_url}")
+    else:
+        print(f"  Open this URL in WeCom on your phone:\n\n  {page_url}\n")
+        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
+    print()
+    print("  Fetching configuration results...", end="", flush=True)
+
+    # ── Step 3: Poll for result ──
+    import time
+    deadline = time.time() + timeout_seconds
+    query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
+    poll_count = 0
+
+    while time.time() < deadline:
+        try:
+            req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                result = json.loads(resp.read().decode("utf-8"))
+        except Exception as exc:
+            logger.debug("WeCom QR poll error: %s", exc)
+            time.sleep(_QR_POLL_INTERVAL)
+            continue
+
+        poll_count += 1
+        # Print a dot on every poll so progress is visible within 3s.
+        print(".", end="", flush=True)
+
+        result_data = result.get("data") or {}
+        status = str(result_data.get("status") or "").lower()
+
+        if status == "success":
+            print()  # newline after "Fetching configuration results..." dots
+            bot_info = result_data.get("bot_info") or {}
+            bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
+            secret = str(bot_info.get("secret") or "").strip()
+            if bot_id and secret:
+                return {"bot_id": bot_id, "secret": secret}
+            logger.warning(
+                "WeCom QR: scan reported success but bot_info missing or incomplete: %s",
+                result_data,
+            )
+            print(
+                "  QR scan reported success but no bot credentials were returned.\n"
+                "  This usually means the bot was not actually created on the WeCom side.\n"
+                "  Falling back to manual credential entry."
+            )
+            return None
+
+        time.sleep(_QR_POLL_INTERVAL)
+
+    print()  # newline after dots
+    print(f"  QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
+    return None
@@ -710,7 +710,26 @@ class GatewayRunner:
            self._session_db = SessionDB()
        except Exception as e:
            logger.debug("SQLite session store not available: %s", e)
-        
+
+        # Opportunistic state.db maintenance: prune ended sessions older
+        # than sessions.retention_days + optional VACUUM. Tracks last-run
+        # in state_meta so it only actually executes once per
+        # sessions.min_interval_hours.  Gateway is long-lived so blocking
+        # a few seconds once per day is acceptable; failures are logged
+        # but never raised.
+        if self._session_db is not None:
+            try:
+                from hermes_cli.config import load_config as _load_full_config
+                _sess_cfg = (_load_full_config().get("sessions") or {})
+                if _sess_cfg.get("auto_prune", False):
+                    self._session_db.maybe_auto_prune_and_vacuum(
+                        retention_days=int(_sess_cfg.get("retention_days", 90)),
+                        min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)),
+                        vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)),
+                    )
+            except Exception as exc:
+                logger.debug("state.db auto-maintenance skipped: %s", exc)
+
        # DM pairing store for code-based user authorization
        from gateway.pairing import PairingStore
        self.pairing_store = PairingStore()
@@ -2668,8 +2687,9 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug("SessionDB close error: %s", _e)

-            from gateway.status import remove_pid_file
+            from gateway.status import remove_pid_file, release_gateway_runtime_lock
            remove_pid_file()
+            release_gateway_runtime_lock()

            # Write a clean-shutdown marker so the next startup knows this
            # wasn't a crash.  suspend_recently_active() only needs to run
@@ -3466,23 +3486,73 @@ class GatewayRunner:

        # Check for commands
        command = event.get_command()
-        
-        # Emit command:* hook for any recognized slash command.
-        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
-        # in hermes_cli/commands.py — no hardcoded set to maintain here.
-        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
-        if command and command in GATEWAY_KNOWN_COMMANDS:
-            await self.hooks.emit(f"command:{command}", {
-                "platform": source.platform.value if source.platform else "",
-                "user_id": source.user_id,
-                "command": command,
-                "args": event.get_command_args().strip(),
-            })

-        # Resolve aliases to canonical name so dispatch only checks canonicals.
+        from hermes_cli.commands import (
+            GATEWAY_KNOWN_COMMANDS,
+            is_gateway_known_command,
+            resolve_command as _resolve_cmd,
+        )
+
+        # Resolve aliases to canonical name so dispatch and hook names
+        # don't depend on the exact alias the user typed.
        _cmd_def = _resolve_cmd(command) if command else None
        canonical = _cmd_def.name if _cmd_def else command

+        # Fire the ``command:<canonical>`` hook for any recognized slash
+        # command — built-in OR plugin-registered. Handlers can return a
+        # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}``
+        # to intercept dispatch before core handling runs. This replaces
+        # the previous fire-and-forget emit(): return values are now
+        # honored, but handlers that return nothing behave exactly as
+        # before (telemetry-style hooks keep working).
+        if command and is_gateway_known_command(canonical):
+            raw_args = event.get_command_args().strip()
+            hook_ctx = {
+                "platform": source.platform.value if source.platform else "",
+                "user_id": source.user_id,
+                "command": canonical,
+                "raw_command": command,
+                "args": raw_args,
+                "raw_args": raw_args,
+            }
+            try:
+                hook_results = await self.hooks.emit_collect(
+                    f"command:{canonical}", hook_ctx
+                )
+            except Exception as _hook_err:
+                logger.debug(
+                    "command:%s hook dispatch failed (non-fatal): %s",
+                    canonical, _hook_err,
+                )
+                hook_results = []
+
+            for hook_result in hook_results:
+                if not isinstance(hook_result, dict):
+                    continue
+                decision = str(hook_result.get("decision", "")).strip().lower()
+                if not decision or decision == "allow":
+                    continue
+                if decision == "deny":
+                    message = hook_result.get("message")
+                    if isinstance(message, str) and message:
+                        return message
+                    return f"Command `/{command}` was blocked by a hook."
+                if decision == "handled":
+                    message = hook_result.get("message")
+                    return message if isinstance(message, str) and message else None
+                if decision == "rewrite":
+                    new_command = str(
+                        hook_result.get("command_name", "")
+                    ).strip().lstrip("/")
+                    if not new_command:
+                        continue
+                    new_args = str(hook_result.get("raw_args", "")).strip()
+                    event.text = f"/{new_command} {new_args}".strip()
+                    command = event.get_command()
+                    _cmd_def = _resolve_cmd(command) if command else None
+                    canonical = _cmd_def.name if _cmd_def else command
+                    break
+
        if canonical == "new":
            return await self._handle_reset_command(event)
        
@@ -6456,6 +6526,11 @@ class GatewayRunner:
                    session_id=task_id,
                    platform=platform_key,
                    user_id=source.user_id,
+                    user_name=source.user_name,
+                    chat_id=source.chat_id,
+                    chat_name=source.chat_name,
+                    chat_type=source.chat_type,
+                    thread_id=source.thread_id,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )
@@ -7216,6 +7291,7 @@ class GatewayRunner:
                    tool_calls=msg.get("tool_calls"),
                    tool_call_id=msg.get("tool_call_id"),
                    reasoning=msg.get("reasoning"),
+                    reasoning_content=msg.get("reasoning_content"),
                )
            except Exception:
                pass  # Best-effort copy
@@ -7620,13 +7696,14 @@ class GatewayRunner:
        from hermes_cli.debug import (
            _capture_dump, collect_debug_report,
            upload_to_pastebin, _schedule_auto_delete,
-            _GATEWAY_PRIVACY_NOTICE,
+            _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes,
        )

        loop = asyncio.get_running_loop()

        # Run blocking I/O (dump capture, log reads, uploads) in a thread.
        def _collect_and_upload():
+            _best_effort_sweep_expired_pastes()
            dump_text = _capture_dump()
            report = collect_debug_report(log_lines=200, dump_text=dump_text)

@@ -9698,6 +9775,11 @@ class GatewayRunner:
                    session_id=session_id,
                    platform=platform_key,
                    user_id=source.user_id,
+                    user_name=source.user_name,
+                    chat_id=source.chat_id,
+                    chat_name=source.chat_name,
+                    chat_type=source.chat_type,
+                    thread_id=source.thread_id,
                    gateway_session_key=session_key,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
@@ -10764,7 +10846,13 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
+    from gateway.status import (
+        acquire_gateway_runtime_lock,
+        get_running_pid,
+        release_gateway_runtime_lock,
+        remove_pid_file,
+        terminate_pid,
+    )
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
@@ -10977,14 +11065,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            "Exiting to avoid double-running.", _current_pid
        )
        return False
+    if not acquire_gateway_runtime_lock():
+        logger.error(
+            "Gateway runtime lock is already held by another instance. Exiting."
+        )
+        return False
    try:
        write_pid_file()
    except FileExistsError:
+        release_gateway_runtime_lock()
        logger.error(
            "PID file race lost to another gateway instance. Exiting."
        )
        return False
    atexit.register(remove_pid_file)
+    atexit.register(release_gateway_runtime_lock)

    # Start the gateway
    success = await runner.start()
@@ -1147,6 +1147,10 @@ class SessionStore:
                    tool_name=message.get("tool_name"),
                    tool_calls=message.get("tool_calls"),
                    tool_call_id=message.get("tool_call_id"),
+                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
+                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
+                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
+                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1176,6 +1180,7 @@ class SessionStore:
                        tool_calls=msg.get("tool_calls"),
                        tool_call_id=msg.get("tool_call_id"),
                        reasoning=msg.get("reasoning") if role == "assistant" else None,
+                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                    )
@@ -22,11 +22,18 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

+if sys.platform == "win32":
+    import msvcrt
+else:
+    import fcntl
+
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
+_GATEWAY_LOCK_FILENAME = "gateway.lock"
+_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -35,6 +42,14 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


+def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
+    """Return the path to the runtime gateway lock file."""
+    if pid_path is not None:
+        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
+    home = get_hermes_home()
+    return home / _GATEWAY_LOCK_FILENAME
+
+
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -212,16 +227,135 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


+def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
+    return _read_pid_record(lock_path or _get_gateway_lock_path())
+
+
+def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
+    if not record:
+        return None
+    try:
+        return int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        return None
+
+
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
+    """Delete a stale gateway PID file (and its sibling lock metadata).
+
+    Called from ``get_running_pid()`` after the runtime lock has already been
+    confirmed inactive, so the on-disk metadata is known to belong to a dead
+    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
+    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
+    ``--replace`` handoffs), this path force-unlinks both files so the next
+    startup sees a clean slate.
+    """
    if not cleanup_stale:
        return
    try:
-        if pid_path == _get_pid_path():
-            remove_pid_file()
-        else:
-            pid_path.unlink(missing_ok=True)
+        pid_path.unlink(missing_ok=True)
    except Exception:
        pass
+    try:
+        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
+    except Exception:
+        pass
+
+
+def _write_gateway_lock_record(handle) -> None:
+    handle.seek(0)
+    handle.truncate()
+    json.dump(_build_pid_record(), handle)
+    handle.flush()
+    try:
+        os.fsync(handle.fileno())
+    except OSError:
+        pass
+
+
+def _try_acquire_file_lock(handle) -> bool:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0, os.SEEK_END)
+            if handle.tell() == 0:
+                handle.write("\n")
+                handle.flush()
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        return True
+    except (BlockingIOError, OSError):
+        return False
+
+
+def _release_file_lock(handle) -> None:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+    except OSError:
+        pass
+
+
+def acquire_gateway_runtime_lock() -> bool:
+    """Claim the cross-process runtime lock for the gateway.
+
+    Unlike the PID file, the lock is owned by the live process itself. If the
+    process dies abruptly, the OS releases the lock automatically.
+    """
+    global _gateway_lock_handle
+    if _gateway_lock_handle is not None:
+        return True
+
+    path = _get_gateway_lock_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    handle = open(path, "a+", encoding="utf-8")
+    if not _try_acquire_file_lock(handle):
+        handle.close()
+        return False
+    _write_gateway_lock_record(handle)
+    _gateway_lock_handle = handle
+    return True
+
+
+def release_gateway_runtime_lock() -> None:
+    """Release the gateway runtime lock when owned by this process."""
+    global _gateway_lock_handle
+    handle = _gateway_lock_handle
+    if handle is None:
+        return
+    _gateway_lock_handle = None
+    _release_file_lock(handle)
+    try:
+        handle.close()
+    except OSError:
+        pass
+
+
+def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
+    """Return True when some process currently owns the gateway runtime lock."""
+    global _gateway_lock_handle
+    resolved_lock_path = lock_path or _get_gateway_lock_path()
+    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
+        return True
+
+    if not resolved_lock_path.exists():
+        return False
+
+    handle = open(resolved_lock_path, "a+", encoding="utf-8")
+    try:
+        if _try_acquire_file_lock(handle):
+            _release_file_lock(handle)
+            return False
+        return True
+    finally:
+        try:
+            handle.close()
+        except OSError:
+            pass


 def write_pid_file() -> None:
@@ -583,35 +717,42 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    record = _read_pid_record(resolved_pid_path)
-    if not record:
+    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
+    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
+    if not lock_active:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    try:
-        pid = int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    primary_record = _read_pid_record(resolved_pid_path)
+    fallback_record = _read_gateway_lock_record(resolved_lock_path)

-    try:
-        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-    except (ProcessLookupError, PermissionError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    for record in (primary_record, fallback_record):
+        pid = _pid_from_record(record)
+        if pid is None:
+            continue

-    recorded_start = record.get("start_time")
-    current_start = _get_process_start_time(pid)
-    if recorded_start is not None and current_start is not None and current_start != recorded_start:
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+        try:
+            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+        except ProcessLookupError:
+            continue
+        except PermissionError:
+            # The process exists but belongs to another user/service scope.
+            # With the runtime lock still held, prefer keeping it visible
+            # rather than deleting the PID file as "stale".
+            if _record_looks_like_gateway(record):
+                return pid
+            continue

-    if not _looks_like_gateway_process(pid):
-        if not _record_looks_like_gateway(record):
-            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-            return None
+        recorded_start = record.get("start_time")
+        current_start = _get_process_start_time(pid)
+        if recorded_start is not None and current_start is not None and current_start != recorded_start:
+            continue

-    return pid
+        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
+            return pid
+
+    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+    return None


 def is_gateway_running(
@@ -72,6 +72,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
 DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
+STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
+STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -168,8 +170,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="kimi-coding",
        name="Kimi / Moonshot",
        auth_type="api_key",
+        # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
+        # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
+        # by _resolve_kimi_base_url() below.
        inference_base_url="https://api.moonshot.ai/v1",
-        api_key_env_vars=("KIMI_API_KEY",),
+        api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
        base_url_env_var="KIMI_BASE_URL",
    ),
    "kimi-coding-cn": ProviderConfig(
@@ -179,6 +184,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        inference_base_url="https://api.moonshot.cn/v1",
        api_key_env_vars=("KIMI_CN_API_KEY",),
    ),
+    "stepfun": ProviderConfig(
+        id="stepfun",
+        name="StepFun Step Plan",
+        auth_type="api_key",
+        inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
+        api_key_env_vars=("STEPFUN_API_KEY",),
+        base_url_env_var="STEPFUN_BASE_URL",
+    ),
    "arcee": ProviderConfig(
        id="arcee",
        name="Arcee AI",
@@ -340,10 +353,16 @@ def get_anthropic_key() -> str:
 # =============================================================================

 # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
-# on api.kimi.com/coding/v1.  Legacy keys from platform.moonshot.ai work on
-# api.moonshot.ai/v1 (the default).  Auto-detect when user hasn't set
+# on api.kimi.com/coding.  Legacy keys from platform.moonshot.ai work on
+# api.moonshot.ai/v1 (the old default).  Auto-detect when user hasn't set
 # KIMI_BASE_URL explicitly.
-KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
+#
+# Note: the base URL intentionally has NO /v1 suffix.  The /coding endpoint
+# speaks the Anthropic Messages protocol, and the anthropic SDK appends
+# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
+# (the correct target). Using "/coding/v1" here would produce
+# "/coding/v1/v1/messages" (a 404).
+KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"


 def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
@@ -983,6 +1002,7 @@ def resolve_provider(
        "x-ai": "xai", "x.ai": "xai", "grok": "xai",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
+        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -3375,7 +3395,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
+                _PROVIDER_MODELS, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])
@@ -3384,7 +3404,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
-                model_ids = filter_nous_free_models(model_ids, pricing)
                free_tier = check_nous_free_tier()
                if free_tier:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -260,6 +260,26 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )


+def is_gateway_known_command(name: str | None) -> bool:
+    """Return True if ``name`` resolves to a gateway-dispatchable slash command.
+
+    This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
+    from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
+    looked up lazily so importing this module never forces plugin
+    discovery. Gateway code uses this to decide whether to emit
+    ``command:<name>`` hooks — plugin commands get the same lifecycle
+    events as built-ins.
+    """
+    if not name:
+        return False
+    if name in GATEWAY_KNOWN_COMMANDS:
+        return True
+    for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
+        if plugin_name == name:
+            return True
+    return False
+
+
 # Commands with explicit Level-2 running-agent handlers in gateway/run.py.
 # Listed here for introspection / tests; semantically a subset of
 # "all resolvable commands" — which is the real bypass set (see
@@ -371,12 +391,47 @@ def gateway_help_lines() -> list[str]:
    return lines


+def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
+    """Yield (name, description, args_hint) tuples for all plugin slash commands.
+
+    Plugin commands are registered via
+    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
+    like ``CommandDef`` entries for gateway surfacing: they appear in the
+    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
+    (via :func:`gateway.platforms.discord._register_slash_commands`) in
+    Discord's native slash command picker.
+
+    Lookup is lazy so importing this module never forces plugin discovery
+    (which can trigger filesystem scans and environment-dependent
+    behavior).
+    """
+    try:
+        from hermes_cli.plugins import get_plugin_commands
+    except Exception:
+        return []
+    try:
+        commands = get_plugin_commands() or {}
+    except Exception:
+        return []
+    entries: list[tuple[str, str, str]] = []
+    for name, meta in commands.items():
+        if not isinstance(name, str) or not isinstance(meta, dict):
+            continue
+        description = str(meta.get("description") or f"Run /{name}")
+        args_hint = str(meta.get("args_hint") or "").strip()
+        entries.append((name, description, args_hint))
+    return entries
+
+
 def telegram_bot_commands() -> list[tuple[str, str]]:
    """Return (command_name, description) pairs for Telegram setMyCommands.

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
    canonical command.
+
+    Plugin-registered slash commands are included so plugins get native
+    autocomplete in Telegram without touching core code.
    """
    overrides = _resolve_config_gates()
    result: list[tuple[str, str]] = []
@@ -386,6 +441,10 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
+    for name, description, _args_hint in _iter_plugin_command_entries():
+        tg_name = _sanitize_telegram_name(name)
+        if tg_name:
+            result.append((tg_name, description))
    return result


@@ -750,6 +809,9 @@ def slack_subcommand_map() -> dict[str, str]:

    Maps both canonical names and aliases so /hermes bg do stuff works
    the same as /hermes background do stuff.
+
+    Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
+    routes through the plugin handler.
    """
    overrides = _resolve_config_gates()
    mapping: dict[str, str] = {}
@@ -759,6 +821,9 @@ def slack_subcommand_map() -> dict[str, str]:
        mapping[cmd.name] = f"/{cmd.name}"
        for alias in cmd.aliases:
            mapping[alias] = f"/{alias}"
+    for name, _description, _args_hint in _iter_plugin_command_entries():
+        if name not in mapping:
+            mapping[name] = f"/{name}"
    return mapping


@@ -613,6 +613,10 @@ DEFAULT_CONFIG = {
    },
    
    # Text-to-speech configuration
+    # Each provider supports an optional `max_text_length:` override for the
+    # per-request input-character cap. Omit it to use the provider's documented
+    # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
+    # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
    "tts": {
        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
        "edge": {
@@ -836,6 +840,7 @@ DEFAULT_CONFIG = {

    # Pre-exec security scanning via tirith
    "security": {
+        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
        "redact_secrets": True,
        "tirith_enabled": True,
        "tirith_path": "tirith",
@@ -889,6 +894,34 @@ DEFAULT_CONFIG = {
        "force_ipv4": False,
    },

+    # Session storage — controls automatic cleanup of ~/.hermes/state.db.
+    # state.db accumulates every session, message, tool call, and FTS5 index
+    # entry forever.  Without auto-pruning, a heavy user (gateway + cron)
+    # reports 384MB+ databases with 68K+ messages, which slows down FTS5
+    # inserts, /resume listing, and insights queries.
+    "sessions": {
+        # When true, prune ended sessions older than retention_days once
+        # per (roughly) min_interval_hours at CLI/gateway/cron startup.
+        # Only touches ended sessions — active sessions are always preserved.
+        # Default false: session history is valuable for search recall, and
+        # silently deleting it could surprise users.  Opt in explicitly.
+        "auto_prune": False,
+        # How many days of ended-session history to keep.  Matches the
+        # default of ``hermes sessions prune``.
+        "retention_days": 90,
+        # VACUUM after a prune that actually deleted rows.  SQLite does not
+        # reclaim disk space on DELETE — freed pages are just reused on
+        # subsequent INSERTs — so without VACUUM the file stays bloated
+        # even after pruning.  VACUUM blocks writes for a few seconds per
+        # 100MB, so it only runs at startup, and only when prune deleted
+        # ≥1 session.
+        "vacuum_after_prune": True,
+        # Minimum hours between auto-maintenance runs (avoids repeating
+        # the sweep on every CLI invocation).  Tracked via state_meta in
+        # state.db itself, so it's shared across all processes.
+        "min_interval_hours": 24,
+    },
+
    # Config schema version - bump this when adding new required fields
    "_config_version": 22,
 }
@@ -1046,6 +1079,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "STEPFUN_API_KEY": {
+        "description": "StepFun Step Plan API key",
+        "prompt": "StepFun Step Plan API key",
+        "url": "https://platform.stepfun.com/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "STEPFUN_BASE_URL": {
+        "description": "StepFun Step Plan base URL override",
+        "prompt": "StepFun Step Plan base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "ARCEEAI_API_KEY": {
        "description": "Arcee AI API key",
        "prompt": "Arcee AI API key",
@@ -2098,6 +2147,7 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
+    "sessions",
 }

 # Valid fields inside a custom_providers list entry
@@ -13,6 +13,7 @@ import time
 import urllib.error
 import urllib.parse
 import urllib.request
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional

@@ -147,6 +148,14 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
    return (deleted, len(remaining))


+def _best_effort_sweep_expired_pastes() -> None:
+    """Attempt pending-paste cleanup without letting /debug fail offline."""
+    try:
+        _sweep_expired_pastes()
+    except Exception:
+        pass
+
+
 # ---------------------------------------------------------------------------
 # Privacy / delete helpers
 # ---------------------------------------------------------------------------
@@ -314,72 +323,128 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
 # Log file reading
 # ---------------------------------------------------------------------------

-def _resolve_log_path(log_name: str) -> Optional[Path]:
-    """Find the log file for *log_name*, falling back to the .1 rotation.

-    Returns the path if found, or None.
-    """
+@dataclass
+class LogSnapshot:
+    """Single-read snapshot of a log file used by debug-share."""
+
+    path: Optional[Path]
+    tail_text: str
+    full_text: Optional[str]
+
+
+def _primary_log_path(log_name: str) -> Optional[Path]:
+    """Where *log_name* would live if present. Doesn't check existence."""
    from hermes_cli.logs import LOG_FILES

    filename = LOG_FILES.get(log_name)
-    if not filename:
+    return (get_hermes_home() / "logs" / filename) if filename else None
+
+
+def _resolve_log_path(log_name: str) -> Optional[Path]:
+    """Find the log file for *log_name*, falling back to the .1 rotation.
+
+    Returns the first non-empty candidate (primary, then .1), or None.
+    Callers distinguish 'empty primary' from 'truly missing' via
+    :func:`_primary_log_path`.
+    """
+    primary = _primary_log_path(log_name)
+    if primary is None:
        return None

-    log_dir = get_hermes_home() / "logs"
-    primary = log_dir / filename
    if primary.exists() and primary.stat().st_size > 0:
        return primary

-    # Fall back to the most recent rotated file (.1).
-    rotated = log_dir / f"{filename}.1"
+    rotated = primary.parent / f"{primary.name}.1"
    if rotated.exists() and rotated.stat().st_size > 0:
        return rotated

    return None


-def _read_log_tail(log_name: str, num_lines: int) -> str:
-    """Read the last *num_lines* from a log file, or return a placeholder."""
-    from hermes_cli.logs import _read_last_n_lines
+def _capture_log_snapshot(
+    log_name: str,
+    *,
+    tail_lines: int,
+    max_bytes: int = _MAX_LOG_BYTES,
+) -> LogSnapshot:
+    """Capture a log once and derive summary/full-log views from it.

-    log_path = _resolve_log_path(log_name)
-    if log_path is None:
-        return "(file not found)"
-
-    try:
-        lines = _read_last_n_lines(log_path, num_lines)
-        return "".join(lines).rstrip("\n")
-    except Exception as exc:
-        return f"(error reading: {exc})"
-
-
-def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
-    """Read a log file for standalone upload.
-
-    Returns the file content (last *max_bytes* if truncated), or None if the
-    file doesn't exist or is empty.
+    The report tail and standalone log upload must come from the same file
+    snapshot. Otherwise a rotation/truncate between reads can make the report
+    look newer than the uploaded ``agent.log`` paste.
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
-        return None
+        primary = _primary_log_path(log_name)
+        tail = "(file empty)" if primary and primary.exists() else "(file not found)"
+        return LogSnapshot(path=None, tail_text=tail, full_text=None)

    try:
        size = log_path.stat().st_size
        if size == 0:
-            return None
+            # race: file was truncated between _resolve_log_path and stat
+            return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)

-        if size <= max_bytes:
-            return log_path.read_text(encoding="utf-8", errors="replace")
-
-        # File is larger than max_bytes — read the tail.
        with open(log_path, "rb") as f:
-            f.seek(size - max_bytes)
-            # Skip partial line at the seek point.
-            f.readline()
-            content = f.read().decode("utf-8", errors="replace")
-        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
-    except Exception:
-        return None
+            if size <= max_bytes:
+                raw = f.read()
+                truncated = False
+            else:
+                # Read from the end until we have enough bytes for the
+                # standalone upload and enough newline context to render the
+                # summary tail from the same snapshot.
+                chunk_size = 8192
+                pos = size
+                chunks: list[bytes] = []
+                total = 0
+                newline_count = 0
+
+                while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
+                    read_size = min(chunk_size, pos)
+                    pos -= read_size
+                    f.seek(pos)
+                    chunk = f.read(read_size)
+                    chunks.insert(0, chunk)
+                    total += len(chunk)
+                    newline_count += chunk.count(b"\n")
+                    chunk_size = min(chunk_size * 2, 65536)
+
+                raw = b"".join(chunks)
+                truncated = pos > 0
+
+        full_raw = raw
+        if truncated and len(full_raw) > max_bytes:
+            cut = len(full_raw) - max_bytes
+            # Check whether the cut lands exactly on a line boundary.  If the
+            # byte just before the cut position is a newline the first retained
+            # byte starts a complete line and we should keep it.  Only drop a
+            # partial first line when we're genuinely mid-line.
+            on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
+            full_raw = full_raw[cut:]
+            if not on_boundary and b"\n" in full_raw:
+                full_raw = full_raw.split(b"\n", 1)[1]
+
+        all_text = raw.decode("utf-8", errors="replace")
+        tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
+
+        full_text = full_raw.decode("utf-8", errors="replace")
+        if truncated:
+            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
+
+        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
+    except Exception as exc:
+        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
+
+
+def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
+    """Capture all logs used by debug-share exactly once."""
+    errors_lines = min(log_lines, 100)
+    return {
+        "agent": _capture_log_snapshot("agent", tail_lines=log_lines),
+        "errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
+        "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
+    }


 # ---------------------------------------------------------------------------
@@ -405,7 +470,12 @@ def _capture_dump() -> str:
    return capture.getvalue()


-def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
+def collect_debug_report(
+    *,
+    log_lines: int = 200,
+    dump_text: str = "",
+    log_snapshots: Optional[dict[str, LogSnapshot]] = None,
+) -> str:
    """Build the summary debug report: system dump + log tails.

    Parameters
@@ -424,19 +494,22 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
        dump_text = _capture_dump()
    buf.write(dump_text)

+    if log_snapshots is None:
+        log_snapshots = _capture_default_log_snapshots(log_lines)
+
    # ── Recent log tails (summary only) ──────────────────────────────────
    buf.write("\n\n")
    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(_read_log_tail("agent", log_lines))
+    buf.write(log_snapshots["agent"].tail_text)
    buf.write("\n\n")

    errors_lines = min(log_lines, 100)
    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("errors", errors_lines))
+    buf.write(log_snapshots["errors"].tail_text)
    buf.write("\n\n")

    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("gateway", errors_lines))
+    buf.write(log_snapshots["gateway"].tail_text)
    buf.write("\n")

    return buf.getvalue()
@@ -448,6 +521,8 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:

 def run_debug_share(args):
    """Collect debug report + full logs, upload each, print URLs."""
+    _best_effort_sweep_expired_pastes()
+
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
@@ -459,10 +534,15 @@ def run_debug_share(args):

    # Capture dump once — prepended to every paste for context.
    dump_text = _capture_dump()
+    log_snapshots = _capture_default_log_snapshots(log_lines)

-    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
-    agent_log = _read_full_log("agent")
-    gateway_log = _read_full_log("gateway")
+    report = collect_debug_report(
+        log_lines=log_lines,
+        dump_text=dump_text,
+        log_snapshots=log_snapshots,
+    )
+    agent_log = log_snapshots["agent"].full_text
+    gateway_log = log_snapshots["gateway"].full_text

    # Prepend dump header to each full log so every paste is self-contained.
    if agent_log:
@@ -912,6 +912,7 @@ def run_doctor(args):
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
@@ -943,18 +944,22 @@ def run_doctor(args):
            try:
                import httpx
                _base = os.getenv(_base_env, "") if _base_env else ""
-                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
+                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
+                # (OpenAI-compat surface, which exposes /models for health check).
                if not _base and _key.startswith("sk-kimi-"):
                    _base = "https://api.kimi.com/coding/v1"
-                # Anthropic-compat endpoints (/anthropic) don't support /models.
-                # Rewrite to the OpenAI-compat /v1 surface for health checks.
+                # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
+                # with no /v1) don't support /models.  Rewrite to the OpenAI-compat
+                # /v1 surface for health checks.
                if _base and _base.rstrip("/").endswith("/anthropic"):
                    from agent.auxiliary_client import _to_openai_base_url
                    _base = _to_openai_base_url(_base)
+                if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
+                    _base = _base.rstrip("/") + "/v1"
                _url = (_base.rstrip("/") + "/models") if _base else _default_url
                _headers = {"Authorization": f"Bearer {_key}"}
                if base_url_host_matches(_base, "api.kimi.com"):
-                    _headers["User-Agent"] = "KimiCLI/1.30.0"
+                    _headers["User-Agent"] = "claude-code/0.1.0"
                _resp = httpx.get(
                    _url,
                    headers=_headers,
@@ -160,6 +160,8 @@ def load_hermes_dotenv(
    # Fix corrupted .env files before python-dotenv parses them (#8908).
    if user_env.exists():
        _sanitize_env_file_if_needed(user_env)
+    if project_env_path and project_env_path.exists():
+        _sanitize_env_file_if_needed(project_env_path)

    if user_env.exists():
        _load_dotenv_with_fallback(user_env, override=True)
@@ -333,6 +333,147 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
    return selected_system, result.stdout.strip() == "active"


+def _read_systemd_unit_properties(
+    system: bool = False,
+    properties: tuple[str, ...] = (
+        "ActiveState",
+        "SubState",
+        "Result",
+        "ExecMainStatus",
+    ),
+) -> dict[str, str]:
+    """Return selected ``systemctl show`` properties for the gateway unit."""
+    selected_system = _select_systemd_scope(system)
+    try:
+        result = _run_systemctl(
+            [
+                "show",
+                get_service_name(),
+                "--no-pager",
+                "--property",
+                ",".join(properties),
+            ],
+            system=selected_system,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+    except (RuntimeError, subprocess.TimeoutExpired, OSError):
+        return {}
+
+    if result.returncode != 0:
+        return {}
+
+    parsed: dict[str, str] = {}
+    for line in result.stdout.splitlines():
+        if "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        parsed[key] = value.strip()
+    return parsed
+
+
+def _wait_for_systemd_service_restart(
+    *,
+    system: bool = False,
+    previous_pid: int | None = None,
+    timeout: float = 60.0,
+) -> bool:
+    """Wait for the gateway service to become active after a restart handoff."""
+    import time
+
+    svc = get_service_name()
+    scope_label = _service_scope_label(system).capitalize()
+    deadline = time.time() + timeout
+
+    while time.time() < deadline:
+        props = _read_systemd_unit_properties(system=system)
+        active_state = props.get("ActiveState", "")
+        sub_state = props.get("SubState", "")
+        new_pid = None
+        try:
+            from gateway.status import get_running_pid
+
+            new_pid = get_running_pid()
+        except Exception:
+            new_pid = None
+
+        if active_state == "active":
+            if new_pid and (previous_pid is None or new_pid != previous_pid):
+                print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                return True
+            if previous_pid is None:
+                print(f"✓ {scope_label} service restarted")
+                return True
+
+        if active_state == "activating" and sub_state == "auto-restart":
+            time.sleep(1)
+            continue
+
+        time.sleep(2)
+
+    print(
+        f"⚠ {scope_label} service did not become active within {int(timeout)}s.\n"
+        f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+        f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
+    )
+    return False
+
+
+def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
+    """Recover a planned service restart that is stuck in systemd state."""
+    props = _read_systemd_unit_properties(system=system)
+    if not props:
+        return False
+
+    try:
+        from gateway.status import read_runtime_status
+    except Exception:
+        return False
+
+    runtime_state = read_runtime_status() or {}
+    if not runtime_state.get("restart_requested"):
+        return False
+
+    active_state = props.get("ActiveState", "")
+    sub_state = props.get("SubState", "")
+    exec_main_status = props.get("ExecMainStatus", "")
+    result = props.get("Result", "")
+
+    if active_state == "activating" and sub_state == "auto-restart":
+        print("⏳ Service restart already pending — waiting for systemd relaunch...")
+        return _wait_for_systemd_service_restart(
+            system=system,
+            previous_pid=previous_pid,
+        )
+
+    if active_state == "failed" and (
+        exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
+        or result == "exit-code"
+    ):
+        svc = get_service_name()
+        scope_label = _service_scope_label(system).capitalize()
+        print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
+        _run_systemctl(
+            ["reset-failed", svc],
+            system=system,
+            check=False,
+            timeout=30,
+        )
+        _run_systemctl(
+            ["start", svc],
+            system=system,
+            check=False,
+            timeout=90,
+        )
+        return _wait_for_systemd_service_restart(
+            system=system,
+            previous_pid=previous_pid,
+        )
+
+    return False
+
+
 def _probe_launchd_service_running() -> bool:
    if not get_launchd_plist_path().exists():
        return False
@@ -470,7 +611,8 @@ def stop_profile_gateway() -> bool:
        except (ProcessLookupError, PermissionError):
            break

-    remove_pid_file()
+    if get_running_pid() is None:
+        remove_pid_file()
    return True


@@ -1505,14 +1647,9 @@ def systemd_restart(system: bool = False):

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
-        # SIGUSR1 sent — the gateway will drain active agents, exit with
-        # code 75, and systemd will restart it after RestartSec (30s).
-        # Wait for the old process to die and the new one to become active
-        # so the CLI doesn't return while the service is still restarting.
        import time
        scope_label = _service_scope_label(system).capitalize()
        svc = get_service_name()
-        scope_cmd = _systemctl_cmd(system)

        # Phase 1: wait for old process to exit (drain + shutdown)
        print(f"⏳ {scope_label} service draining active work...")
@@ -1526,48 +1663,41 @@ def systemd_restart(system: bool = False):
        else:
            print(f"⚠ Old process (PID {pid}) still alive after 90s")

-        # Phase 2: wait for systemd to start the new process
-        print(f"⏳ Waiting for {svc} to restart...")
-        deadline = time.time() + 60
-        while time.time() < deadline:
-            try:
-                result = subprocess.run(
-                    scope_cmd + ["is-active", svc],
-                    capture_output=True, text=True, timeout=5,
-                )
-                if result.stdout.strip() == "active":
-                    # Verify it's a NEW process, not the old one somehow
-                    new_pid = get_running_pid()
-                    if new_pid and new_pid != pid:
-                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
-                        return
-            except (subprocess.TimeoutExpired, FileNotFoundError):
-                pass
-            time.sleep(2)
-
-        # Timed out — check final state
-        try:
-            result = subprocess.run(
-                scope_cmd + ["is-active", svc],
-                capture_output=True, text=True, timeout=5,
-            )
-            if result.stdout.strip() == "active":
-                print(f"✓ {scope_label} service restarted")
-                return
-        except Exception:
-            pass
-        print(
-            f"⚠ {scope_label} service did not become active within 60s.\n"
-            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
-            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        # The gateway exits with code 75 for a planned service restart.
+        # systemd can sit in the RestartSec window or even wedge itself into a
+        # failed/rate-limited state if the operator asks for another restart in
+        # the middle of that handoff. Clear any stale failed state and kick the
+        # unit immediately so `hermes gateway restart` behaves idempotently.
+        _run_systemctl(
+            ["reset-failed", svc],
+            system=system,
+            check=False,
+            timeout=30,
        )
+        _run_systemctl(
+            ["start", svc],
+            system=system,
+            check=False,
+            timeout=90,
+        )
+        _wait_for_systemd_service_restart(system=system, previous_pid=pid)
        return
+
+    if _recover_pending_systemd_restart(system=system, previous_pid=pid):
+        return
+
+    _run_systemctl(
+        ["reset-failed", get_service_name()],
+        system=system,
+        check=False,
+        timeout=30,
+    )
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")



-def systemd_status(deep: bool = False, system: bool = False):
+def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
    system = _select_systemd_scope(system)
    unit_path = get_systemd_unit_path(system=system)
    scope_flag = " --system" if system else ""
@@ -1590,8 +1720,12 @@ def systemd_status(deep: bool = False, system: bool = False):
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

+    status_cmd = ["status", get_service_name(), "--no-pager"]
+    if full:
+        status_cmd.append("-l")
+
    _run_systemctl(
-        ["status", get_service_name(), "--no-pager"],
+        status_cmd,
        system=system,
        capture_output=False,
        timeout=10,
@@ -1624,6 +1758,19 @@ def systemd_status(deep: bool = False, system: bool = False):
        for line in runtime_lines:
            print(f"  {line}")

+    unit_props = _read_systemd_unit_properties(system=system)
+    active_state = unit_props.get("ActiveState", "")
+    sub_state = unit_props.get("SubState", "")
+    exec_main_status = unit_props.get("ExecMainStatus", "")
+    result_code = unit_props.get("Result", "")
+    if active_state == "activating" and sub_state == "auto-restart":
+        print("  ⏳ Restart pending: systemd is waiting to relaunch the gateway")
+    elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
+        print("  ⚠ Planned restart is stuck in systemd failed state (exit 75)")
+        print(f"  Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
+    elif active_state == "failed" and result_code:
+        print(f"  ⚠ Systemd unit result: {result_code}")
+
    if system:
        print("✓ System service starts at boot without requiring systemd linger")
    elif deep:
@@ -1639,7 +1786,10 @@ def systemd_status(deep: bool = False, system: bool = False):
    if deep:
        print()
        print("Recent logs:")
-        subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)
+        log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
+        if full:
+            log_cmd.append("-l")
+        subprocess.run(log_cmd, timeout=10)


 # =============================================================================
@@ -2639,9 +2789,120 @@ def _setup_dingtalk():


 def _setup_wecom():
-    """Configure WeCom (Enterprise WeChat) via the standard platform setup."""
-    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
-    _setup_standard_platform(wecom_platform)
+    """Interactive setup for WeCom — scan QR code or manual credential input."""
+    print()
+    print(color("  ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN))
+
+    existing_bot_id = get_env_value("WECOM_BOT_ID")
+    existing_secret = get_env_value("WECOM_SECRET")
+    if existing_bot_id and existing_secret:
+        print()
+        print_success("WeCom is already configured.")
+        if not prompt_yes_no("  Reconfigure WeCom?", False):
+            return
+
+    # ── Choose setup method ──
+    print()
+    method_choices = [
+        "Scan QR code to obtain Bot ID and Secret automatically (recommended)",
+        "Enter existing Bot ID and Secret manually",
+    ]
+    method_idx = prompt_choice("  How would you like to set up WeCom?", method_choices, 0)
+
+    bot_id = None
+    secret = None
+
+    if method_idx == 0:
+        # ── QR scan flow ──
+        try:
+            from gateway.platforms.wecom import qr_scan_for_bot_info
+        except Exception as exc:
+            print_error(f"  WeCom QR scan import failed: {exc}")
+            qr_scan_for_bot_info = None
+
+        if qr_scan_for_bot_info is not None:
+            try:
+                credentials = qr_scan_for_bot_info()
+            except KeyboardInterrupt:
+                print()
+                print_warning("  WeCom setup cancelled.")
+                return
+            except Exception as exc:
+                print_warning(f"  QR scan failed: {exc}")
+                credentials = None
+            if credentials:
+                bot_id = credentials.get("bot_id", "")
+                secret = credentials.get("secret", "")
+                print_success("  ✔ QR scan successful! Bot ID and Secret obtained.")
+
+        if not bot_id or not secret:
+            print_info("  QR scan did not complete. Continuing with manual input.")
+            bot_id = None
+            secret = None
+
+    # ── Manual credential input ──
+    if not bot_id or not secret:
+        print()
+        print_info("  1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots")
+        print_info("  2. Select API Mode")
+        print_info("  3. Copy the Bot ID and Secret from the bot's credentials info")
+        print_info("  4. The bot connects via WebSocket — no public endpoint needed")
+        print()
+        bot_id = prompt("  Bot ID", password=False)
+        if not bot_id:
+            print_warning("  Skipped — WeCom won't work without a Bot ID.")
+            return
+        secret = prompt("  Secret", password=True)
+        if not secret:
+            print_warning("  Skipped — WeCom won't work without a Secret.")
+            return
+
+    # ── Save core credentials ──
+    save_env_value("WECOM_BOT_ID", bot_id)
+    save_env_value("WECOM_SECRET", secret)
+
+    # ── Allowed users (deny-by-default security) ──
+    print()
+    print_info("  The gateway DENIES all users by default for security.")
+    print_info("  Enter user IDs to create an allowlist, or leave empty.")
+    allowed = prompt("  Allowed user IDs (comma-separated, or empty)", password=False)
+    if allowed:
+        cleaned = allowed.replace(" ", "")
+        save_env_value("WECOM_ALLOWED_USERS", cleaned)
+        print_success("  Saved — only these users can interact with the bot.")
+    else:
+        print()
+        access_choices = [
+            "Enable open access (anyone can message the bot)",
+            "Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
+            "Disable direct messages",
+            "Skip for now (bot will deny all users until configured)",
+        ]
+        access_idx = prompt_choice("  How should unauthorized users be handled?", access_choices, 1)
+        if access_idx == 0:
+            save_env_value("WECOM_DM_POLICY", "open")
+            save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
+            print_warning("  Open access enabled — anyone can use your bot!")
+        elif access_idx == 1:
+            save_env_value("WECOM_DM_POLICY", "pairing")
+            print_success("  DM pairing mode — users will receive a code to request access.")
+            print_info("  Approve with: hermes pairing approve <platform> <code>")
+        elif access_idx == 2:
+            save_env_value("WECOM_DM_POLICY", "disabled")
+            print_warning("  Direct messages disabled.")
+        else:
+            print_info("  Skipped — configure later with 'hermes gateway setup'")
+
+    # ── Home channel (optional) ──
+    print()
+    print_info("  Chat ID for scheduled results and notifications.")
+    home = prompt("  Home chat ID (optional, for cron/notifications)", password=False)
+    if home:
+        save_env_value("WECOM_HOME_CHANNEL", home)
+        print_success(f"  Home channel set to {home}")
+
+    print()
+    print_success("💬 WeCom configured!")


 def _is_service_installed() -> bool:
@@ -3021,7 +3282,8 @@ def _setup_qqbot():
    if method_idx == 0:
        # ── QR scan-to-configure ──
        try:
-            credentials = _qqbot_qr_flow()
+            from gateway.platforms.qqbot import qr_register
+            credentials = qr_register()
        except KeyboardInterrupt:
            print()
            print_warning("  QQ Bot setup cancelled.")
@@ -3103,106 +3365,6 @@ def _setup_qqbot():
    print_info(f"  App ID: {credentials['app_id']}")


-def _qqbot_render_qr(url: str) -> bool:
-    """Try to render a QR code in the terminal. Returns True if successful."""
-    try:
-        import qrcode as _qr
-        qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L)
-        qr.add_data(url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        return True
-    except Exception:
-        return False
-
-
-def _qqbot_qr_flow():
-    """Run the QR-code scan-to-configure flow.
-
-    Returns a dict with app_id, client_secret, user_openid on success,
-    or None on failure/cancel.
-    """
-    try:
-        from gateway.platforms.qqbot import (
-            create_bind_task, poll_bind_result, build_connect_url,
-            decrypt_secret, BindStatus,
-        )
-        from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
-    except Exception as exc:
-        print_error(f"  QQBot onboard import failed: {exc}")
-        return None
-
-    import asyncio
-    import time
-
-    MAX_REFRESHES = 3
-    refresh_count = 0
-
-    while refresh_count <= MAX_REFRESHES:
-        loop = asyncio.new_event_loop()
-
-        # ── Create bind task ──
-        try:
-            task_id, aes_key = loop.run_until_complete(create_bind_task())
-        except Exception as e:
-            print_warning(f"  Failed to create bind task: {e}")
-            loop.close()
-            return None
-
-        url = build_connect_url(task_id)
-
-        # ── Display QR code + URL ──
-        print()
-        if _qqbot_render_qr(url):
-            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
-        else:
-            print(f"  Open this URL in QQ on your phone:\n  {url}")
-            print_info("  Tip: pip install qrcode  to show a scannable QR code here")
-
-        # ── Poll loop (silent — keep QR visible at bottom) ──
-        try:
-            while True:
-                try:
-                    status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
-                        poll_bind_result(task_id)
-                    )
-                except Exception:
-                    time.sleep(ONBOARD_POLL_INTERVAL)
-                    continue
-
-                if status == BindStatus.COMPLETED:
-                    client_secret = decrypt_secret(encrypted_secret, aes_key)
-                    print()
-                    print_success(f"  QR scan complete! (App ID: {app_id})")
-                    if user_openid:
-                        print_info(f"  Scanner's OpenID: {user_openid}")
-                    return {
-                        "app_id": app_id,
-                        "client_secret": client_secret,
-                        "user_openid": user_openid,
-                    }
-
-                if status == BindStatus.EXPIRED:
-                    refresh_count += 1
-                    if refresh_count > MAX_REFRESHES:
-                        print()
-                        print_warning(f"  QR code expired {MAX_REFRESHES} times — giving up.")
-                        return None
-                    print()
-                    print_warning(f"  QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
-                    loop.close()
-                    break  # outer while creates a new task
-
-                time.sleep(ONBOARD_POLL_INTERVAL)
-        except KeyboardInterrupt:
-            loop.close()
-            raise
-        finally:
-            loop.close()
-
-    return None
-
-
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -3390,6 +3552,8 @@ def gateway_setup():
            _setup_feishu()
        elif platform["key"] == "qqbot":
            _setup_qqbot()
+        elif platform["key"] == "wecom":
+            _setup_wecom()
        else:
            _setup_standard_platform(platform)

@@ -3748,12 +3912,13 @@ def gateway_command(args):
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
+        full = getattr(args, 'full', False)
        system = getattr(args, 'system', False)
        snapshot = get_gateway_runtime_snapshot(system=system)
        
        # Check for service first
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-            systemd_status(deep, system=system)
+            systemd_status(deep, system=system, full=full)
            _print_gateway_process_mismatch(snapshot)
        elif is_macos() and get_launchd_plist_path().exists():
            launchd_status(deep)
@@ -1566,6 +1566,8 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
+    elif selected_provider == "stepfun":
+        _model_flow_stepfun(config, current_model)
    elif selected_provider == "bedrock":
        _model_flow_bedrock(config, current_model)
    elif selected_provider in (
@@ -2165,7 +2167,6 @@ def _model_flow_nous(config, current_model="", args=None):
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        get_pricing_for_provider,
-        filter_nous_free_models,
        check_nous_free_tier,
        partition_nous_models_by_tier,
    )
@@ -2208,10 +2209,8 @@ def _model_flow_nous(config, current_model="", args=None):
    # Check if user is on free tier
    free_tier = check_nous_free_tier()

-    # For both tiers: apply the allowlist filter first (removes non-allowlisted
-    # free models and allowlist models that aren't actually free).
-    # Then for free users: partition remaining models into selectable/unavailable.
-    model_ids = filter_nous_free_models(model_ids, pricing)
+    # For free users: partition models into selectable/unavailable based on
+    # whether they are free per the Portal-reported pricing.
    unavailable_models: list[str] = []
    if free_tier:
        model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -3465,6 +3464,140 @@ def _model_flow_kimi(config, current_model=""):
        print("No change.")


+def _infer_stepfun_region(base_url: str) -> str:
+    """Infer the current StepFun region from the configured endpoint."""
+    normalized = (base_url or "").strip().lower()
+    if "api.stepfun.com" in normalized:
+        return "china"
+    return "international"
+
+
+def _stepfun_base_url_for_region(region: str) -> str:
+    from hermes_cli.auth import (
+        STEPFUN_STEP_PLAN_CN_BASE_URL,
+        STEPFUN_STEP_PLAN_INTL_BASE_URL,
+    )
+
+    return (
+        STEPFUN_STEP_PLAN_CN_BASE_URL
+        if region == "china"
+        else STEPFUN_STEP_PLAN_INTL_BASE_URL
+    )
+
+
+def _model_flow_stepfun(config, current_model=""):
+    """StepFun Step Plan flow with region-specific endpoints."""
+    from hermes_cli.auth import (
+        PROVIDER_REGISTRY,
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli.models import fetch_api_models
+
+    provider_id = "stepfun"
+    pconfig = PROVIDER_REGISTRY[provider_id]
+    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+    base_url_env = pconfig.base_url_env_var or ""
+
+    existing_key = ""
+    for ev in pconfig.api_key_env_vars:
+        existing_key = get_env_value(ev) or os.getenv(ev, "")
+        if existing_key:
+            break
+
+    if not existing_key:
+        print(f"No {pconfig.name} API key configured.")
+        if key_env:
+            try:
+                import getpass
+                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            if not new_key:
+                print("Cancelled.")
+                return
+            save_env_value(key_env, new_key)
+            existing_key = new_key
+            print("API key saved.")
+            print()
+    else:
+        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+        print()
+
+    current_base = ""
+    if base_url_env:
+        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
+    if not current_base:
+        model_cfg = config.get("model")
+        if isinstance(model_cfg, dict):
+            current_base = str(model_cfg.get("base_url") or "").strip()
+    current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
+
+    region_choices = [
+        ("international", f"International ({_stepfun_base_url_for_region('international')})"),
+        ("china", f"China ({_stepfun_base_url_for_region('china')})"),
+    ]
+    ordered_regions = []
+    for region_key, label in region_choices:
+        if region_key == current_region:
+            ordered_regions.insert(0, (region_key, f"{label}  ← currently active"))
+        else:
+            ordered_regions.append((region_key, label))
+    ordered_regions.append(("cancel", "Cancel"))
+
+    region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
+    if region_idx is None or ordered_regions[region_idx][0] == "cancel":
+        print("No change.")
+        return
+
+    selected_region = ordered_regions[region_idx][0]
+    effective_base = _stepfun_base_url_for_region(selected_region)
+    if base_url_env:
+        save_env_value(base_url_env, effective_base)
+
+    live_models = fetch_api_models(existing_key, effective_base)
+    if live_models:
+        model_list = live_models
+        print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
+    else:
+        model_list = _PROVIDER_MODELS.get(provider_id, [])
+        if model_list:
+            print(
+                f"  Could not auto-detect models from {pconfig.name} API — "
+                "showing Step Plan fallback catalog."
+            )
+
+    if model_list:
+        selected = _prompt_model_selection(model_list, current_model=current_model)
+    else:
+        try:
+            selected = input("Model name: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            selected = None
+
+    if selected:
+        _save_model_choice(selected)
+
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = provider_id
+        model["base_url"] = effective_base
+        model.pop("api_mode", None)
+        save_config(cfg)
+        deactivate_provider()
+
+        config["model"] = dict(model)
+        print(f"Default model set to: {selected} (via {pconfig.name})")
+    else:
+        print("No change.")
+
+
 def _model_flow_bedrock_api_key(config, region, current_model=""):
    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.

@@ -6533,6 +6666,7 @@ For more help on a command:
            "zai",
            "kimi-coding",
            "kimi-coding-cn",
+            "stepfun",
            "minimax",
            "minimax-cn",
            "kilocode",
@@ -6754,6 +6888,12 @@ For more help on a command:
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
+    gateway_status.add_argument(
+        "-l",
+        "--full",
+        action="store_true",
+        help="Show full, untruncated service/log output where supported",
+    )
    gateway_status.add_argument(
        "--system",
        action="store_true",
@@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
    # Z.AI / GLM
    "glm":       ModelIdentity("z-ai", "glm"),

-    # StepFun
+    # Step Plan (StepFun)
    "step":      ModelIdentity("stepfun", "step"),

    # Xiaomi
@@ -678,6 +678,7 @@ def switch_model(
        _da = DIRECT_ALIASES.get(resolved_alias)
        if _da is not None and _da.base_url:
            base_url = _da.base_url
+            api_mode = ""  # clear so determine_api_mode re-detects from URL
            if not api_key:
                api_key = "no-key-required"

@@ -42,7 +42,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openrouter/elephant-alpha",       "free"),
    ("openai/gpt-5.4",                  ""),
    ("openai/gpt-5.4-mini",             ""),
-    ("xiaomi/mimo-v2-pro",               ""),
+    ("xiaomi/mimo-v2.5-pro",             ""),
+    ("xiaomi/mimo-v2.5",                 ""),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -53,6 +54,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("stepfun/step-3.5-flash",          ""),
    ("minimax/minimax-m2.7",            ""),
    ("minimax/minimax-m2.5",            ""),
+    ("minimax/minimax-m2.5:free",       "free"),
    ("z-ai/glm-5.1",                    ""),
    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
@@ -107,7 +109,8 @@ def _codex_curated_models() -> list[str]:
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
-        "xiaomi/mimo-v2-pro",
+        "xiaomi/mimo-v2.5-pro",
+        "xiaomi/mimo-v2.5",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -125,17 +128,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
+        "minimax/minimax-m2.5:free",
        "z-ai/glm-5.1",
        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
-        "nvidia/nemotron-3-super-120b-a12b:free",
-        "arcee-ai/trinity-large-preview:free",
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
-        "openrouter/elephant-alpha",
    ],
    "openai-codex": _codex_curated_models(),
    "copilot-acp": [
@@ -211,6 +212,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
+    "stepfun": [
+        "step-3.5-flash",
+        "step-3.5-flash-2603",
+    ],
    "moonshot": [
        "kimi-k2.6",
        "kimi-k2.5",
@@ -362,17 +367,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
 _PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]

 # ---------------------------------------------------------------------------
-# Nous Portal free-model filtering
+# Nous Portal free-model helper
 # ---------------------------------------------------------------------------
-# Models that are ALLOWED to appear when priced as free on Nous Portal.
-# Any other free model is hidden — prevents promotional/temporary free models
-# from cluttering the selection when users are paying subscribers.
-# Models in this list are ALSO filtered out if they are NOT free (i.e. they
-# should only appear in the menu when they are genuinely free).
-_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
-    "xiaomi/mimo-v2-pro",
-    "xiaomi/mimo-v2-omni",
-})
+# The Nous Portal models endpoint is the source of truth for which models
+# are currently offered (free or paid). We trust whatever it returns and
+# surface it to users as-is — no local allowlist filtering.


 def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@@ -386,35 +385,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
        return False


-def filter_nous_free_models(
-    model_ids: list[str],
-    pricing: dict[str, dict[str, str]],
-) -> list[str]:
-    """Filter the Nous Portal model list according to free-model policy.
-
-    Rules:
-      • Paid models that are NOT in the allowlist → keep (normal case).
-      • Free models that are NOT in the allowlist → drop.
-      • Allowlist models that ARE free → keep.
-      • Allowlist models that are NOT free → drop.
-    """
-    if not pricing:
-        return model_ids  # no pricing data — can't filter, show everything
-
-    result: list[str] = []
-    for mid in model_ids:
-        free = _is_model_free(mid, pricing)
-        if mid in _NOUS_ALLOWED_FREE_MODELS:
-            # Allowlist model: only show when it's actually free
-            if free:
-                result.append(mid)
-        else:
-            # Regular model: keep only when it's NOT free
-            if not free:
-                result.append(mid)
-    return result
-
-
 # ---------------------------------------------------------------------------
 # Nous Portal account tier detection
 # ---------------------------------------------------------------------------
@@ -478,8 +448,7 @@ def partition_nous_models_by_tier(
 ) -> tuple[list[str], list[str]]:
    """Split Nous models into (selectable, unavailable) based on user tier.

-    For paid-tier users: all models are selectable, none unavailable
-    (free-model filtering is handled separately by ``filter_nous_free_models``).
+    For paid-tier users: all models are selectable, none unavailable.

    For free-tier users: only free models are selectable; paid models
    are returned as unavailable (shown grayed out in the menu).
@@ -549,6 +518,157 @@ def check_nous_free_tier() -> bool:
        return False  # default to paid on error — don't block users


+# ---------------------------------------------------------------------------
+# Nous Portal recommended models
+#
+# The Portal publishes a curated list of suggested models (separated into
+# paid and free tiers) plus dedicated recommendations for compaction (text
+# summarisation / auxiliary) and vision tasks. We fetch it once per process
+# with a TTL cache so callers can ask "what's the best aux model right now?"
+# without hitting the network on every lookup.
+#
+# Shape of the response (fields we care about):
+#   {
+#     "paidRecommendedModels":     [ {modelName, ...}, ... ],
+#     "freeRecommendedModels":     [ {modelName, ...}, ... ],
+#     "paidRecommendedCompactionModel":  {modelName, ...} | null,
+#     "paidRecommendedVisionModel":      {modelName, ...} | null,
+#     "freeRecommendedCompactionModel":  {modelName, ...} | null,
+#     "freeRecommendedVisionModel":      {modelName, ...} | null,
+#   }
+# ---------------------------------------------------------------------------
+
+NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
+_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
+# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
+_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
+
+
+def fetch_nous_recommended_models(
+    portal_base_url: str = "",
+    timeout: float = 5.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, Any]:
+    """Fetch the Nous Portal's curated recommended-models payload.
+
+    Hits ``<portal>/api/nous/recommended-models``. The endpoint is public —
+    no auth is required. Results are cached per portal URL for
+    ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
+    bypass the cache.
+
+    Returns the parsed JSON dict on success, or ``{}`` on any failure
+    (network, parse, non-2xx). Callers must treat missing/null fields as
+    "no recommendation" and fall back to their own default.
+    """
+    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
+    now = time.monotonic()
+    cached = _nous_recommended_cache.get(base)
+    if not force_refresh and cached is not None:
+        payload, cached_at = cached
+        if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
+            return payload
+
+    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
+    try:
+        req = urllib.request.Request(
+            url,
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+        if not isinstance(data, dict):
+            data = {}
+    except Exception:
+        data = {}
+
+    _nous_recommended_cache[base] = (data, now)
+    return data
+
+
+def _resolve_nous_portal_url() -> str:
+    """Best-effort lookup of the Portal base URL the user is authed against."""
+    try:
+        from hermes_cli.auth import (
+            DEFAULT_NOUS_PORTAL_URL,
+            get_provider_auth_state,
+        )
+        state = get_provider_auth_state("nous") or {}
+        portal = str(state.get("portal_base_url") or "").strip()
+        if portal:
+            return portal.rstrip("/")
+        return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+    except Exception:
+        return "https://portal.nousresearch.com"
+
+
+def _extract_model_name(entry: Any) -> Optional[str]:
+    """Pull the ``modelName`` field from a recommended-model entry, else None."""
+    if not isinstance(entry, dict):
+        return None
+    model_name = entry.get("modelName")
+    if isinstance(model_name, str) and model_name.strip():
+        return model_name.strip()
+    return None
+
+
+def get_nous_recommended_aux_model(
+    *,
+    vision: bool = False,
+    free_tier: Optional[bool] = None,
+    portal_base_url: str = "",
+    force_refresh: bool = False,
+) -> Optional[str]:
+    """Return the Portal's recommended model name for an auxiliary task.
+
+    Picks the best field from the Portal's recommended-models payload:
+
+    * ``vision=True``  → ``paidRecommendedVisionModel``  (paid tier) or
+                         ``freeRecommendedVisionModel``  (free tier)
+    * ``vision=False`` → ``paidRecommendedCompactionModel`` or
+                         ``freeRecommendedCompactionModel``
+
+    When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
+    via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
+    detection — useful for tests or when the caller already knows the tier.
+
+    For paid-tier users we prefer the paid recommendation but gracefully fall
+    back to the free recommendation if the Portal returned ``null`` for the
+    paid field (common during the staged rollout of new paid models).
+
+    Returns ``None`` when every candidate is missing, null, or the fetch
+    fails — callers should fall back to their own default (currently
+    ``google/gemini-3-flash-preview``).
+    """
+    base = portal_base_url or _resolve_nous_portal_url()
+    payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
+    if not payload:
+        return None
+
+    if free_tier is None:
+        try:
+            free_tier = check_nous_free_tier()
+        except Exception:
+            # On any detection error, assume paid — paid users see both fields
+            # anyway so this is a safe default that maximises model quality.
+            free_tier = False
+
+    if vision:
+        paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
+    else:
+        paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
+
+    # Preference order:
+    #   free tier  → free only
+    #   paid tier  → paid, then free (if paid field is null)
+    candidates = [free_key] if free_tier else [paid_key, free_key]
+    for key in candidates:
+        name = _extract_model_name(payload.get(key))
+        if name:
+            return name
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Canonical provider list — single source of truth for provider identity.
 # Every code path that lists, displays, or iterates providers derives from
@@ -585,6 +705,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("zai",            "Z.AI / GLM",               "Z.AI / GLM (Zhipu AI direct API)"),
    ProviderEntry("kimi-coding",    "Kimi / Kimi Coding Plan",  "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
+    ProviderEntry("stepfun",        "StepFun Step Plan",       "StepFun Step Plan (agent/coding models via Step Plan API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
@@ -619,6 +740,8 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
+    "step": "stepfun",
+    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
    "minimax-china": "minimax-cn",
@@ -1499,6 +1622,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
        except Exception:
            pass
+    if normalized == "stepfun":
+        try:
+            from hermes_cli.auth import resolve_api_key_provider_credentials
+
+            creds = resolve_api_key_provider_credentials("stepfun")
+            api_key = str(creds.get("api_key") or "").strip()
+            base_url = str(creds.get("base_url") or "").strip()
+            if api_key and base_url:
+                live = fetch_api_models(api_key, base_url)
+                if live:
+                    return live
+        except Exception:
+            pass
    if normalized == "anthropic":
        live = _fetch_anthropic_models()
        if live:
@@ -133,6 +133,9 @@ def _get_enabled_plugins() -> Optional[set]:
 # Data classes
 # ---------------------------------------------------------------------------

+_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"}
+
+
@dataclass
 class PluginManifest:
    """Parsed representation of a plugin.yaml manifest."""
@@ -146,6 +149,23 @@ class PluginManifest:
    provides_hooks: List[str] = field(default_factory=list)
    source: str = ""        # "user", "project", or "entrypoint"
    path: Optional[str] = None
+    # Plugin kind — see plugins.py module docstring for semantics.
+    # ``standalone`` (default): hooks/tools of its own; opt-in via
+    #                           ``plugins.enabled``.
+    # ``backend``: pluggable backend for an existing core tool (e.g.
+    #              image_gen). Built-in (bundled) backends auto-load;
+    #              user-installed still gated by ``plugins.enabled``.
+    # ``exclusive``: category with exactly one active provider (memory).
+    #              Selection via ``<category>.provider`` config key; the
+    #              category's own discovery system handles loading and the
+    #              general scanner skips these.
+    kind: str = "standalone"
+    # Registry key — path-derived, used by ``plugins.enabled``/``disabled``
+    # lookups and by ``hermes plugins list``. For a flat plugin at
+    # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested
+    # category plugin at ``plugins/image_gen/openai/`` the key is
+    # ``image_gen/openai``. When empty, falls back to ``name``.
+    key: str = ""


@dataclass
@@ -263,6 +283,7 @@ class PluginContext:
        name: str,
        handler: Callable,
        description: str = "",
+        args_hint: str = "",
    ) -> None:
        """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.

@@ -273,6 +294,13 @@ class PluginContext:
        terminal commands), this registers in-session slash commands that users
        invoke during a conversation.

+        ``args_hint`` is an optional short string (e.g. ``"<file>"`` or
+        ``"dias:7 formato:json"``) used by gateway adapters to surface the
+        command with an argument field — for example Discord's native slash
+        command picker. Plugin commands without ``args_hint`` register as
+        parameterless in Discord and still accept trailing text when invoked
+        as free-form chat.
+
        Names conflicting with built-in commands are rejected with a warning.
        """
        clean = name.lower().strip().lstrip("/").replace(" ", "-")
@@ -300,6 +328,7 @@ class PluginContext:
            "handler": handler,
            "description": description or "Plugin command",
            "plugin": self.manifest.name,
+            "args_hint": (args_hint or "").strip(),
        }
        logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)

@@ -366,6 +395,33 @@ class PluginContext:
            self.manifest.name, engine.name,
        )

+    # -- image gen provider registration ------------------------------------
+
+    def register_image_gen_provider(self, provider) -> None:
+        """Register an image generation backend.
+
+        ``provider`` must be an instance of
+        :class:`agent.image_gen_provider.ImageGenProvider`. The
+        ``provider.name`` attribute is what ``image_gen.provider`` in
+        ``config.yaml`` matches against when routing ``image_generate``
+        tool calls.
+        """
+        from agent.image_gen_provider import ImageGenProvider
+        from agent.image_gen_registry import register_provider
+
+        if not isinstance(provider, ImageGenProvider):
+            logger.warning(
+                "Plugin '%s' tried to register an image_gen provider that does "
+                "not inherit from ImageGenProvider. Ignoring.",
+                self.manifest.name,
+            )
+            return
+        register_provider(provider)
+        logger.info(
+            "Plugin '%s' registered image_gen provider: %s",
+            self.manifest.name, provider.name,
+        )
+
    # -- hook registration --------------------------------------------------

    def register_hook(self, hook_name: str, callback: Callable) -> None:
@@ -465,11 +521,16 @@ class PluginManager:
        manifests: List[PluginManifest] = []

        # 1. Bundled plugins (<repo>/plugins/<name>/)
-        # Repo-shipped generic plugins live next to hermes_cli/.  Memory and
-        # context_engine subdirs are handled by their own discovery paths, so
-        # skip those names here.  Bundled plugins are discovered (so they
-        # show up in `hermes plugins`) but only loaded when added to
-        # `plugins.enabled` in config.yaml — opt-in like any other plugin.
+        #
+        # Repo-shipped plugins live next to hermes_cli/. Two layouts are
+        # supported (see ``_scan_directory`` for details):
+        #
+        #   - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
+        #   - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
+        #
+        # ``memory/`` and ``context_engine/`` are skipped at the top level —
+        # they have their own discovery systems. Porting those to the
+        # category-namespace ``kind: exclusive`` model is a future PR.
        repo_plugins = Path(__file__).resolve().parent.parent / "plugins"
        manifests.extend(
            self._scan_directory(
@@ -492,36 +553,69 @@ class PluginManager:
        manifests.extend(self._scan_entry_points())

        # Load each manifest (skip user-disabled plugins).
-        # Later sources override earlier ones on name collision — user plugins
-        # take precedence over bundled, project plugins take precedence over
-        # user.  Dedup here so we only load the final winner.
+        # Later sources override earlier ones on key collision — user
+        # plugins take precedence over bundled, project plugins take
+        # precedence over user. Dedup here so we only load the final
+        # winner. Keys are path-derived (``image_gen/openai``,
+        # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai``
+        # don't collide even when both manifests say ``name: openai``.
        disabled = _get_disabled_plugins()
        enabled = _get_enabled_plugins()  # None = opt-in default (nothing enabled)
        winners: Dict[str, PluginManifest] = {}
        for manifest in manifests:
-            winners[manifest.name] = manifest
+            winners[manifest.key or manifest.name] = manifest
        for manifest in winners.values():
-            # Explicit disable always wins.
-            if manifest.name in disabled:
+            lookup_key = manifest.key or manifest.name
+
+            # Explicit disable always wins (matches on key or on legacy
+            # bare name for back-compat with existing user configs).
+            if lookup_key in disabled or manifest.name in disabled:
                loaded = LoadedPlugin(manifest=manifest, enabled=False)
                loaded.error = "disabled via config"
-                self._plugins[manifest.name] = loaded
-                logger.debug("Skipping disabled plugin '%s'", manifest.name)
+                self._plugins[lookup_key] = loaded
+                logger.debug("Skipping disabled plugin '%s'", lookup_key)
                continue
-            # Opt-in gate: plugins must be in the enabled allow-list.
-            # If the allow-list is missing (None), treat as "nothing enabled"
-            # — users have to explicitly enable plugins to load them.
-            # Memory and context_engine providers are excluded from this gate
-            # since they have their own single-select config (memory.provider
-            # / context.engine), not the enabled list.
-            if enabled is None or manifest.name not in enabled:
+
+            # Exclusive plugins (memory providers) have their own
+            # discovery/activation path. The general loader records the
+            # manifest for introspection but does not load the module.
+            if manifest.kind == "exclusive":
                loaded = LoadedPlugin(manifest=manifest, enabled=False)
-                loaded.error = "not enabled in config (run `hermes plugins enable {}` to activate)".format(
-                    manifest.name
+                loaded.error = (
+                    "exclusive plugin — activate via <category>.provider config"
                )
-                self._plugins[manifest.name] = loaded
+                self._plugins[lookup_key] = loaded
                logger.debug(
-                    "Skipping '%s' (not in plugins.enabled)", manifest.name
+                    "Skipping '%s' (exclusive, handled by category discovery)",
+                    lookup_key,
+                )
+                continue
+
+            # Built-in backends auto-load — they ship with hermes and must
+            # just work. Selection among them (e.g. which image_gen backend
+            # services calls) is driven by ``<category>.provider`` config,
+            # enforced by the tool wrapper.
+            if manifest.kind == "backend" and manifest.source == "bundled":
+                self._load_plugin(manifest)
+                continue
+
+            # Everything else (standalone, user-installed backends,
+            # entry-point plugins) is opt-in via plugins.enabled.
+            # Accept both the path-derived key and the legacy bare name
+            # so existing configs keep working.
+            is_enabled = (
+                enabled is not None
+                and (lookup_key in enabled or manifest.name in enabled)
+            )
+            if not is_enabled:
+                loaded = LoadedPlugin(manifest=manifest, enabled=False)
+                loaded.error = (
+                    "not enabled in config (run `hermes plugins enable {}` to activate)"
+                    .format(lookup_key)
+                )
+                self._plugins[lookup_key] = loaded
+                logger.debug(
+                    "Skipping '%s' (not in plugins.enabled)", lookup_key
                )
                continue
            self._load_plugin(manifest)
@@ -545,9 +639,37 @@ class PluginManager:
    ) -> List[PluginManifest]:
        """Read ``plugin.yaml`` manifests from subdirectories of *path*.

-        *skip_names* is an optional allow-list of names to ignore (used
-        for the bundled scan to exclude ``memory`` / ``context_engine``
-        subdirs that have their own discovery path).
+        Supports two layouts, mixed freely:
+
+        * **Flat** — ``<root>/<plugin-name>/plugin.yaml``. Key is
+          ``<plugin-name>`` (e.g. ``disk-cleanup``).
+        * **Category** — ``<root>/<category>/<plugin-name>/plugin.yaml``,
+          where the ``<category>`` directory itself has no ``plugin.yaml``.
+          Key is ``<category>/<plugin-name>`` (e.g. ``image_gen/openai``).
+          Depth is capped at two segments.
+
+        *skip_names* is an optional allow-list of names to ignore at the
+        top level (kept for back-compat; the current call sites no longer
+        pass it now that categories are first-class).
+        """
+        return self._scan_directory_level(
+            path, source, skip_names=skip_names, prefix="", depth=0
+        )
+
+    def _scan_directory_level(
+        self,
+        path: Path,
+        source: str,
+        *,
+        skip_names: Optional[Set[str]],
+        prefix: str,
+        depth: int,
+    ) -> List[PluginManifest]:
+        """Recursive implementation of :meth:`_scan_directory`.
+
+        ``prefix`` is the category path already accumulated ("" at root,
+        "image_gen" one level in). ``depth`` is the recursion depth; we
+        cap at 2 so ``<root>/a/b/c/`` is ignored.
        """
        manifests: List[PluginManifest] = []
        if not path.is_dir():
@@ -556,37 +678,112 @@ class PluginManager:
        for child in sorted(path.iterdir()):
            if not child.is_dir():
                continue
-            if skip_names and child.name in skip_names:
+            if depth == 0 and skip_names and child.name in skip_names:
                continue
            manifest_file = child / "plugin.yaml"
            if not manifest_file.exists():
                manifest_file = child / "plugin.yml"
-            if not manifest_file.exists():
-                logger.debug("Skipping %s (no plugin.yaml)", child)
+
+            if manifest_file.exists():
+                manifest = self._parse_manifest(
+                    manifest_file, child, source, prefix
+                )
+                if manifest is not None:
+                    manifests.append(manifest)
                continue

-            try:
-                if yaml is None:
-                    logger.warning("PyYAML not installed – cannot load %s", manifest_file)
-                    continue
-                data = yaml.safe_load(manifest_file.read_text()) or {}
-                manifest = PluginManifest(
-                    name=data.get("name", child.name),
-                    version=str(data.get("version", "")),
-                    description=data.get("description", ""),
-                    author=data.get("author", ""),
-                    requires_env=data.get("requires_env", []),
-                    provides_tools=data.get("provides_tools", []),
-                    provides_hooks=data.get("provides_hooks", []),
-                    source=source,
-                    path=str(child),
+            # No manifest at this level. If we're still within the depth
+            # cap, treat this directory as a category namespace and recurse
+            # one level in looking for children with manifests.
+            if depth >= 1:
+                logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child)
+                continue
+
+            sub_prefix = f"{prefix}/{child.name}" if prefix else child.name
+            manifests.extend(
+                self._scan_directory_level(
+                    child,
+                    source,
+                    skip_names=None,
+                    prefix=sub_prefix,
+                    depth=depth + 1,
                )
-                manifests.append(manifest)
-            except Exception as exc:
-                logger.warning("Failed to parse %s: %s", manifest_file, exc)
+            )

        return manifests

+    def _parse_manifest(
+        self,
+        manifest_file: Path,
+        plugin_dir: Path,
+        source: str,
+        prefix: str,
+    ) -> Optional[PluginManifest]:
+        """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`.
+
+        Returns ``None`` on parse failure (logs a warning).
+        """
+        try:
+            if yaml is None:
+                logger.warning("PyYAML not installed – cannot load %s", manifest_file)
+                return None
+            data = yaml.safe_load(manifest_file.read_text()) or {}
+
+            name = data.get("name", plugin_dir.name)
+            key = f"{prefix}/{plugin_dir.name}" if prefix else name
+
+            raw_kind = data.get("kind", "standalone")
+            if not isinstance(raw_kind, str):
+                raw_kind = "standalone"
+            kind = raw_kind.strip().lower()
+            if kind not in _VALID_PLUGIN_KINDS:
+                logger.warning(
+                    "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'",
+                    key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)),
+                )
+                kind = "standalone"
+
+            # Auto-coerce user-installed memory providers to kind="exclusive"
+            # so they're routed to plugins/memory discovery instead of being
+            # loaded by the general PluginManager (which has no
+            # register_memory_provider on PluginContext). Mirrors the
+            # heuristic in plugins/memory/__init__.py:_is_memory_provider_dir.
+            # Bundled memory providers are already skipped via skip_names.
+            if kind == "standalone" and "kind" not in data:
+                init_file = plugin_dir / "__init__.py"
+                if init_file.exists():
+                    try:
+                        source_text = init_file.read_text(errors="replace")[:8192]
+                        if (
+                            "register_memory_provider" in source_text
+                            or "MemoryProvider" in source_text
+                        ):
+                            kind = "exclusive"
+                            logger.debug(
+                                "Plugin %s: detected memory provider, "
+                                "treating as kind='exclusive'",
+                                key,
+                            )
+                    except Exception:
+                        pass
+
+            return PluginManifest(
+                name=name,
+                version=str(data.get("version", "")),
+                description=data.get("description", ""),
+                author=data.get("author", ""),
+                requires_env=data.get("requires_env", []),
+                provides_tools=data.get("provides_tools", []),
+                provides_hooks=data.get("provides_hooks", []),
+                source=source,
+                path=str(plugin_dir),
+                kind=kind,
+                key=key,
+            )
+        except Exception as exc:
+            logger.warning("Failed to parse %s: %s", manifest_file, exc)
+            return None
+
    # -----------------------------------------------------------------------
    # Entry-point scanning
    # -----------------------------------------------------------------------
@@ -609,6 +806,7 @@ class PluginManager:
                    name=ep.name,
                    source="entrypoint",
                    path=ep.value,
+                    key=ep.name,
                )
                manifests.append(manifest)
        except Exception as exc:
@@ -670,10 +868,16 @@ class PluginManager:
            loaded.error = str(exc)
            logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)

-        self._plugins[manifest.name] = loaded
+        self._plugins[manifest.key or manifest.name] = loaded

    def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
-        """Import a directory-based plugin as ``hermes_plugins.<name>``."""
+        """Import a directory-based plugin as ``hermes_plugins.<slug>``.
+
+        The module slug is derived from ``manifest.key`` so category-namespaced
+        plugins (``image_gen/openai``) import as
+        ``hermes_plugins.image_gen__openai`` without colliding with any
+        future ``tts/openai``.
+        """
        plugin_dir = Path(manifest.path)  # type: ignore[arg-type]
        init_file = plugin_dir / "__init__.py"
        if not init_file.exists():
@@ -686,7 +890,9 @@ class PluginManager:
            ns_pkg.__package__ = _NS_PARENT
            sys.modules[_NS_PARENT] = ns_pkg

-        module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
+        key = manifest.key or manifest.name
+        slug = key.replace("/", "__").replace("-", "_")
+        module_name = f"{_NS_PARENT}.{slug}"
        spec = importlib.util.spec_from_file_location(
            module_name,
            init_file,
@@ -767,10 +973,12 @@ class PluginManager:
    def list_plugins(self) -> List[Dict[str, Any]]:
        """Return a list of info dicts for all discovered plugins."""
        result: List[Dict[str, Any]] = []
-        for name, loaded in sorted(self._plugins.items()):
+        for key, loaded in sorted(self._plugins.items()):
            result.append(
                {
-                    "name": name,
+                    "name": loaded.manifest.name,
+                    "key": loaded.manifest.key or loaded.manifest.name,
+                    "kind": loaded.manifest.kind,
                    "version": loaded.manifest.version,
                    "description": loaded.manifest.description,
                    "source": loaded.manifest.source,
@@ -94,6 +94,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="KIMI_BASE_URL",
    ),
+    "stepfun": HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("STEPFUN_API_KEY",),
+        base_url_override="https://api.stepfun.ai/step_plan/v1",
+        base_url_env_var="STEPFUN_BASE_URL",
+    ),
    "minimax": HermesOverlay(
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_BASE_URL",
@@ -210,6 +216,10 @@ ALIASES: Dict[str, str] = {
    "kimi-coding-cn": "kimi-for-coding",
    "moonshot": "kimi-for-coding",

+    # stepfun
+    "step": "stepfun",
+    "stepfun-coding-plan": "stepfun",
+
    # minimax-cn
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
@@ -294,6 +304,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "nous": "Nous Portal",
    "openai-codex": "OpenAI Codex",
    "copilot-acp": "GitHub Copilot ACP",
+    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
@@ -427,6 +438,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    """
    pdef = get_provider(provider)
    if pdef is not None:
+        # Even for known providers, check URL heuristics for special endpoints
+        # (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom')
+        if base_url:
+            url_lower = base_url.rstrip("/").lower()
+            if "api.kimi.com/coding" in url_lower:
+                return "anthropic_messages"
+            if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
+                return "anthropic_messages"
+            if "api.openai.com" in url_lower:
+                return "codex_responses"
        return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")

    # Direct provider checks for providers not in HERMES_OVERLAYS
@@ -439,6 +460,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
        hostname = base_url_hostname(base_url)
        if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
            return "anthropic_messages"
+        if hostname == "api.kimi.com" and "/coding" in url_lower:
+            return "anthropic_messages"
        if hostname == "api.openai.com":
            return "codex_responses"
        if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"):
@@ -46,6 +46,9 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
      protocol under a ``/anthropic`` suffix — treat those as
      ``anthropic_messages`` transport instead of the default
      ``chat_completions``.
+    - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the
+      Anthropic Messages protocol (the /coding route accepts Claude
+      Code's native request shape).
    """
    normalized = (base_url or "").strip().lower().rstrip("/")
    hostname = base_url_hostname(base_url)
@@ -55,6 +58,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
        return "codex_responses"
    if normalized.endswith("/anthropic"):
        return "anthropic_messages"
+    if hostname == "api.kimi.com" and "/coding" in normalized:
+        return "anthropic_messages"
    return None


@@ -205,7 +210,8 @@ def _resolve_runtime_from_pool_entry(
            api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
-            # api.openai.com → codex_responses, api.x.ai → codex_responses).
+            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
+            # codex_responses).
            detected = _detect_api_mode_for_url(base_url)
            if detected:
                api_mode = detected
@@ -660,7 +666,8 @@ def _resolve_explicit_runtime(
            if configured_mode:
                api_mode = configured_mode
            else:
-                # Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
+                # Auto-detect from URL (Anthropic /anthropic suffix,
+                # api.openai.com → Responses, Kimi /coding, etc.).
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected
@@ -96,6 +96,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],
    "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
@@ -408,13 +409,36 @@ def _print_setup_summary(config: dict, hermes_home):
            ("Browser Automation", False, missing_browser_hint)
        )

-    # FAL (image generation)
+    # Image generation — FAL (direct or via Nous), or any plugin-registered
+    # provider (OpenAI, etc.)
    if subscription_features.image_gen.managed_by_nous:
        tool_status.append(("Image Generation (Nous subscription)", True, None))
    elif subscription_features.image_gen.available:
        tool_status.append(("Image Generation", True, None))
    else:
-        tool_status.append(("Image Generation", False, "FAL_KEY"))
+        # Fall back to probing plugin-registered providers so OpenAI-only
+        # setups don't show as "missing FAL_KEY".
+        _img_backend = None
+        try:
+            from agent.image_gen_registry import list_providers
+            from hermes_cli.plugins import _ensure_plugins_discovered
+
+            _ensure_plugins_discovered()
+            for _p in list_providers():
+                if _p.name == "fal":
+                    continue
+                try:
+                    if _p.is_available():
+                        _img_backend = _p.display_name
+                        break
+                except Exception:
+                    continue
+        except Exception:
+            pass
+        if _img_backend:
+            tool_status.append((f"Image Generation ({_img_backend})", True, None))
+        else:
+            tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))

    # TTS — show configured provider
    tts_provider = config.get("tts", {}).get("provider", "edge")
@@ -781,6 +805,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
            "zai": "Z.AI / GLM",
            "kimi-coding": "Kimi / Moonshot",
            "kimi-coding-cn": "Kimi / Moonshot (China)",
+            "stepfun": "StepFun Step Plan",
            "minimax": "MiniMax",
            "minimax-cn": "MiniMax CN",
            "anthropic": "Anthropic",
@@ -30,6 +30,14 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      prompt: "#FFF8DC"                  # Prompt text color
      input_rule: "#CD7F32"              # Input area horizontal rule
      response_border: "#FFD700"         # Response box border (ANSI)
+      status_bar_bg: "#1a1a2e"           # Status bar background
+      status_bar_text: "#C0C0C0"         # Status bar default text
+      status_bar_strong: "#FFD700"       # Status bar highlighted text
+      status_bar_dim: "#8B8682"          # Status bar separators/muted text
+      status_bar_good: "#8FBC8F"         # Healthy context usage
+      status_bar_warn: "#FFD700"         # Warning context usage
+      status_bar_bad: "#FF8C00"          # High context usage
+      status_bar_critical: "#FF6B6B"     # Critical context usage
      session_label: "#DAA520"           # Session label color
      session_border: "#8B8682"          # Session ID dim color
      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
@@ -170,6 +178,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF8DC",
            "input_rule": "#CD7F32",
            "response_border": "#FFD700",
+            "status_bar_bg": "#1a1a2e",
            "session_label": "#DAA520",
            "session_border": "#8B8682",
        },
@@ -203,6 +212,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F1E6CF",
            "input_rule": "#9F1C1C",
            "response_border": "#C7A96B",
+            "status_bar_bg": "#2A1212",
+            "status_bar_text": "#F1E6CF",
+            "status_bar_strong": "#C7A96B",
+            "status_bar_dim": "#6E584B",
+            "status_bar_good": "#7BC96F",
+            "status_bar_warn": "#C7A96B",
+            "status_bar_bad": "#DD4A3A",
+            "status_bar_critical": "#EF5350",
            "session_label": "#C7A96B",
            "session_border": "#6E584B",
        },
@@ -267,6 +284,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#444444",
            "response_border": "#aaaaaa",
+            "status_bar_bg": "#1F1F1F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#E6EDF3",
+            "status_bar_dim": "#777777",
+            "status_bar_good": "#B5B5B5",
+            "status_bar_warn": "#AAAAAA",
+            "status_bar_bad": "#D0D0D0",
+            "status_bar_critical": "#F0F0F0",
            "session_label": "#888888",
            "session_border": "#555555",
        },
@@ -298,6 +323,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#4169e1",
            "response_border": "#7eb8f6",
+            "status_bar_bg": "#151C2F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#7EB8F6",
+            "status_bar_dim": "#4B5563",
+            "status_bar_good": "#63D0A6",
+            "status_bar_warn": "#E6A855",
+            "status_bar_bad": "#F7A072",
+            "status_bar_critical": "#FF7A7A",
            "session_label": "#7eb8f6",
            "session_border": "#4b5563",
        },
@@ -403,6 +436,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#EAF7FF",
            "input_rule": "#2A6FB9",
            "response_border": "#5DB8F5",
+            "status_bar_bg": "#0F2440",
+            "status_bar_text": "#EAF7FF",
+            "status_bar_strong": "#A9DFFF",
+            "status_bar_dim": "#496884",
+            "status_bar_good": "#6ED7B0",
+            "status_bar_warn": "#5DB8F5",
+            "status_bar_bad": "#2A6FB9",
+            "status_bar_critical": "#D94F4F",
            "session_label": "#A9DFFF",
            "session_border": "#496884",
        },
@@ -467,6 +508,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F5F5F5",
            "input_rule": "#656565",
            "response_border": "#B7B7B7",
+            "status_bar_bg": "#202020",
+            "status_bar_text": "#D3D3D3",
+            "status_bar_strong": "#F5F5F5",
+            "status_bar_dim": "#656565",
+            "status_bar_good": "#B7B7B7",
+            "status_bar_warn": "#D3D3D3",
+            "status_bar_bad": "#E7E7E7",
+            "status_bar_critical": "#F5F5F5",
            "session_label": "#919191",
            "session_border": "#656565",
        },
@@ -532,6 +581,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF0D4",
            "input_rule": "#C75B1D",
            "response_border": "#F29C38",
+            "status_bar_bg": "#2B160E",
+            "status_bar_text": "#FFF0D4",
+            "status_bar_strong": "#FFD39A",
+            "status_bar_dim": "#6C4724",
+            "status_bar_good": "#6BCB77",
+            "status_bar_warn": "#F29C38",
+            "status_bar_bad": "#E2832B",
+            "status_bar_critical": "#EF5350",
            "session_label": "#FFD39A",
            "session_border": "#6C4724",
        },
@@ -770,6 +827,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
    warn = skin.get_color("ui_warn", "#FF8C00")
    error = skin.get_color("ui_error", "#FF6B6B")
    status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
+    status_text = skin.get_color("status_bar_text", text)
+    status_strong = skin.get_color("status_bar_strong", title)
+    status_dim = skin.get_color("status_bar_dim", dim)
+    status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F"))
+    status_warn = skin.get_color("status_bar_warn", warn)
+    status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn))
+    status_critical = skin.get_color("status_bar_critical", error)
    voice_bg = skin.get_color("voice_status_bg", status_bg)
    menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
    menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
@@ -782,13 +846,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "prompt": prompt,
        "prompt-working": f"{dim} italic",
        "hint": f"{dim} italic",
-        "status-bar": f"bg:{status_bg} {text}",
-        "status-bar-strong": f"bg:{status_bg} {title} bold",
-        "status-bar-dim": f"bg:{status_bg} {dim}",
-        "status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
-        "status-bar-warn": f"bg:{status_bg} {warn} bold",
-        "status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
-        "status-bar-critical": f"bg:{status_bg} {error} bold",
+        "status-bar": f"bg:{status_bg} {status_text}",
+        "status-bar-strong": f"bg:{status_bg} {status_strong} bold",
+        "status-bar-dim": f"bg:{status_bg} {status_dim}",
+        "status-bar-good": f"bg:{status_bg} {status_good} bold",
+        "status-bar-warn": f"bg:{status_bg} {status_warn} bold",
+        "status-bar-bad": f"bg:{status_bg} {status_bad} bold",
+        "status-bar-critical": f"bg:{status_bg} {status_critical} bold",
        "input-rule": input_rule,
        "image-badge": f"{label} bold",
        "completion-menu": f"bg:{menu_bg} {text}",
@@ -122,6 +122,7 @@ def show_status(args):
        "OpenAI": "OPENAI_API_KEY",
        "Z.AI/GLM": "GLM_API_KEY",
        "Kimi": "KIMI_API_KEY",
+        "StepFun Step Plan": "STEPFUN_API_KEY",
        "MiniMax": "MINIMAX_API_KEY",
        "MiniMax-CN": "MINIMAX_CN_API_KEY",
        "Firecrawl": "FIRECRAWL_API_KEY",
@@ -252,6 +253,7 @@ def show_status(args):
    apikey_providers = {
        "Z.AI / GLM":       ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
        "Kimi / Moonshot":  ("KIMI_API_KEY",),
+        "StepFun Step Plan": ("STEPFUN_API_KEY",),
        "MiniMax":          ("MINIMAX_API_KEY",),
        "MiniMax (China)":  ("MINIMAX_CN_API_KEY",),
    }
@@ -847,6 +847,51 @@ def _configure_toolset(ts_key: str, config: dict):
        _configure_simple_requirements(ts_key)


+def _plugin_image_gen_providers() -> list[dict]:
+    """Build picker-row dicts from plugin-registered image gen providers.
+
+    Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
+    row but carries an ``image_gen_plugin_name`` marker so downstream
+    code (config writing, model picker) knows to route through the
+    plugin registry instead of the in-tree FAL backend.
+
+    FAL is skipped — it's already exposed by the hardcoded
+    ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
+    a plugin in a follow-up PR, the hardcoded entries go away and this
+    function surfaces it alongside OpenAI automatically.
+    """
+    try:
+        from agent.image_gen_registry import list_providers
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        providers = list_providers()
+    except Exception:
+        return []
+
+    rows: list[dict] = []
+    for provider in providers:
+        if getattr(provider, "name", None) == "fal":
+            # FAL has its own hardcoded rows today.
+            continue
+        try:
+            schema = provider.get_setup_schema()
+        except Exception:
+            continue
+        if not isinstance(schema, dict):
+            continue
+        rows.append(
+            {
+                "name": schema.get("name", provider.display_name),
+                "badge": schema.get("badge", ""),
+                "tag": schema.get("tag", ""),
+                "env_vars": schema.get("env_vars", []),
+                "image_gen_plugin_name": provider.name,
+            }
+        )
+    return rows
+
+
 def _visible_providers(cat: dict, config: dict) -> list[dict]:
    """Return provider entries visible for the current auth/config state."""
    features = get_nous_subscription_features(config)
@@ -857,6 +902,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
        if provider.get("requires_nous_auth") and not features.nous_auth_present:
            continue
        visible.append(provider)
+
+    # Inject plugin-registered image_gen backends (OpenAI today, more
+    # later) so the picker lists them alongside FAL / Nous Subscription.
+    if cat.get("name") == "Image Generation":
+        visible.extend(_plugin_image_gen_providers())
+
    return visible


@@ -876,7 +927,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
        browser_cfg = config.get("browser", {})
        return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
    if ts_key == "image_gen":
-        return not fal_key_is_configured()
+        # Satisfied when the in-tree FAL backend is configured OR any
+        # plugin-registered image gen provider is available.
+        if fal_key_is_configured():
+            return False
+        try:
+            from agent.image_gen_registry import list_providers
+            from hermes_cli.plugins import _ensure_plugins_discovered
+
+            _ensure_plugins_discovered()
+            for provider in list_providers():
+                try:
+                    if provider.is_available():
+                        return False
+                except Exception:
+                    continue
+        except Exception:
+            pass
+        return True

    return not _toolset_has_keys(ts_key, config)

@@ -1095,6 +1163,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None:
    _print_success(f"  Model set to: {chosen}")


+def _plugin_image_gen_catalog(plugin_name: str):
+    """Return ``(catalog_dict, default_model_id)`` for a plugin provider.
+
+    ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table —
+    ``{model_id: {"display", "speed", "strengths", "price", ...}}`` —
+    so the existing picker code paths work without change. Returns
+    ``({}, None)`` if the provider isn't registered or has no models.
+    """
+    try:
+        from agent.image_gen_registry import get_provider
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        provider = get_provider(plugin_name)
+    except Exception:
+        return {}, None
+    if provider is None:
+        return {}, None
+    try:
+        models = provider.list_models() or []
+        default = provider.default_model()
+    except Exception:
+        return {}, None
+    catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m}
+    return catalog, default
+
+
+def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None:
+    """Prompt the user to pick a model for a plugin-registered backend.
+
+    Writes selection to ``image_gen.model``. Mirrors
+    :func:`_configure_imagegen_model` but sources its catalog from the
+    plugin registry instead of :data:`IMAGEGEN_BACKENDS`.
+    """
+    catalog, default_model = _plugin_image_gen_catalog(plugin_name)
+    if not catalog:
+        return
+
+    cur_cfg = config.setdefault("image_gen", {})
+    if not isinstance(cur_cfg, dict):
+        cur_cfg = {}
+        config["image_gen"] = cur_cfg
+    current_model = cur_cfg.get("model") or default_model
+    if current_model not in catalog:
+        current_model = default_model
+
+    model_ids = list(catalog.keys())
+    ordered = [current_model] + [m for m in model_ids if m != current_model]
+
+    widths = {
+        "model": max(len(m) for m in model_ids),
+        "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6),
+        "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0),
+    }
+
+    print()
+    header = (
+        f"  {'Model':<{widths['model']}}  "
+        f"{'Speed':<{widths['speed']}}  "
+        f"{'Strengths':<{widths['strengths']}}  "
+        f"Price"
+    )
+    print(color(header, Colors.CYAN))
+
+    rows = []
+    for mid in ordered:
+        row = _format_imagegen_model_row(mid, catalog[mid], widths)
+        if mid == current_model:
+            row += "  ← currently in use"
+        rows.append(row)
+
+    idx = _prompt_choice(
+        f"  Choose {plugin_name} model:",
+        rows,
+        default=0,
+    )
+
+    chosen = ordered[idx]
+    cur_cfg["model"] = chosen
+    _print_success(f"  Model set to: {chosen}")
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1151,10 +1301,28 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        # Plugin-registered image_gen provider: write image_gen.provider
+        # and route model selection to the plugin's own catalog.
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection after backend pick.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            # In-tree FAL is the only non-plugin backend today. Keep
+            # image_gen.provider clear so the dispatch shim falls through
+            # to the legacy FAL path.
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+                img_cfg["provider"] = "fal"
        return

    # Prompt for each required env var
@@ -1189,10 +1357,23 @@ def _configure_provider(provider: dict, config: dict):

    if all_configured:
        _print_success(f"  {provider['name']} configured!")
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection after env vars are in.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+                img_cfg["provider"] = "fal"


 def _configure_simple_requirements(ts_key: str):
@@ -2189,7 +2189,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(reasoning_tokens) as reasoning_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ?
            GROUP BY day ORDER BY day
        """, (cutoff,))
@@ -2200,7 +2201,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(input_tokens) as input_tokens,
                   SUM(output_tokens) as output_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ? AND model IS NOT NULL
            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
        """, (cutoff,))
@@ -2213,7 +2215,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(reasoning_tokens) as total_reasoning,
                   COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
-                   COUNT(*) as total_sessions
+                   COUNT(*) as total_sessions,
+                   SUM(COALESCE(api_call_count, 0)) as total_api_calls
            FROM sessions WHERE started_at > ?
        """, (cutoff,))
        totals = dict(cur3.fetchone())
@@ -31,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 6
+SCHEMA_VERSION = 8

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions (
    cost_source TEXT,
    pricing_version TEXT,
    title TEXT,
+    api_call_count INTEGER DEFAULT 0,
    FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
 );

@@ -80,10 +81,16 @@ CREATE TABLE IF NOT EXISTS messages (
    token_count INTEGER,
    finish_reason TEXT,
    reasoning TEXT,
+    reasoning_content TEXT,
    reasoning_details TEXT,
    codex_reasoning_items TEXT
 );

+CREATE TABLE IF NOT EXISTS state_meta (
+    key TEXT PRIMARY KEY,
+    value TEXT
+);
+
 CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source);
 CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id);
 CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
@@ -329,6 +336,26 @@ class SessionDB:
                    except sqlite3.OperationalError:
                        pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 6")
+            if current_version < 7:
+                # v7: preserve provider-native reasoning_content separately from
+                # normalized reasoning text. Kimi/Moonshot replay can require
+                # this field on assistant tool-call messages when thinking is on.
+                try:
+                    cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT')
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 7")
+            if current_version < 8:
+                # v8: add api_call_count column to sessions — tracks the number
+                # of individual LLM API calls made within a session (as opposed
+                # to the session count itself).
+                try:
+                    cursor.execute(
+                        'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
+                    )
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 8")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -435,6 +462,7 @@ class SessionDB:
        billing_provider: Optional[str] = None,
        billing_base_url: Optional[str] = None,
        billing_mode: Optional[str] = None,
+        api_call_count: int = 0,
        absolute: bool = False,
    ) -> None:
        """Update token counters and backfill model if not already set.
@@ -464,7 +492,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = ?
                   WHERE id = ?"""
        else:
            sql = """UPDATE sessions SET
@@ -484,7 +513,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = COALESCE(api_call_count, 0) + ?
                   WHERE id = ?"""
        params = (
            input_tokens,
@@ -502,6 +532,7 @@ class SessionDB:
            billing_base_url,
            billing_mode,
            model,
+            api_call_count,
            session_id,
        )
        def _do(conn):
@@ -922,6 +953,7 @@ class SessionDB:
        token_count: int = None,
        finish_reason: str = None,
        reasoning: str = None,
+        reasoning_content: str = None,
        reasoning_details: Any = None,
        codex_reasoning_items: Any = None,
    ) -> int:
@@ -951,8 +983,8 @@ class SessionDB:
            cursor = conn.execute(
                """INSERT INTO messages (session_id, role, content, tool_call_id,
                   tool_calls, tool_name, timestamp, token_count, finish_reason,
-                   reasoning, reasoning_details, codex_reasoning_items)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@@ -964,6 +996,7 @@ class SessionDB:
                    token_count,
                    finish_reason,
                    reasoning,
+                    reasoning_content,
                    reasoning_details_json,
                    codex_items_json,
                ),
@@ -1014,7 +1047,7 @@ class SessionDB:
        with self._lock:
            cursor = self._conn.execute(
                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
-                "reasoning, reasoning_details, codex_reasoning_items "
+                "reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
                "FROM messages WHERE session_id = ? ORDER BY timestamp, id",
                (session_id,),
            )
@@ -1038,6 +1071,8 @@ class SessionDB:
            if row["role"] == "assistant":
                if row["reasoning"]:
                    msg["reasoning"] = row["reasoning"]
+                if row["reasoning_content"] is not None:
+                    msg["reasoning_content"] = row["reasoning_content"]
                if row["reasoning_details"]:
                    try:
                        msg["reasoning_details"] = json.loads(row["reasoning_details"])
@@ -1441,3 +1476,116 @@ class SessionDB:
            return len(session_ids)

        return self._execute_write(_do)
+
+    # ── Meta key/value (for scheduler bookkeeping) ──
+
+    def get_meta(self, key: str) -> Optional[str]:
+        """Read a value from the state_meta key/value store."""
+        with self._lock:
+            row = self._conn.execute(
+                "SELECT value FROM state_meta WHERE key = ?", (key,)
+            ).fetchone()
+        if row is None:
+            return None
+        return row["value"] if isinstance(row, sqlite3.Row) else row[0]
+
+    def set_meta(self, key: str, value: str) -> None:
+        """Write a value to the state_meta key/value store."""
+        def _do(conn):
+            conn.execute(
+                "INSERT INTO state_meta (key, value) VALUES (?, ?) "
+                "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
+                (key, value),
+            )
+        self._execute_write(_do)
+
+    # ── Space reclamation ──
+
+    def vacuum(self) -> None:
+        """Run VACUUM to reclaim disk space after large deletes.
+
+        SQLite does not shrink the database file when rows are deleted —
+        freed pages just get reused on the next insert. After a prune that
+        removed hundreds of sessions, the file stays bloated unless we
+        explicitly VACUUM.
+
+        VACUUM rewrites the entire DB, so it's expensive (seconds per
+        100MB) and cannot run inside a transaction. It also acquires an
+        exclusive lock, so callers must ensure no other writers are
+        active. Safe to call at startup before the gateway/CLI starts
+        serving traffic.
+        """
+        # VACUUM cannot be executed inside a transaction.
+        with self._lock:
+            # Best-effort WAL checkpoint first, then VACUUM.
+            try:
+                self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+            except Exception:
+                pass
+            self._conn.execute("VACUUM")
+
+    def maybe_auto_prune_and_vacuum(
+        self,
+        retention_days: int = 90,
+        min_interval_hours: int = 24,
+        vacuum: bool = True,
+    ) -> Dict[str, Any]:
+        """Idempotent auto-maintenance: prune old sessions + optional VACUUM.
+
+        Records the last run timestamp in state_meta so subsequent calls
+        within ``min_interval_hours`` no-op. Designed to be called once at
+        startup from long-lived entrypoints (CLI, gateway, cron scheduler).
+
+        Never raises. On any failure, logs a warning and returns a dict
+        with ``"error"`` set.
+
+        Returns a dict with keys:
+          - ``"skipped"`` (bool) — true if within min_interval_hours of last run
+          - ``"pruned"`` (int)   — number of sessions deleted
+          - ``"vacuumed"`` (bool) — true if VACUUM ran
+          - ``"error"`` (str, optional) — present only on failure
+        """
+        result: Dict[str, Any] = {"skipped": False, "pruned": 0, "vacuumed": False}
+        try:
+            # Skip if another process/call did maintenance recently.
+            last_raw = self.get_meta("last_auto_prune")
+            now = time.time()
+            if last_raw:
+                try:
+                    last_ts = float(last_raw)
+                    if now - last_ts < min_interval_hours * 3600:
+                        result["skipped"] = True
+                        return result
+                except (TypeError, ValueError):
+                    pass  # corrupt meta; treat as no prior run
+
+            pruned = self.prune_sessions(older_than_days=retention_days)
+            result["pruned"] = pruned
+
+            # Only VACUUM if we actually freed rows — VACUUM on a tight DB
+            # is wasted I/O. Threshold keeps small DBs from paying the cost.
+            if vacuum and pruned > 0:
+                try:
+                    self.vacuum()
+                    result["vacuumed"] = True
+                except Exception as exc:
+                    logger.warning("state.db VACUUM failed: %s", exc)
+
+            # Record the attempt even if pruned == 0, so we don't retry
+            # every startup within the min_interval_hours window.
+            self.set_meta("last_auto_prune", str(now))
+
+            if pruned > 0:
+                logger.info(
+                    "state.db auto-maintenance: pruned %d session(s) older than %d days%s",
+                    pruned,
+                    retention_days,
+                    " + VACUUM" if result["vacuumed"] else "",
+                )
+        except Exception as exc:
+            # Maintenance must never block startup. Log and return error marker.
+            logger.warning("state.db auto-maintenance failed: %s", exc)
+            result["error"] = str(exc)
+
+        return result
+
@@ -108,9 +108,15 @@ def _run_async(coro):
    if loop and loop.is_running():
        # Inside an async context (gateway, RL env) — run in a fresh thread.
        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(asyncio.run, coro)
+        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        future = pool.submit(asyncio.run, coro)
+        try:
            return future.result(timeout=300)
+        except concurrent.futures.TimeoutError:
+            future.cancel()
+            raise
+        finally:
+            pool.shutdown(wait=False, cancel_futures=True)

    # If we're on a worker thread (e.g., parallel tool execution in
    # delegate_task), use a per-thread persistent loop.  This avoids
@@ -0,0 +1,5 @@
+# Web Development
+
+Optional skills for client-side web development workflows — embedding agents, copilots, and AI-native UX patterns into user-facing web apps.
+
+These are distinct from Hermes' own browser automation (Browserbase, Camofox), which operate *on* websites from outside. Web-development skills here help users build *into* their own websites.
@@ -0,0 +1,189 @@
+---
+name: page-agent
+description: Embed alibaba/page-agent into your own web application — a pure-JavaScript in-page GUI agent that ships as a single <script> tag or npm package and lets end-users of your site drive the UI with natural language ("click login, fill username as John"). No Python, no headless browser, no extension required. Use this skill when the user is a web developer who wants to add an AI copilot to their SaaS / admin panel / B2B tool, make a legacy web app accessible via natural language, or evaluate page-agent against a local (Ollama) or cloud (Qwen / OpenAI / OpenRouter) LLM. NOT for server-side browser automation — point those users to Hermes' built-in browser tool instead.
+version: 1.0.0
+author: Hermes Agent
+license: MIT
+metadata:
+  hermes:
+    tags: [web, javascript, agent, browser, gui, alibaba, embed, copilot, saas]
+    category: web-development
+---
+
+# page-agent
+
+alibaba/page-agent (https://github.com/alibaba/page-agent, 17k+ stars, MIT) is an in-page GUI agent written in TypeScript. It lives inside a webpage, reads the DOM as text (no screenshots, no multi-modal LLM), and executes natural-language instructions like "click the login button, then fill username as John" against the current page. Pure client-side — the host site just includes a script and passes an OpenAI-compatible LLM endpoint.
+
+## When to use this skill
+
+Load this skill when a user wants to:
+
+- **Ship an AI copilot inside their own web app** (SaaS, admin panel, B2B tool, ERP, CRM) — "users on my dashboard should be able to type 'create invoice for Acme Corp and email it' instead of clicking through five screens"
+- **Modernize a legacy web app** without rewriting the frontend — page-agent drops on top of existing DOM
+- **Add accessibility via natural language** — voice / screen-reader users drive the UI by describing what they want
+- **Demo or evaluate page-agent** against a local (Ollama) or hosted (Qwen, OpenAI, OpenRouter) LLM
+- **Build interactive training / product demos** — let an AI walk a user through "how to submit an expense report" live in the real UI
+
+## When NOT to use this skill
+
+- User wants **Hermes itself to drive a browser** → use Hermes' built-in browser tool (Browserbase / Camofox). page-agent is the *opposite* direction.
+- User wants **cross-tab automation without embedding** → use Playwright, browser-use, or the page-agent Chrome extension
+- User needs **visual grounding / screenshots** → page-agent is text-DOM only; use a multimodal browser agent instead
+
+## Prerequisites
+
+- Node 22.13+ or 24+, npm 10+ (docs claim 11+ but 10.9 works fine)
+- An OpenAI-compatible LLM endpoint: Qwen (DashScope), OpenAI, Ollama, OpenRouter, or anything speaking `/v1/chat/completions`
+- Browser with devtools (for debugging)
+
+## Path 1 — 30-second demo via CDN (no install)
+
+Fastest way to see it work. Uses alibaba's free testing LLM proxy — **for evaluation only**, subject to their terms.
+
+Add to any HTML page (or paste into the devtools console as a bookmarklet):
+
+```html
+<script src="https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js" crossorigin="true"></script>
+```
+
+A panel appears. Type an instruction. Done.
+
+Bookmarklet form (drop into bookmarks bar, click on any page):
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src='https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js';document.head.appendChild(s);})();
+```
+
+## Path 2 — npm install into your own web app (production use)
+
+Inside an existing web project (React / Vue / Svelte / plain):
+
+```bash
+npm install page-agent
+```
+
+Wire it up with your own LLM endpoint — **never ship the demo CDN to real users**:
+
+```javascript
+import { PageAgent } from 'page-agent'
+
+const agent = new PageAgent({
+    model: 'qwen3.5-plus',
+    baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+    apiKey: process.env.LLM_API_KEY,   // never hardcode
+    language: 'en-US',
+})
+
+// Show the panel for end users:
+agent.panel.show()
+
+// Or drive it programmatically:
+await agent.execute('Click submit button, then fill username as John')
+```
+
+Provider examples (any OpenAI-compatible endpoint works):
+
+| Provider | `baseURL` | `model` |
+|----------|-----------|---------|
+| Qwen / DashScope | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen3.5-plus` |
+| OpenAI | `https://api.openai.com/v1` | `gpt-4o-mini` |
+| Ollama (local) | `http://localhost:11434/v1` | `qwen3:14b` |
+| OpenRouter | `https://openrouter.ai/api/v1` | `anthropic/claude-sonnet-4.6` |
+
+**Key config fields** (passed to `new PageAgent({...})`):
+
+- `model`, `baseURL`, `apiKey` — LLM connection
+- `language` — UI language (`en-US`, `zh-CN`, etc.)
+- Allowlist and data-masking hooks exist for locking down what the agent can touch — see https://alibaba.github.io/page-agent/ for the full option list
+
+**Security.** Don't put your `apiKey` in client-side code for a real deployment — proxy LLM calls through your backend and point `baseURL` at your proxy. The demo CDN exists because alibaba runs that proxy for evaluation.
+
+## Path 3 — clone the source repo (contributing, or hacking on it)
+
+Use this when the user wants to modify page-agent itself, test it against arbitrary sites via a local IIFE bundle, or develop the browser extension.
+
+```bash
+git clone https://github.com/alibaba/page-agent.git
+cd page-agent
+npm ci              # exact lockfile install (or `npm i` to allow updates)
+```
+
+Create `.env` in the repo root with an LLM endpoint. Example:
+
+```
+LLM_MODEL_NAME=gpt-4o-mini
+LLM_API_KEY=sk-...
+LLM_BASE_URL=https://api.openai.com/v1
+```
+
+Ollama flavor:
+
+```
+LLM_BASE_URL=http://localhost:11434/v1
+LLM_API_KEY=NA
+LLM_MODEL_NAME=qwen3:14b
+```
+
+Common commands:
+
+```bash
+npm start           # docs/website dev server
+npm run build       # build every package
+npm run dev:demo    # serve IIFE bundle at http://localhost:5174/page-agent.demo.js
+npm run dev:ext     # develop the browser extension (WXT + React)
+npm run build:ext   # build the extension
+```
+
+**Test on any website** using the local IIFE bundle. Add this bookmarklet:
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src=`http://localhost:5174/page-agent.demo.js?t=${Math.random()}`;s.onload=()=>console.log('PageAgent ready!');document.head.appendChild(s);})();
+```
+
+Then: `npm run dev:demo`, click the bookmarklet on any page, and the local build injects. Auto-rebuilds on save.
+
+**Warning:** your `.env` `LLM_API_KEY` is inlined into the IIFE bundle during dev builds. Don't share the bundle. Don't commit it. Don't paste the URL into Slack. (Verified: grepping the public dev bundle returns the literal values from `.env`.)
+
+## Repo layout (Path 3)
+
+Monorepo with npm workspaces. Key packages:
+
+| Package | Path | Purpose |
+|---------|------|---------|
+| `page-agent` | `packages/page-agent/` | Main entry with UI panel |
+| `@page-agent/core` | `packages/core/` | Core agent logic, no UI |
+| `@page-agent/mcp` | `packages/mcp/` | MCP server (beta) |
+| — | `packages/llms/` | LLM client |
+| — | `packages/page-controller/` | DOM ops + visual feedback |
+| — | `packages/ui/` | Panel + i18n |
+| — | `packages/extension/` | Chrome/Firefox extension |
+| — | `packages/website/` | Docs + landing site |
+
+## Verifying it works
+
+After Path 1 or Path 2:
+1. Open the page in a browser with devtools open
+2. You should see a floating panel. If not, check the console for errors (most common: CORS on the LLM endpoint, wrong `baseURL`, or a bad API key)
+3. Type a simple instruction matching something visible on the page ("click the Login link")
+4. Watch the Network tab — you should see a request to your `baseURL`
+
+After Path 3:
+1. `npm run dev:demo` prints `Accepting connections at http://localhost:5174`
+2. `curl -I http://localhost:5174/page-agent.demo.js` returns `HTTP/1.1 200 OK` with `Content-Type: application/javascript`
+3. Click the bookmarklet on any site; panel appears
+
+## Pitfalls
+
+- **Demo CDN in production** — don't. It's rate-limited, uses alibaba's free proxy, and their terms forbid production use.
+- **API key exposure** — any key passed to `new PageAgent({apiKey: ...})` ships in your JS bundle. Always proxy through your own backend for real deployments.
+- **Non-OpenAI-compatible endpoints** fail silently or with cryptic errors. If your provider needs native Anthropic/Gemini formatting, use an OpenAI-compatibility proxy (LiteLLM, OpenRouter) in front.
+- **CSP blocks** — sites with strict Content-Security-Policy may refuse to load the CDN script or disallow inline eval. In that case, self-host from your origin.
+- **Restart dev server** after editing `.env` in Path 3 — Vite only reads env at startup.
+- **Node version** — the repo declares `^22.13.0 || >=24`. Node 20 will fail `npm ci` with engine errors.
+- **npm 10 vs 11** — docs say npm 11+; npm 10.9 actually works fine.
+
+## Reference
+
+- Repo: https://github.com/alibaba/page-agent
+- Docs: https://alibaba.github.io/page-agent/
+- License: MIT (built on browser-use's DOM processing internals, Copyright 2024 Gregor Zunic)
@@ -16,8 +16,8 @@
  },
  "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
  "dependencies": {
-    "agent-browser": "^0.13.0",
-    "@askjo/camofox-browser": "^1.5.2"
+    "@askjo/camofox-browser": "^1.5.2",
+    "agent-browser": "^0.26.0"
  },
  "overrides": {
    "lodash": "4.18.1"
@@ -0,0 +1,303 @@
+"""OpenAI image generation backend.
+
+Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an
+:class:`ImageGenProvider` implementation. The tiers are implemented as
+three virtual model IDs so the ``hermes tools`` model picker and the
+``image_gen.model`` config key behave like any other multi-model backend:
+
+    gpt-image-2-low     ~15s   fastest, good for iteration
+    gpt-image-2-medium  ~40s   default — balanced
+    gpt-image-2-high    ~2min  slowest, highest fidelity
+
+All three hit the same underlying API model (``gpt-image-2``) with a
+different ``quality`` parameter. Output is base64 JSON → saved under
+``$HERMES_HOME/cache/images/``.
+
+Selection precedence (first hit wins):
+
+1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
+2. ``image_gen.openai.model`` in ``config.yaml``
+3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
+4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium``
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Dict, List, Optional, Tuple
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    error_response,
+    resolve_aspect_ratio,
+    save_b64_image,
+    success_response,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Model catalog
+# ---------------------------------------------------------------------------
+#
+# All three IDs resolve to the same underlying API model with a different
+# ``quality`` setting. ``api_model`` is what gets sent to OpenAI;
+# ``quality`` is the knob that changes generation time and output fidelity.
+
+API_MODEL = "gpt-image-2"
+
+_MODELS: Dict[str, Dict[str, Any]] = {
+    "gpt-image-2-low": {
+        "display": "GPT Image 2 (Low)",
+        "speed": "~15s",
+        "strengths": "Fast iteration, lowest cost",
+        "quality": "low",
+    },
+    "gpt-image-2-medium": {
+        "display": "GPT Image 2 (Medium)",
+        "speed": "~40s",
+        "strengths": "Balanced — default",
+        "quality": "medium",
+    },
+    "gpt-image-2-high": {
+        "display": "GPT Image 2 (High)",
+        "speed": "~2min",
+        "strengths": "Highest fidelity, strongest prompt adherence",
+        "quality": "high",
+    },
+}
+
+DEFAULT_MODEL = "gpt-image-2-medium"
+
+_SIZES = {
+    "landscape": "1536x1024",
+    "square": "1024x1024",
+    "portrait": "1024x1536",
+}
+
+
+def _load_openai_config() -> Dict[str, Any]:
+    """Read ``image_gen`` from config.yaml (returns {} on any failure)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        return section if isinstance(section, dict) else {}
+    except Exception as exc:
+        logger.debug("Could not load image_gen config: %s", exc)
+        return {}
+
+
+def _resolve_model() -> Tuple[str, Dict[str, Any]]:
+    """Decide which tier to use and return ``(model_id, meta)``."""
+    env_override = os.environ.get("OPENAI_IMAGE_MODEL")
+    if env_override and env_override in _MODELS:
+        return env_override, _MODELS[env_override]
+
+    cfg = _load_openai_config()
+    openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {}
+    candidate: Optional[str] = None
+    if isinstance(openai_cfg, dict):
+        value = openai_cfg.get("model")
+        if isinstance(value, str) and value in _MODELS:
+            candidate = value
+    if candidate is None:
+        top = cfg.get("model")
+        if isinstance(top, str) and top in _MODELS:
+            candidate = top
+
+    if candidate is not None:
+        return candidate, _MODELS[candidate]
+
+    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
+
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+
+class OpenAIImageGenProvider(ImageGenProvider):
+    """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high."""
+
+    @property
+    def name(self) -> str:
+        return "openai"
+
+    @property
+    def display_name(self) -> str:
+        return "OpenAI"
+
+    def is_available(self) -> bool:
+        if not os.environ.get("OPENAI_API_KEY"):
+            return False
+        try:
+            import openai  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        return [
+            {
+                "id": model_id,
+                "display": meta["display"],
+                "speed": meta["speed"],
+                "strengths": meta["strengths"],
+                "price": "varies",
+            }
+            for model_id, meta in _MODELS.items()
+        ]
+
+    def default_model(self) -> Optional[str]:
+        return DEFAULT_MODEL
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        return {
+            "name": "OpenAI",
+            "badge": "paid",
+            "tag": "gpt-image-2 at low/medium/high quality tiers",
+            "env_vars": [
+                {
+                    "key": "OPENAI_API_KEY",
+                    "prompt": "OpenAI API key",
+                    "url": "https://platform.openai.com/api-keys",
+                },
+            ],
+        }
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        prompt = (prompt or "").strip()
+        aspect = resolve_aspect_ratio(aspect_ratio)
+
+        if not prompt:
+            return error_response(
+                error="Prompt is required and must be a non-empty string",
+                error_type="invalid_argument",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        if not os.environ.get("OPENAI_API_KEY"):
+            return error_response(
+                error=(
+                    "OPENAI_API_KEY not set. Run `hermes tools` → Image "
+                    "Generation → OpenAI to configure, or `hermes setup` "
+                    "to add the key."
+                ),
+                error_type="auth_required",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        try:
+            import openai
+        except ImportError:
+            return error_response(
+                error="openai Python package not installed (pip install openai)",
+                error_type="missing_dependency",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        tier_id, meta = _resolve_model()
+        size = _SIZES.get(aspect, _SIZES["square"])
+
+        # gpt-image-2 returns b64_json unconditionally and REJECTS
+        # ``response_format`` as an unknown parameter. Don't send it.
+        payload: Dict[str, Any] = {
+            "model": API_MODEL,
+            "prompt": prompt,
+            "size": size,
+            "n": 1,
+            "quality": meta["quality"],
+        }
+
+        try:
+            client = openai.OpenAI()
+            response = client.images.generate(**payload)
+        except Exception as exc:
+            logger.debug("OpenAI image generation failed", exc_info=True)
+            return error_response(
+                error=f"OpenAI image generation failed: {exc}",
+                error_type="api_error",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        data = getattr(response, "data", None) or []
+        if not data:
+            return error_response(
+                error="OpenAI returned no image data",
+                error_type="empty_response",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        first = data[0]
+        b64 = getattr(first, "b64_json", None)
+        url = getattr(first, "url", None)
+        revised_prompt = getattr(first, "revised_prompt", None)
+
+        if b64:
+            try:
+                saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}")
+            except Exception as exc:
+                return error_response(
+                    error=f"Could not save image to cache: {exc}",
+                    error_type="io_error",
+                    provider="openai",
+                    model=tier_id,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
+            image_ref = str(saved_path)
+        elif url:
+            # Defensive — gpt-image-2 returns b64 today, but fall back
+            # gracefully if the API ever changes.
+            image_ref = url
+        else:
+            return error_response(
+                error="OpenAI response contained neither b64_json nor URL",
+                error_type="empty_response",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]}
+        if revised_prompt:
+            extra["revised_prompt"] = revised_prompt
+
+        return success_response(
+            image=image_ref,
+            model=tier_id,
+            prompt=prompt,
+            aspect_ratio=aspect,
+            provider="openai",
+            extra=extra,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+
+def register(ctx) -> None:
+    """Plugin entry point — wire ``OpenAIImageGenProvider`` into the registry."""
+    ctx.register_image_gen_provider(OpenAIImageGenProvider())
@@ -0,0 +1,7 @@
+name: openai
+version: 1.0.0
+description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/."
+author: NousResearch
+kind: backend
+requires_env:
+  - OPENAI_API_KEY
@@ -84,7 +84,10 @@ Config file: `~/.hermes/hindsight/config.json`
 | `retain_async` | `true` | Process retain asynchronously on the Hindsight server |
 | `retain_every_n_turns` | `1` | Retain every N turns (1 = every turn) |
 | `retain_context` | `conversation between Hermes Agent and the User` | Context label for retained memories |
-| `tags` | — | Tags applied when storing memories |
+| `retain_tags` | — | Default tags applied to retained memories; merged with per-call tool tags |
+| `retain_source` | — | Optional `metadata.source` attached to retained memories |
+| `retain_user_prefix` | `User` | Label used before user turns in auto-retained transcripts |
+| `retain_assistant_prefix` | `Assistant` | Label used before assistant turns in auto-retained transcripts |

 ### Integration

@@ -113,7 +116,7 @@ Available in `hybrid` and `tools` memory modes:

 | Tool | Description |
 |------|-------------|
-| `hindsight_retain` | Store information with auto entity extraction |
+| `hindsight_retain` | Store information with auto entity extraction; supports optional per-call `tags` |
 | `hindsight_recall` | Multi-strategy search (semantic + entity graph) |
 | `hindsight_reflect` | Cross-memory synthesis (LLM-powered) |

@@ -6,11 +6,15 @@ retrieval. Supports cloud (API key) and local modes.
 Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.

 Config via environment variables:
-  HINDSIGHT_API_KEY   — API key for Hindsight Cloud
-  HINDSIGHT_BANK_ID   — memory bank identifier (default: hermes)
-  HINDSIGHT_BUDGET    — recall budget: low/mid/high (default: mid)
-  HINDSIGHT_API_URL   — API endpoint
-  HINDSIGHT_MODE      — cloud or local (default: cloud)
+  HINDSIGHT_API_KEY                — API key for Hindsight Cloud
+  HINDSIGHT_BANK_ID                — memory bank identifier (default: hermes)
+  HINDSIGHT_BUDGET                 — recall budget: low/mid/high (default: mid)
+  HINDSIGHT_API_URL                — API endpoint
+  HINDSIGHT_MODE                   — cloud or local (default: cloud)
+  HINDSIGHT_RETAIN_TAGS            — comma-separated tags attached to retained memories
+  HINDSIGHT_RETAIN_SOURCE          — metadata source value attached to retained memories
+  HINDSIGHT_RETAIN_USER_PREFIX     — label used before user turns in retained transcripts
+  HINDSIGHT_RETAIN_ASSISTANT_PREFIX — label used before assistant turns in retained transcripts

 Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
 ~/.hindsight/config.json (legacy, shared) for backward compatibility.
@@ -24,7 +28,7 @@ import logging
 import os
 import threading

-from hermes_constants import get_hermes_home
+from datetime import datetime, timezone
 from typing import Any, Dict, List

 from agent.memory_provider import MemoryProvider
@@ -99,6 +103,11 @@ RETAIN_SCHEMA = {
        "properties": {
            "content": {"type": "string", "description": "The information to store."},
            "context": {"type": "string", "description": "Short label (e.g. 'user preference', 'project decision')."},
+            "tags": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Optional per-call tags to merge with configured default retain tags.",
+            },
        },
        "required": ["content"],
    },
@@ -168,6 +177,10 @@ def _load_config() -> dict:
    return {
        "mode": os.environ.get("HINDSIGHT_MODE", "cloud"),
        "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""),
+        "retain_tags": os.environ.get("HINDSIGHT_RETAIN_TAGS", ""),
+        "retain_source": os.environ.get("HINDSIGHT_RETAIN_SOURCE", ""),
+        "retain_user_prefix": os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User"),
+        "retain_assistant_prefix": os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant"),
        "banks": {
            "hermes": {
                "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"),
@@ -178,6 +191,48 @@ def _load_config() -> dict:
    }


+def _normalize_retain_tags(value: Any) -> List[str]:
+    """Normalize tag config/tool values to a deduplicated list of strings."""
+    if value is None:
+        return []
+
+    raw_items: list[Any]
+    if isinstance(value, list):
+        raw_items = value
+    elif isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return []
+        if text.startswith("["):
+            try:
+                parsed = json.loads(text)
+            except Exception:
+                parsed = None
+            if isinstance(parsed, list):
+                raw_items = parsed
+            else:
+                raw_items = text.split(",")
+        else:
+            raw_items = text.split(",")
+    else:
+        raw_items = [value]
+
+    normalized = []
+    seen = set()
+    for item in raw_items:
+        tag = str(item).strip()
+        if not tag or tag in seen:
+            continue
+        seen.add(tag)
+        normalized.append(tag)
+    return normalized
+
+
+def _utc_timestamp() -> str:
+    """Return current UTC timestamp in ISO-8601 with milliseconds and Z suffix."""
+    return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z")
+
+
 # ---------------------------------------------------------------------------
 # MemoryProvider implementation
 # ---------------------------------------------------------------------------
@@ -195,6 +250,19 @@ class HindsightMemoryProvider(MemoryProvider):
        self._llm_base_url = ""
        self._memory_mode = "hybrid"  # "context", "tools", or "hybrid"
        self._prefetch_method = "recall"  # "recall" or "reflect"
+        self._retain_tags: List[str] = []
+        self._retain_source = ""
+        self._retain_user_prefix = "User"
+        self._retain_assistant_prefix = "Assistant"
+        self._platform = ""
+        self._user_id = ""
+        self._user_name = ""
+        self._chat_id = ""
+        self._chat_name = ""
+        self._chat_type = ""
+        self._thread_id = ""
+        self._agent_identity = ""
+        self._turn_index = 0
        self._client = None
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
@@ -210,6 +278,7 @@ class HindsightMemoryProvider(MemoryProvider):
        # Retain controls
        self._auto_retain = True
        self._retain_every_n_turns = 1
+        self._retain_async = True
        self._retain_context = "conversation between Hermes Agent and the User"
        self._turn_counter = 0
        self._session_turns: list[str] = []  # accumulates ALL turns for the session
@@ -224,7 +293,6 @@ class HindsightMemoryProvider(MemoryProvider):
        # Bank
        self._bank_mission = ""
        self._bank_retain_mission: str | None = None
-        self._retain_async = True

    @property
    def name(self) -> str:
@@ -423,7 +491,10 @@ class HindsightMemoryProvider(MemoryProvider):
            {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
            {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
            {"key": "recall_prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
-            {"key": "tags", "description": "Tags applied when storing memories (comma-separated)", "default": ""},
+            {"key": "retain_tags", "description": "Default tags applied to retained memories (comma-separated)", "default": ""},
+            {"key": "retain_source", "description": "Metadata source value attached to retained memories", "default": ""},
+            {"key": "retain_user_prefix", "description": "Label used before user turns in retained transcripts", "default": "User"},
+            {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"},
            {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
            {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
            {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
@@ -467,7 +538,7 @@ class HindsightMemoryProvider(MemoryProvider):
        return self._client

    def initialize(self, session_id: str, **kwargs) -> None:
-        self._session_id = session_id
+        self._session_id = str(session_id or "").strip()

        # Check client version and auto-upgrade if needed
        try:
@@ -496,6 +567,16 @@ class HindsightMemoryProvider(MemoryProvider):
            pass  # packaging not available or other issue — proceed anyway

        self._config = _load_config()
+        self._platform = str(kwargs.get("platform") or "").strip()
+        self._user_id = str(kwargs.get("user_id") or "").strip()
+        self._user_name = str(kwargs.get("user_name") or "").strip()
+        self._chat_id = str(kwargs.get("chat_id") or "").strip()
+        self._chat_name = str(kwargs.get("chat_name") or "").strip()
+        self._chat_type = str(kwargs.get("chat_type") or "").strip()
+        self._thread_id = str(kwargs.get("thread_id") or "").strip()
+        self._agent_identity = str(kwargs.get("agent_identity") or "").strip()
+        self._turn_index = 0
+        self._session_turns = []
        self._mode = self._config.get("mode", "cloud")
        # "local" is a legacy alias for "local_embedded"
        if self._mode == "local":
@@ -513,7 +594,7 @@ class HindsightMemoryProvider(MemoryProvider):
        memory_mode = self._config.get("memory_mode", "hybrid")
        self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"

-        prefetch_method = self._config.get("recall_prefetch_method", "recall")
+        prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall")
        self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"

        # Bank options
@@ -521,9 +602,22 @@ class HindsightMemoryProvider(MemoryProvider):
        self._bank_retain_mission = self._config.get("bank_retain_mission") or None

        # Tags
-        self._tags = self._config.get("tags") or None
+        self._retain_tags = _normalize_retain_tags(
+            self._config.get("retain_tags")
+            or os.environ.get("HINDSIGHT_RETAIN_TAGS", "")
+        )
+        self._tags = self._retain_tags or None
        self._recall_tags = self._config.get("recall_tags") or None
        self._recall_tags_match = self._config.get("recall_tags_match", "any")
+        self._retain_source = str(
+            self._config.get("retain_source") or os.environ.get("HINDSIGHT_RETAIN_SOURCE", "")
+        ).strip()
+        self._retain_user_prefix = str(
+            self._config.get("retain_user_prefix") or os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User")
+        ).strip() or "User"
+        self._retain_assistant_prefix = str(
+            self._config.get("retain_assistant_prefix") or os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant")
+        ).strip() or "Assistant"

        # Retain controls
        self._auto_retain = self._config.get("auto_retain", True)
@@ -547,11 +641,9 @@ class HindsightMemoryProvider(MemoryProvider):
        logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s",
                     self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version)
        logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, "
-                     "retain_async=%s, retain_context=%s, "
-                     "recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
+                     "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
                     self._auto_retain, self._auto_recall, self._retain_every_n_turns,
-                     self._retain_async, self._retain_context,
-                     self._recall_max_tokens, self._recall_max_input_chars,
+                     self._retain_async, self._retain_context, self._recall_max_tokens, self._recall_max_input_chars,
                     self._tags, self._recall_tags)

        # For local mode, start the embedded daemon in the background so it
@@ -712,6 +804,78 @@ class HindsightMemoryProvider(MemoryProvider):
        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch")
        self._prefetch_thread.start()

+    def _build_turn_messages(self, user_content: str, assistant_content: str) -> List[Dict[str, str]]:
+        now = datetime.now(timezone.utc).isoformat()
+        return [
+            {
+                "role": "user",
+                "content": f"{self._retain_user_prefix}: {user_content}",
+                "timestamp": now,
+            },
+            {
+                "role": "assistant",
+                "content": f"{self._retain_assistant_prefix}: {assistant_content}",
+                "timestamp": now,
+            },
+        ]
+
+    def _build_metadata(self, *, message_count: int, turn_index: int) -> Dict[str, str]:
+        metadata: Dict[str, str] = {
+            "retained_at": _utc_timestamp(),
+            "message_count": str(message_count),
+            "turn_index": str(turn_index),
+        }
+        if self._retain_source:
+            metadata["source"] = self._retain_source
+        if self._session_id:
+            metadata["session_id"] = self._session_id
+        if self._platform:
+            metadata["platform"] = self._platform
+        if self._user_id:
+            metadata["user_id"] = self._user_id
+        if self._user_name:
+            metadata["user_name"] = self._user_name
+        if self._chat_id:
+            metadata["chat_id"] = self._chat_id
+        if self._chat_name:
+            metadata["chat_name"] = self._chat_name
+        if self._chat_type:
+            metadata["chat_type"] = self._chat_type
+        if self._thread_id:
+            metadata["thread_id"] = self._thread_id
+        if self._agent_identity:
+            metadata["agent_identity"] = self._agent_identity
+        return metadata
+
+    def _build_retain_kwargs(
+        self,
+        content: str,
+        *,
+        context: str | None = None,
+        document_id: str | None = None,
+        metadata: Dict[str, str] | None = None,
+        tags: List[str] | None = None,
+        retain_async: bool | None = None,
+    ) -> Dict[str, Any]:
+        kwargs: Dict[str, Any] = {
+            "bank_id": self._bank_id,
+            "content": content,
+            "metadata": metadata or self._build_metadata(message_count=1, turn_index=self._turn_index),
+        }
+        if context is not None:
+            kwargs["context"] = context
+        if document_id:
+            kwargs["document_id"] = document_id
+        if retain_async is not None:
+            kwargs["retain_async"] = retain_async
+        merged_tags = _normalize_retain_tags(self._retain_tags)
+        for tag in _normalize_retain_tags(tags):
+            if tag not in merged_tags:
+                merged_tags.append(tag)
+        if merged_tags:
+            kwargs["tags"] = merged_tags
+        return kwargs
+
    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Retain conversation turn in background (non-blocking).

@@ -721,19 +885,14 @@ class HindsightMemoryProvider(MemoryProvider):
            logger.debug("sync_turn: skipped (auto_retain disabled)")
            return

-        from datetime import datetime, timezone
-        now = datetime.now(timezone.utc).isoformat()
+        if session_id:
+            self._session_id = str(session_id).strip()

-        messages = [
-            {"role": "user", "content": user_content, "timestamp": now},
-            {"role": "assistant", "content": assistant_content, "timestamp": now},
-        ]
-
-        turn = json.dumps(messages)
+        turn = json.dumps(self._build_turn_messages(user_content, assistant_content))
        self._session_turns.append(turn)
        self._turn_counter += 1
+        self._turn_index = self._turn_counter

-        # Only retain every N turns
        if self._turn_counter % self._retain_every_n_turns != 0:
            logger.debug("sync_turn: buffered turn %d (will retain at turn %d)",
                         self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns))
@@ -741,19 +900,21 @@ class HindsightMemoryProvider(MemoryProvider):

        logger.debug("sync_turn: retaining %d turns, total session content %d chars",
                     len(self._session_turns), sum(len(t) for t in self._session_turns))
-        # Send the ENTIRE session as a single JSON array (document_id deduplicates).
-        # Each element in _session_turns is a JSON string of that turn's messages.
        content = "[" + ",".join(self._session_turns) + "]"

        def _sync():
            try:
                client = self._get_client()
-                item: dict = {
-                    "content": content,
-                    "context": self._retain_context,
-                }
-                if self._tags:
-                    item["tags"] = self._tags
+                item = self._build_retain_kwargs(
+                    content,
+                    context=self._retain_context,
+                    metadata=self._build_metadata(
+                        message_count=len(self._session_turns) * 2,
+                        turn_index=self._turn_index,
+                    ),
+                )
+                item.pop("bank_id", None)
+                item.pop("retain_async", None)
                logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
                             self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns))
                _run_sync(client.aretain_batch(
@@ -789,11 +950,11 @@ class HindsightMemoryProvider(MemoryProvider):
                return tool_error("Missing required parameter: content")
            context = args.get("context")
            try:
-                retain_kwargs: dict = {
-                    "bank_id": self._bank_id, "content": content, "context": context,
-                }
-                if self._tags:
-                    retain_kwargs["tags"] = self._tags
+                retain_kwargs = self._build_retain_kwargs(
+                    content,
+                    context=context,
+                    tags=args.get("tags"),
+                )
                logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s",
                             self._bank_id, len(content), context)
                _run_sync(client.aretain(**retain_kwargs))
@@ -126,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
 hermes_cli = ["web_dist/**/*"]

 [tool.setuptools.packages.find]
-include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -44,16 +44,21 @@ AUTHOR_MAP = {
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
    # contributors (from noreply pattern)
+    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
    "snreynolds2506@gmail.com": "snreynolds",
    "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
    "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo",
    "massivemassimo@users.noreply.github.com": "MassiveMassimo",
    "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+    "keifergu@tencent.com": "keifergu",
    "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+    "abner.the.foreman@agentmail.to": "Abnertheforeman",
+    "harryykyle1@gmail.com": "hharry11",
    "kshitijk4poor@gmail.com": "kshitijk4poor",
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
+    "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
    "valdi.jorge@gmail.com": "jvcl",
    "francip@gmail.com": "francip",
    "omni@comelse.com": "omnissiah-comelse",
@@ -91,6 +96,8 @@ AUTHOR_MAP = {
    "135070653+sgaofen@users.noreply.github.com": "sgaofen",
    "nocoo@users.noreply.github.com": "nocoo",
    "30841158+n-WN@users.noreply.github.com": "n-WN",
+    "tsuijinglei@gmail.com": "hiddenpuppy",
+    "jerome@clawwork.ai": "HiddenPuppy",
    "leoyuan0099@gmail.com": "keyuyuan",
    "bxzt2006@163.com": "Only-Code-A",
    "i@troy-y.org": "TroyMitchell911",
@@ -98,6 +105,7 @@ AUTHOR_MAP = {
    "hansnow@users.noreply.github.com": "hansnow",
    "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
    "ben.burtenshaw@gmail.com": "burtenshaw",
+    "roopaknijhara@gmail.com": "rnijhara",
    # contributors (manual mapping from git names)
    "ahmedsherif95@gmail.com": "asheriif",
    "liujinkun@bytedance.com": "liujinkun2025",
@@ -174,6 +182,7 @@ AUTHOR_MAP = {
    "adavyasharma@gmail.com": "adavyas",
    "acaayush1111@gmail.com": "aayushchaudhary",
    "jason@outland.art": "jasonoutland",
+    "73175452+Magaav@users.noreply.github.com": "Magaav",
    "mrflu1918@proton.me": "SPANISHFLU",
    "morganemoss@gmai.com": "mormio",
    "kopjop926@gmail.com": "cesareth",
@@ -278,6 +287,7 @@ AUTHOR_MAP = {
    "srhtsrht17@gmail.com": "Sertug17",
    "stephenschoettler@gmail.com": "stephenschoettler",
    "tanishq231003@gmail.com": "yyovil",
+    "taosiyuan163@153.com": "taosiyuan163",
    "tesseracttars@gmail.com": "tesseracttars-creator",
    "tianliangjay@gmail.com": "xingkongliang",
    "tranquil_flow@protonmail.com": "Tranquil-Flow",
@@ -333,6 +343,8 @@ AUTHOR_MAP = {
    "asslaenn5@gmail.com": "Aslaaen",
    "shalompmc0505@naver.com": "pinion05",
    "105142614+VTRiot@users.noreply.github.com": "VTRiot",
+    "vivien000812@gmail.com": "iamagenius00",
+    "89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
 }


@@ -0,0 +1,77 @@
+# Port Notes — baoyu-comic
+
+Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1.
+
+## Changes from upstream
+
+### SKILL.md adaptations
+
+| Change | Upstream | Hermes |
+|--------|----------|--------|
+| Metadata namespace | `openclaw` | `hermes` (with `tags` + `homepage`) |
+| Trigger | Slash commands / CLI flags | Natural language skill matching |
+| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra |
+| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one question at a time) |
+| Image generation | baoyu-imagine (Bun/TypeScript, supports `--ref`) | `image_generate` — **prompt-only**, returns a URL; no reference image input; agent must download the URL to the output directory |
+| PDF assembly | `scripts/merge-to-pdf.ts` (Bun + `pdf-lib`) | Removed — the PDF merge step is out of scope for this port; pages are delivered as PNGs only |
+| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only |
+| File operations | Generic instructions | Hermes file tools (`write_file`, `read_file`) |
+
+### Structural removals
+
+- **`references/config/` directory** (removed entirely):
+  - `first-time-setup.md` — blocking first-time setup flow for EXTEND.md
+  - `preferences-schema.md` — EXTEND.md YAML schema
+  - `watermark-guide.md` — watermark config (tied to EXTEND.md)
+- **`scripts/` directory** (removed entirely): upstream's `merge-to-pdf.ts` depended on `pdf-lib`, which is not declared anywhere in the Hermes repo. Rather than add a new dependency, the port drops PDF assembly and delivers per-page PNGs.
+- **Workflow Step 8 (Merge to PDF)** removed from `workflow.md`; Step 9 (Completion report) renumbered to Step 8.
+- **Workflow Step 1.1** — "Load Preferences (EXTEND.md)" section removed from `workflow.md`; steps 1.2/1.3 renumbered to 1.1/1.2.
+- **Generic "User Input Tools" and "Image Generation Tools" preambles** — SKILL.md no longer lists fallback rules for multiple possible tools; it references `clarify` and `image_generate` directly.
+
+### Image generation strategy changes
+
+`image_generate`'s schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`). Upstream's reference-image flow (`--ref characters.png` for character consistency, plus user-supplied refs for style/palette/scene) does not map to this tool, so the workflow was restructured:
+
+- **Character sheet PNG** is still generated for multi-page comics, but it is repositioned as a **human-facing review artifact** (for visual verification) and a reference for later regenerations / manual prompt edits. Page prompts themselves are built from the **text descriptions** in `characters/characters.md` (embedded inline during Step 5). `image_generate` never sees the PNG as a visual input.
+- **User-supplied reference images** are reduced to `style` / `palette` / `scene` trait extraction — traits are embedded in the prompt body; the image files themselves are kept only for provenance under `refs/`.
+- **Page prompts** now mandate that character descriptions are embedded inline (copied from `characters/characters.md`) — this is the only mechanism left to enforce cross-page character consistency.
+- **Download step** — after every `image_generate` call, the returned URL is fetched to disk (e.g., `curl -fsSL "<url>" -o <target>.png`) and verified before the workflow advances.
+
+### SKILL.md reductions
+
+- CLI option columns (`--art`, `--tone`, `--layout`, `--aspect`, `--lang`, `--ref`, `--storyboard-only`, `--prompts-only`, `--images-only`, `--regenerate`) converted to plain-English option descriptions.
+- Preset files (`presets/*.md`) and `ohmsha-guide.md`: `` `--style X` `` / `` `--art X --tone Y` `` shorthand rewritten to `art=X, tone=Y` + natural-language references.
+- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues; PDF-related outputs removed.
+- `auto-selection.md`: priority order dropped the EXTEND.md tier.
+- `analysis-framework.md`: language-priority comment updated (user option → conversation → source).
+
+### File naming convention
+
+Source content pasted by the user is saved as `source-{slug}.md`, where `{slug}` is the kebab-case topic slug used for the output directory. Backups follow the same pattern with a `-backup-YYYYMMDD-HHMMSS` suffix. SKILL.md and `workflow.md` now agree on this single convention.
+
+### What was preserved verbatim
+
+- All 6 art-style definitions (`references/art-styles/`)
+- All 7 tone definitions (`references/tones/`)
+- All 7 layout definitions (`references/layouts/`)
+- Core templates: `character-template.md`, `storyboard-template.md`, `base-prompt.md`
+- Preset bodies (only the first few intro lines adapted; special rules unchanged)
+- Author, version, homepage attribution
+
+## Syncing with upstream
+
+To pull upstream updates:
+
+```bash
+# Compare versions
+curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/SKILL.md | head -5
+# Look for the version: line
+
+# Diff a reference file
+diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/references/art-styles/manga.md) \
+     references/art-styles/manga.md
+```
+
+Art-style, tone, and layout reference files can usually be overwritten directly (they're upstream-verbatim). `SKILL.md`, `references/workflow.md`, `references/partial-workflows.md`, `references/auto-selection.md`, `references/analysis-framework.md`, `references/ohmsha-guide.md`, and `references/presets/*.md` must be manually merged since they contain Hermes-specific adaptations.
+
+If upstream adds a Hermes-compatible PDF merge step (no extra npm deps), restore `scripts/` and reintroduce Step 8 in `workflow.md`.
@@ -0,0 +1,246 @@
+---
+name: baoyu-comic
+description: Knowledge comic creator supporting multiple art styles and tones. Creates original educational comics with detailed panel layouts and sequential image generation. Use when user asks to create "知识漫画", "教育漫画", "biography comic", "tutorial comic", or "Logicomix-style comic".
+version: 1.56.1
+author: 宝玉 (JimLiu)
+license: MIT
+metadata:
+  hermes:
+    tags: [comic, knowledge-comic, creative, image-generation]
+    homepage: https://github.com/JimLiu/baoyu-skills#baoyu-comic
+---
+
+# Knowledge Comic Creator
+
+Adapted from [baoyu-comic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem.
+
+Create original knowledge comics with flexible art style × tone combinations.
+
+## When to Use
+
+Trigger this skill when the user asks to create a knowledge/educational comic, biography comic, tutorial comic, or uses terms like "知识漫画", "教育漫画", or "Logicomix-style". The user provides content (text, file path, URL, or topic) and optionally specifies art style, tone, layout, aspect ratio, or language.
+
+## Reference Images
+
+Hermes' `image_generate` tool is **prompt-only** — it accepts a text prompt and an aspect ratio, and returns an image URL. It does **NOT** accept reference images. When the user supplies a reference image, use it to **extract traits in text** that get embedded in every page prompt:
+
+**Intake**: Accept file paths when the user provides them (or pastes images in conversation).
+- File path(s) → copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output for provenance
+- Pasted image with no path → ask the user for the path via `clarify`, or extract style traits verbally as a text fallback
+- No reference → skip this section
+
+**Usage modes** (per reference):
+
+| Usage | Effect |
+|-------|--------|
+| `style` | Extract style traits (line treatment, texture, mood) and append to every page's prompt body |
+| `palette` | Extract hex colors and append to every page's prompt body |
+| `scene` | Extract scene composition or subject notes and append to the relevant page(s) |
+
+**Record in each page's prompt frontmatter** when refs exist:
+
+```yaml
+references:
+  - ref_id: 01
+    filename: 01-ref-scene.png
+    usage: style
+    traits: "muted earth tones, soft-edged ink wash, low-contrast backgrounds"
+```
+
+Character consistency is driven by **text descriptions** in `characters/characters.md` (written in Step 3) that get embedded inline in every page prompt (Step 5). The optional PNG character sheet generated in Step 7.1 is a human-facing review artifact, not an input to `image_generate`.
+
+## Options
+
+### Visual Dimensions
+
+| Option | Values | Description |
+|--------|--------|-------------|
+| Art | ligne-claire (default), manga, realistic, ink-brush, chalk, minimalist | Art style / rendering technique |
+| Tone | neutral (default), warm, dramatic, romantic, energetic, vintage, action | Mood / atmosphere |
+| Layout | standard (default), cinematic, dense, splash, mixed, webtoon, four-panel | Panel arrangement |
+| Aspect | 3:4 (default, portrait), 4:3 (landscape), 16:9 (widescreen) | Page aspect ratio |
+| Language | auto (default), zh, en, ja, etc. | Output language |
+| Refs | File paths | Reference images used for style / palette trait extraction (not passed to the image model). See [Reference Images](#reference-images) above. |
+
+### Partial Workflow Options
+
+| Option | Description |
+|--------|-------------|
+| Storyboard only | Generate storyboard only, skip prompts and images |
+| Prompts only | Generate storyboard + prompts, skip images |
+| Images only | Generate images from existing prompts directory |
+| Regenerate N | Regenerate specific page(s) only (e.g., `3` or `2,5,8`) |
+
+Details: [references/partial-workflows.md](references/partial-workflows.md)
+
+### Art, Tone & Preset Catalogue
+
+- **Art styles** (6): `ligne-claire`, `manga`, `realistic`, `ink-brush`, `chalk`, `minimalist`. Full definitions at `references/art-styles/<style>.md`.
+- **Tones** (7): `neutral`, `warm`, `dramatic`, `romantic`, `energetic`, `vintage`, `action`. Full definitions at `references/tones/<tone>.md`.
+- **Presets** (5) with special rules beyond plain art+tone:
+
+  | Preset | Equivalent | Hook |
+  |--------|-----------|------|
+  | `ohmsha` | manga + neutral | Visual metaphors, no talking heads, gadget reveals |
+  | `wuxia` | ink-brush + action | Qi effects, combat visuals, atmospheric |
+  | `shoujo` | manga + romantic | Decorative elements, eye details, romantic beats |
+  | `concept-story` | manga + warm | Visual symbol system, growth arc, dialogue+action balance |
+  | `four-panel` | minimalist + neutral + four-panel layout | 起承转合 structure, B&W + spot color, stick-figure characters |
+
+  Full rules at `references/presets/<preset>.md` — load the file when a preset is picked.
+
+- **Compatibility matrix** and **content-signal → preset** table live in [references/auto-selection.md](references/auto-selection.md). Read it before recommending combinations in Step 2.
+
+## File Structure
+
+Output directory: `comic/{topic-slug}/`
+- Slug: 2-4 words kebab-case from topic (e.g., `alan-turing-bio`)
+- Conflict: append timestamp (e.g., `turing-story-20260118-143052`)
+
+**Contents**:
+| File | Description |
+|------|-------------|
+| `source-{slug}.md` | Saved source content (kebab-case slug matches the output directory) |
+| `analysis.md` | Content analysis |
+| `storyboard.md` | Storyboard with panel breakdown |
+| `characters/characters.md` | Character definitions |
+| `characters/characters.png` | Character reference sheet (downloaded from `image_generate`) |
+| `prompts/NN-{cover\|page}-[slug].md` | Generation prompts |
+| `NN-{cover\|page}-[slug].png` | Generated images (downloaded from `image_generate`) |
+| `refs/NN-ref-{slug}.{ext}` | User-supplied reference images (optional, for provenance) |
+
+## Language Handling
+
+**Detection Priority**:
+1. User-specified language (explicit option)
+2. User's conversation language
+3. Source content language
+
+**Rule**: Use user's input language for ALL interactions:
+- Storyboard outlines and scene descriptions
+- Image generation prompts
+- User selection options and confirmations
+- Progress updates, questions, errors, summaries
+
+Technical terms remain in English.
+
+## Workflow
+
+### Progress Checklist
+
+```
+Comic Progress:
+- [ ] Step 1: Setup & Analyze
+  - [ ] 1.1 Analyze content
+  - [ ] 1.2 Check existing directory
+- [ ] Step 2: Confirmation - Style & options ⚠️ REQUIRED
+- [ ] Step 3: Generate storyboard + characters
+- [ ] Step 4: Review outline (conditional)
+- [ ] Step 5: Generate prompts
+- [ ] Step 6: Review prompts (conditional)
+- [ ] Step 7: Generate images
+  - [ ] 7.1 Generate character sheet (if needed) → characters/characters.png
+  - [ ] 7.2 Generate pages (with character descriptions embedded in prompt)
+- [ ] Step 8: Completion report
+```
+
+### Flow
+
+```
+Input → Analyze → [Check Existing?] → [Confirm: Style + Reviews] → Storyboard → [Review?] → Prompts → [Review?] → Images → Complete
+```
+
+### Step Summary
+
+| Step | Action | Key Output |
+|------|--------|------------|
+| 1.1 | Analyze content | `analysis.md`, `source-{slug}.md` |
+| 1.2 | Check existing directory | Handle conflicts |
+| 2 | Confirm style, focus, audience, reviews | User preferences |
+| 3 | Generate storyboard + characters | `storyboard.md`, `characters/` |
+| 4 | Review outline (if requested) | User approval |
+| 5 | Generate prompts | `prompts/*.md` |
+| 6 | Review prompts (if requested) | User approval |
+| 7.1 | Generate character sheet (if needed) | `characters/characters.png` |
+| 7.2 | Generate pages | `*.png` files |
+| 8 | Completion report | Summary |
+
+### User Questions
+
+Use the `clarify` tool to confirm options. Since `clarify` handles one question at a time, ask the most important question first and proceed sequentially. See [references/workflow.md](references/workflow.md) for the full Step 2 question set.
+
+**Timeout handling (CRITICAL)**: `clarify` can return `"The user did not provide a response within the time limit. Use your best judgement to make the choice and proceed."` — this is NOT user consent to default everything.
+
+- Treat it as a default **for that one question only**. Continue asking the remaining Step 2 questions in sequence; each question is an independent consent point.
+- **Surface the default to the user visibly** in your next message so they have a chance to correct it: e.g. `"Style: defaulted to ohmsha preset (clarify timed out). Say the word to switch."` — an unreported default is indistinguishable from never having asked.
+- Do NOT collapse Step 2 into a single "use all defaults" pass after one timeout. If the user is genuinely absent, they will be equally absent for all five questions — but they can correct visible defaults when they return, and cannot correct invisible ones.
+
+### Step 7: Image Generation
+
+Use Hermes' built-in `image_generate` tool for all image rendering. Its schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`); it **returns a URL**, not a local file. Every generated page or character sheet must therefore be downloaded to the output directory.
+
+**Prompt file requirement (hard)**: write each image's full, final prompt to a standalone file under `prompts/` (naming: `NN-{type}-[slug].md`) BEFORE calling `image_generate`. The prompt file is the reproducibility record.
+
+**Aspect ratio mapping** — the storyboard's `aspect_ratio` field maps to `image_generate`'s format as follows:
+
+| Storyboard ratio | `image_generate` format |
+|------------------|-------------------------|
+| `3:4`, `9:16`, `2:3` | `portrait` |
+| `4:3`, `16:9`, `3:2` | `landscape` |
+| `1:1` | `square` |
+
+**Download step** — after every `image_generate` call:
+1. Read the URL from the tool result
+2. Fetch the image bytes using an **absolute** output path, e.g.
+   `curl -fsSL "<url>" -o /abs/path/to/comic/<slug>/NN-page-<slug>.png`
+3. Verify the file exists and is non-empty at that exact path before proceeding to the next page
+
+**Never rely on shell CWD persistence for `-o` paths.** The terminal tool's persistent-shell CWD can change between batches (session expiry, `TERMINAL_LIFETIME_SECONDS`, a failed `cd` that leaves you in the wrong directory). `curl -o relative/path.png` is a silent footgun: if CWD has drifted, the file lands somewhere else with no error. **Always pass a fully-qualified absolute path to `-o`**, or pass `workdir=<abs path>` to the terminal tool. Incident Apr 2026: pages 06-09 of a 10-page comic landed at the repo root instead of `comic/<slug>/` because batch 3 inherited a stale CWD from batch 2 and `curl -o 06-page-skills.png` wrote to the wrong directory. The agent then spent several turns claiming the files existed where they didn't.
+
+**7.1 Character sheet** — generate it (to `characters/characters.png`, aspect `landscape`) when the comic is multi-page with recurring characters. Skip for simple presets (e.g., four-panel minimalist) or single-page comics. The prompt file at `characters/characters.md` must exist before invoking `image_generate`. The rendered PNG is a **human-facing review artifact** (so the user can visually verify character design) and a reference for later regenerations or manual prompt edits — it does **not** drive Step 7.2. Page prompts are already written in Step 5 from the **text descriptions** in `characters/characters.md`; `image_generate` cannot accept images as visual input.
+
+**7.2 Pages** — each page's prompt MUST already be at `prompts/NN-{cover|page}-[slug].md` before invoking `image_generate`. Because `image_generate` is prompt-only, character consistency is enforced by **embedding character descriptions (sourced from `characters/characters.md`) inline in every page prompt during Step 5**. The embedding is done uniformly whether or not a PNG sheet is produced in 7.1; the PNG is only a review/regeneration aid.
+
+**Backup rule**: existing `prompts/…md` and `…png` files → rename with `-backup-YYYYMMDD-HHMMSS` suffix before regenerating.
+
+Full step-by-step workflow (analysis, storyboard, review gates, regeneration variants): [references/workflow.md](references/workflow.md).
+
+## References
+
+**Core Templates**:
+- [analysis-framework.md](references/analysis-framework.md) - Deep content analysis
+- [character-template.md](references/character-template.md) - Character definition format
+- [storyboard-template.md](references/storyboard-template.md) - Storyboard structure
+- [ohmsha-guide.md](references/ohmsha-guide.md) - Ohmsha manga specifics
+
+**Style Definitions**:
+- `references/art-styles/` - Art styles (ligne-claire, manga, realistic, ink-brush, chalk, minimalist)
+- `references/tones/` - Tones (neutral, warm, dramatic, romantic, energetic, vintage, action)
+- `references/presets/` - Presets with special rules (ohmsha, wuxia, shoujo, concept-story, four-panel)
+- `references/layouts/` - Layouts (standard, cinematic, dense, splash, mixed, webtoon, four-panel)
+
+**Workflow**:
+- [workflow.md](references/workflow.md) - Full workflow details
+- [auto-selection.md](references/auto-selection.md) - Content signal analysis
+- [partial-workflows.md](references/partial-workflows.md) - Partial workflow options
+
+## Page Modification
+
+| Action | Steps |
+|--------|-------|
+| **Edit** | **Update prompt file FIRST** → regenerate image → download new PNG |
+| **Add** | Create prompt at position → generate with character descriptions embedded → renumber subsequent → update storyboard |
+| **Delete** | Remove files → renumber subsequent → update storyboard |
+
+**IMPORTANT**: When updating pages, ALWAYS update the prompt file (`prompts/NN-{cover|page}-[slug].md`) FIRST before regenerating. This ensures changes are documented and reproducible.
+
+## Pitfalls
+
+- Image generation: 10-30 seconds per page; auto-retry once on failure
+- **Always download** the URL returned by `image_generate` to a local PNG — downstream tooling (and the user's review) expects files in the output directory, not ephemeral URLs
+- **Use absolute paths for `curl -o`** — never rely on persistent-shell CWD across batches. Silent footgun: files land in the wrong directory and subsequent `ls` on the intended path shows nothing. See Step 7 "Download step".
+- Use stylized alternatives for sensitive public figures
+- **Step 2 confirmation required** - do not skip
+- **Steps 4/6 conditional** - only if user requested in Step 2
+- **Step 7.1 character sheet** - recommended for multi-page comics, optional for simple presets. The PNG is a review/regeneration aid; page prompts (written in Step 5) use the text descriptions in `characters/characters.md`, not the PNG. `image_generate` does not accept images as visual input
+- **Strip secrets** — scan source content for API keys, tokens, or credentials before writing any output file
@@ -0,0 +1,176 @@
+# Comic Content Analysis Framework
+
+Deep analysis framework for transforming source content into effective visual storytelling.
+
+## Purpose
+
+Before creating a comic, thoroughly analyze the source material to:
+- Identify the target audience and their needs
+- Determine what value the comic will deliver
+- Extract narrative potential for visual storytelling
+- Plan character arcs and key moments
+
+## Analysis Dimensions
+
+### 1. Core Content (Understanding "What")
+
+**Central Message**
+- What is the single most important idea readers should take away?
+- Can you express it in one sentence?
+
+**Key Concepts**
+- What are the essential concepts readers must understand?
+- How should these concepts be visualized?
+- Which concepts need simplified explanations?
+
+**Content Structure**
+- How is the source material organized?
+- What is the natural narrative arc?
+- Where are the climax and turning points?
+
+**Evidence & Examples**
+- What concrete examples, data, or stories support the main ideas?
+- Which examples translate well to visual panels?
+- What can be shown rather than told?
+
+### 2. Context & Background (Understanding "Why")
+
+**Source Origin**
+- Who created this content? What is their perspective?
+- What was the original purpose?
+- Is there bias to be aware of?
+
+**Historical/Cultural Context**
+- When and where does the story take place?
+- What background knowledge do readers need?
+- What period-specific visual elements are required?
+
+**Underlying Assumptions**
+- What does the source assume readers already know?
+- What implicit beliefs or values are present?
+- Should the comic challenge or reinforce these?
+
+### 3. Audience Analysis
+
+**Primary Audience**
+- Who will read this comic?
+- What is their existing knowledge level?
+- What are their interests and motivations?
+
+**Secondary Audiences**
+- Who else might benefit from this comic?
+- How might their needs differ?
+
+**Reader Questions**
+- What questions will readers have?
+- What misconceptions might they bring?
+- What "aha moments" can we create?
+
+### 4. Value Proposition
+
+**Knowledge Value**
+- What will readers learn?
+- What new perspectives will they gain?
+- How will this change their understanding?
+
+**Emotional Value**
+- What emotions should readers feel?
+- What connections will they make with characters?
+- What will make this memorable?
+
+**Practical Value**
+- Can readers apply what they learn?
+- What actions might this inspire?
+- What conversations might it spark?
+
+### 5. Narrative Potential
+
+**Story Arc Candidates**
+- What natural narratives exist in the content?
+- Where is the conflict or tension?
+- What transformations occur?
+
+**Character Potential**
+- Who are the key figures?
+- What are their motivations and obstacles?
+- How do they change throughout?
+
+**Visual Opportunities**
+- What scenes have strong visual potential?
+- Where can abstract concepts become concrete images?
+- What metaphors can be visualized?
+
+**Dramatic Moments**
+- What are the breakthrough/revelation moments?
+- Where are the emotional peaks?
+- What creates tension and release?
+
+### 6. Adaptation Considerations
+
+**What to Keep**
+- Essential facts and ideas
+- Key quotes or moments
+- Core emotional beats
+
+**What to Simplify**
+- Complex explanations
+- Dense technical details
+- Lengthy descriptions
+
+**What to Expand**
+- Brief mentions that deserve more attention
+- Implied emotions or relationships
+- Visual details not in source
+
+**What to Omit**
+- Tangential information
+- Redundant examples
+- Content that doesn't serve the narrative
+
+## Output Format
+
+Analysis results should be saved to `analysis.md` with:
+
+1. **YAML Front Matter**: Metadata (title, topic, time_span, source_language, user_language, aspect_ratio, recommended_page_count, recommended_art, recommended_tone, recommended_layout)
+2. **Target Audience**: Primary, secondary, tertiary audiences with their needs
+3. **Value Proposition**: What readers will gain (knowledge, emotional, practical)
+4. **Core Themes**: Table with theme, narrative potential, visual opportunity
+5. **Key Figures & Story Arcs**: Character profiles with arcs, visual identity, key moments
+6. **Content Signals**: Style and layout recommendations based on content type
+7. **Recommended Approaches**: Narrative approaches ranked by suitability
+
+### YAML Front Matter Example
+
+```yaml
+---
+title: "Alan Turing: The Father of Computing"
+topic: alan-turing-biography
+time_span: 1912-1954
+source_language: en
+user_language: zh  # User-specified or detected from conversation
+aspect_ratio: "3:4"
+recommended_page_count: 16
+recommended_art: ligne-claire  # ligne-claire|manga|realistic|ink-brush|chalk
+recommended_tone: neutral      # neutral|warm|dramatic|romantic|energetic|vintage|action
+recommended_layout: mixed      # standard|cinematic|dense|splash|mixed|webtoon
+---
+```
+
+### Language Fields
+
+| Field | Description |
+|-------|-------------|
+| `source_language` | Detected language of source content |
+| `user_language` | Output language for comic (user-specified option > conversation language > source_language) |
+
+## Analysis Checklist
+
+Before proceeding to storyboard:
+
+- [ ] Can I state the core message in one sentence?
+- [ ] Do I know exactly who will read this comic?
+- [ ] Have I identified at least 3 ways this comic provides value?
+- [ ] Are there clear protagonists with compelling arcs?
+- [ ] Have I found at least 5 visually powerful moments?
+- [ ] Do I understand what to keep, simplify, expand, and omit?
+- [ ] Have I identified the emotional peaks and valleys?
@@ -0,0 +1,101 @@
+# chalk
+
+粉笔画风 - Chalkboard aesthetic with hand-drawn warmth
+
+## Overview
+
+Classic classroom chalkboard aesthetic with hand-drawn chalk illustrations. Nostalgic educational feel with imperfect, sketchy lines that capture the warmth of traditional teaching.
+
+## Line Work
+
+- Sketchy, imperfect hand-drawn lines
+- Chalk texture on all strokes
+- Varying line weight from chalk pressure
+- Soft edges, no sharp digital lines
+- Visible chalk dust effects
+
+## Character Design
+
+- Simplified, friendly character designs
+- Stick figures to semi-detailed range
+- Expressive through simple gestures
+- Approachable, non-intimidating
+- Educational presenter style
+
+## Background
+
+- Chalkboard Black (#1A1A1A) or Dark Green-Black (#1C2B1C)
+- Realistic chalkboard texture
+- Subtle scratches and dust particles
+- Faint eraser marks for authenticity
+- Wooden frame border optional
+
+## Typography
+
+- Hand-drawn chalk lettering style
+- Visible chalk texture on text
+- Imperfect baseline adds authenticity
+- White or bright colored chalk for emphasis
+
+## Visual Elements
+
+- Hand-drawn chalk illustrations
+- Chalk dust effects around elements
+- Doodles: stars, arrows, underlines, circles
+- Mathematical formulas and diagrams
+- Eraser smudges and chalk residue
+- Stick figures and simple icons
+- Connection lines with hand-drawn feel
+
+## Default Color Palette
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Background | Chalkboard Black | #1A1A1A |
+| Alt Background | Green-Black | #1C2B1C |
+| Primary Text | Chalk White | #F5F5F5 |
+| Accent 1 | Chalk Yellow | #FFE566 |
+| Accent 2 | Chalk Pink | #FF9999 |
+| Accent 3 | Chalk Blue | #66B3FF |
+| Accent 4 | Chalk Green | #90EE90 |
+| Accent 5 | Chalk Orange | #FFB366 |
+
+## Style Rules
+
+### Do
+- Maintain authentic chalk texture on all elements
+- Use imperfect, hand-drawn quality throughout
+- Add subtle chalk dust and smudge effects
+- Create visual hierarchy with color variety
+- Include playful doodles and annotations
+
+### Don't
+- Use perfect geometric shapes
+- Create clean digital-looking lines
+- Add photorealistic elements
+- Use gradients or glossy effects
+
+## Quality Markers
+
+- ✓ Authentic chalk texture throughout
+- ✓ Imperfect, hand-drawn quality
+- ✓ Readable despite sketchy style
+- ✓ Nostalgic classroom feel
+- ✓ Effective color hierarchy
+- ✓ Playful educational aesthetic
+
+## Compatibility
+
+| Tone | Fit | Notes |
+|------|-----|-------|
+| neutral | ✓✓ | Classic educational |
+| warm | ✓✓ | Nostalgic feel |
+| dramatic | ✗ | Style mismatch |
+| vintage | ✓ | Old school feel |
+| romantic | ✗ | Style mismatch |
+| energetic | ✓✓ | Fun learning |
+| action | ✗ | Style mismatch |
+
+## Best For
+
+Educational content, tutorials, classroom themes, teaching materials, workshops, informal learning, knowledge sharing
@@ -0,0 +1,97 @@
+# ink-brush
+
+水墨画风 - Chinese ink brush aesthetics with dynamic strokes
+
+## Overview
+
+Traditional Chinese ink brush painting style adapted for comics. Combines calligraphic brush strokes with ink wash effects. Creates atmospheric, artistic visuals rooted in East Asian aesthetics.
+
+## Line Work
+
+- 2-3px dynamic brush strokes with varying weight
+- Ink wash effects, traditional Chinese brush feel
+- Bold, confident strokes with sharp edges
+- Flowing lines for fabric and hair
+- Pressure-sensitive stroke variation
+
+## Character Design
+
+- Realistic human proportions (7.5-8 head heights)
+- Defined features with ink brush definition
+- Dynamic poses capturing movement
+- Flowing hair and clothing in motion
+- Traditional attire options (robes, hanfu)
+- Intense, expressive faces
+
+## Brush Techniques
+
+| Technique | Usage |
+|-----------|-------|
+| Bold strokes | Character outlines |
+| Fine lines | Details, hair |
+| Ink wash | Atmosphere, shadows |
+| Dry brush | Texture, aging |
+| Splatter | Impact, drama |
+
+## Background Treatment
+
+- Dramatic landscapes: mountains, waterfalls, temples
+- Ink wash atmospheric effects
+- Misty, layered depth
+- Traditional architecture elements
+- High contrast silhouettes
+- Negative space as design element
+
+## Color Approach
+
+- Ink gradients as primary
+- Limited accent colors
+- Traditional Chinese palette
+- Atmospheric color washes
+- High contrast compositions
+
+## Default Color Palette
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Primary | Deep black ink | #1A1A1A |
+| Accent | Crimson red | #8B0000 |
+| Accent | Imperial gold | #D4AF37 |
+| Skin | Natural tan | #D4A574 |
+| Background | Misty gray | #9CA3AF |
+| Background | Earth tone | #8B7355 |
+| Wash | Ink gradient | #2D3748 |
+
+## Visual Elements
+
+- Calligraphic text integration
+- Seal stamps (optional)
+- Ink splatter effects
+- Flowing fabric trails
+- Atmospheric mist
+- Mountain silhouettes
+
+## Quality Markers
+
+- ✓ Dynamic brush stroke quality
+- ✓ Authentic ink wash atmosphere
+- ✓ High contrast compositions
+- ✓ Flowing movement in fabric/hair
+- ✓ Traditional aesthetic elements
+- ✓ Atmospheric depth
+
+## Compatibility
+
+| Tone | Fit | Notes |
+|------|-----|-------|
+| neutral | ✓ | Contemplative stories |
+| warm | ✓ | Nostalgic, gentle |
+| dramatic | ✓✓ | High contrast |
+| vintage | ✓✓ | Historical pieces |
+| romantic | ✗ | Style mismatch |
+| energetic | ✗ | Too refined |
+| action | ✓✓ | Martial arts |
+
+## Best For
+
+Chinese historical stories, martial arts, traditional tales, contemplative narratives, artistic adaptations
@@ -0,0 +1,75 @@
+# ligne-claire
+
+清线画风 - Uniform lines, flat colors, European comic tradition
+
+## Overview
+
+Classic European comic style originating from Hergé's Tintin. Characterized by clean, uniform outlines and flat color fills without gradients. Creates a timeless, accessible aesthetic suitable for educational and narrative content.
+
+## Line Work
+
+- Uniform, clean outlines with consistent weight (2px)
+- No hatching or cross-hatching for shading
+- Sharp, precise edges on all elements
+- Black ink outlines on all figures and objects
+- Shadows indicated through flat color areas, not line techniques
+
+## Character Design
+
+- Slightly stylized/cartoonish characters with realistic proportions
+- Distinctive, recognizable facial features
+- Expressive faces with clear emotions
+- Period-appropriate clothing with attention to detail
+- Consistent character appearance across panels
+- 6-7 head height proportions
+
+## Background Treatment
+
+- Detailed, realistic backgrounds with architectural accuracy
+- Period-specific props and technology
+- Clear spatial depth and perspective
+- Environmental storytelling through details
+- Contrast between simplified characters and detailed backgrounds
+
+## Color Approach
+
+- Flat colors without gradients (true to Ligne Claire tradition)
+- Limited palette per page for cohesion
+- Colors support narrative mood
+- Consistent lighting logic within scenes
+
+## Default Color Palette
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Primary Blue | Clean blue | #3182CE |
+| Primary Red | Classic red | #E53E3E |
+| Primary Yellow | Warm yellow | #ECC94B |
+| Skin | Warm tan | #F7CFAE |
+| Background Light | Light cream | #FFFAF0 |
+| Background Sky | Sky blue | #BEE3F8 |
+
+## Quality Markers
+
+- ✓ Clean, uniform line weight throughout
+- ✓ Flat colors without gradients
+- ✓ Detailed backgrounds, stylized characters
+- ✓ Clear panel borders and reading flow
+- ✓ Hand-drawn text style
+- ✓ Proper perspective in environments
+
+## Compatibility
+
+| Tone | Fit | Notes |
+|------|-----|-------|
+| neutral | ✓✓ | Classic combination |
+| warm | ✓✓ | Nostalgic stories |
+| dramatic | ✓ | Works with high contrast |
+| vintage | ✓ | Period pieces |
+| romantic | ✗ | Style mismatch |
+| energetic | ✓ | Lighter stories |
+| action | ✗ | Lacks dynamic lines |
+
+## Best For
+
+Educational content, balanced narratives, biography comics, historical stories
@@ -0,0 +1,93 @@
+# manga
+
+日漫画风 - Anime/manga aesthetics with expressive characters
+
+## Overview
+
+Japanese manga art style characterized by large expressive eyes, dynamic poses, and visual emotion indicators. Versatile style that works across genres from educational to romantic to action.
+
+## Line Work
+
+- Clean, smooth lines (1.5-2px)
+- Expressive weight variation for emphasis
+- Smooth curves, dynamic strokes
+- Speed lines and motion effects available
+- Screen tone effects for atmosphere
+
+## Character Design
+
+- Anime/manga proportions: larger eyes, expressive faces
+- 5-7 head height proportions (varies by sub-style)
+- Clear emotional indicators (！, ？, sweat drops, sparkles)
+- Dynamic poses and gestures
+- Detailed hair with individual strands
+- Fashionable clothing with natural folds
+
+## Eye Styles
+
+| Type | Description |
+|------|-------------|
+| Standard | Medium-large, 2-3 highlights |
+| Educational | Friendly, approachable eyes |
+| Dramatic | Intense, detailed irises |
+| Cute | Very large, sparkly eyes |
+
+## Background Treatment
+
+- Simplified during dialogue/explanation
+- Detailed for establishing shots
+- Screen tone gradients for mood
+- Abstract backgrounds for emotional moments
+- Technical diagrams styled as displays
+
+## Color Approach
+
+- Clean, bright anime colors
+- Soft gradients on skin
+- Vibrant palette options
+- Light and shadow with soft transitions
+- Color coding for character identification
+
+## Default Color Palette
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Primary Blue | Bright blue | #4299E1 |
+| Primary Orange | Warm orange | #ED8936 |
+| Primary Green | Soft green | #68D391 |
+| Skin | Anime warm | #FEEBC8 |
+| Background | Clean white | #FFFFFF |
+| Highlight | Golden | #FFD700 |
+
+## Visual Elements
+
+- Speech bubbles: rounded (normal), spiky (excitement)
+- Sound effects integrated visually
+- Emotion symbols (sweat drops, anger marks, hearts)
+- Speed lines and motion blur
+- Sparkle and glow effects
+
+## Quality Markers
+
+- ✓ Expressive character faces
+- ✓ Clean, consistent line work
+- ✓ Dynamic poses and compositions
+- ✓ Appropriate use of manga conventions
+- ✓ Readable panel flow
+- ✓ Consistent character designs
+
+## Compatibility
+
+| Tone | Fit | Notes |
+|------|-----|-------|
+| neutral | ✓✓ | Educational manga |
+| warm | ✓ | Slice of life |
+| dramatic | ✓ | Intense moments |
+| romantic | ✓✓ | Shoujo style |
+| energetic | ✓✓ | Shonen style |
+| vintage | ✗ | Style mismatch |
+| action | ✓✓ | Battle manga |
+
+## Best For
+
+Educational tutorials, romance, action, coming-of-age, technical explanations, youth-oriented content
@@ -0,0 +1,84 @@
+# minimalist
+
+极简画风 - Clean black line art, limited spot color, simplified stick-figure characters
+
+## Overview
+
+Minimalist cartoon illustration characterized by clean black line art on white background with very limited spot color for emphasis. Characters are simplified to near-stick-figure abstraction, focusing on gesture and concept rather than anatomical detail. Designed for business allegory, quick-read educational content, and concept illustration.
+
+## Line Work
+
+- Clean, uniform black lines (1.5-2px)
+- No hatching, cross-hatching, or shading techniques
+- Minimal detail — every line serves a purpose
+- Bold outlines for characters, thinner lines for props/labels
+- No decorative flourishes or ornamental lines
+
+## Character Design
+
+- Highly simplified, stick-figure-like business characters
+- Circle or oval heads with minimal facial features (dot eyes, simple line mouth)
+- Body as simple geometric shapes or line constructions
+- Distinguishing features through props only (tie, hat, briefcase, glasses)
+- No anatomical detail — expressive through posture and gesture
+- 4-5 head height proportions (squat, iconic)
+
+## Background Treatment
+
+- Mostly blank/white — negative space is a design element
+- Minimal environmental cues (a line for ground, simple desk outline)
+- Concept labels and text annotations replace detailed environments
+- Icons and symbols over realistic rendering
+- No perspective or spatial depth
+
+## Color Approach
+
+- Primarily black and white (90%+ of the image)
+- 1-2 spot accent colors for emphasis on key concepts
+- Accent color used sparingly: highlighting key objects, text labels, concept indicators
+- No gradients, no shading, no color fills on backgrounds
+- Color draws the eye to the most important element in each panel
+
+## Default Color Palette
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Primary | Black ink | `#1A1A1A` |
+| Background | Clean white | `#FFFFFF` |
+| Accent 1 | Spot orange | `#FF6B35` |
+| Accent 2 | Spot blue (optional) | `#3182CE` |
+| Text labels | Dark gray | `#4A4A4A` |
+| Panel border | Medium gray | `#666666` |
+
+## Visual Elements
+
+- Text labels with accent-color backgrounds or underlines for key terms
+- Simple icons: arrows, circles, checkmarks, crosses
+- Concept highlight boxes with spot color
+- Minimal speech bubbles (simple oval or rectangle, thin black outline)
+- No sound effects, no motion lines, no screen tones
+
+## Quality Markers
+
+- ✓ Clean, purposeful line work with no unnecessary detail
+- ✓ 90%+ black-and-white with strategic spot color
+- ✓ Simplified characters readable at small sizes
+- ✓ Text labels integrated naturally into panels
+- ✓ Strong negative space usage
+- ✓ Every element serves the narrative point
+
+## Compatibility
+
+| Tone | Fit | Notes |
+|------|-----|-------|
+| neutral | ✓✓ | Ideal for business/educational content |
+| warm | ✓ | Works for gentle stories, slight warmth in accent |
+| energetic | ✓ | Works for punchy, high-energy content |
+| dramatic | ✗ | Style too stripped down for dramatic intensity |
+| vintage | ✗ | Minimalist aesthetic conflicts with aged/textured look |
+| romantic | ✗ | No capacity for decorative/soft elements |
+| action | ✗ | No dynamic line capability for speed/impact |
+
+## Best For
+
+Business allegory, management fables, short concept illustration, four-panel comic strips, quick-insight education, social media content
@@ -0,0 +1,89 @@
+# realistic
+
+写实画风 - Digital painting with realistic proportions and lighting
+
+## Overview
+
+Full-color realistic manga style using digital painting techniques. Features anatomically accurate characters, rich gradients, and detailed environmental rendering. Sophisticated aesthetic for mature audiences.
+
+## Line Work
+
+- Clean, precise outlines with clear contours
+- Uniform line weight for character definition
+- No excessive hatching - rely on color for depth
+- Smooth curves and realistic anatomical lines
+- Ligne Claire influence: clean but not simplified
+
+## Character Design
+
+- Realistic human proportions (7-8 head heights)
+- Anatomically accurate features and expressions
+- Detailed facial structure without exaggeration
+- Natural poses and body language
+- Consistent appearance across panels
+- Subtle expressions rather than manga-style
+
+## Rendering Style
+
+- Full-color digital painting with rich gradients
+- Soft shadow transitions on skin and fabric
+- Realistic material textures (glass, liquid, fabric, wood)
+- Detailed hair with natural shine and volume
+- Environmental lighting affects all elements
+- NOT flat cel-shading - smooth color blending
+
+## Background Treatment
+
+- Highly detailed, realistic environments
+- Accurate perspective and spatial depth
+- Atmospheric lighting (warm indoor, cool outdoor)
+- Professional settings rendered with precision
+- Props and objects with realistic textures
+
+## Color Approach
+
+- Rich gradients for depth and volume
+- Realistic lighting with warm/cool contrast
+- Material-specific rendering
+- Subtle color temperature shifts
+- Professional, sophisticated palette
+
+## Default Color Palette
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Skin Light | Natural warm | #F5D6C6 |
+| Skin Shadow | Warm shadow | #E8C4B0 |
+| Environment | Warm wood | #8B7355 |
+| Environment Cool | Cool stone | #9CA3AF |
+| Accent | Wine red | #722F37 |
+| Accent Gold | Gold | #D4AF37 |
+| Light Warm | Amber | #FFB347 |
+| Light Cool | Cool blue | #B0C4DE |
+
+## Quality Markers
+
+- ✓ Anatomically accurate proportions
+- ✓ Smooth color gradients (not flat fills)
+- ✓ Realistic material textures
+- ✓ Detailed, atmospheric backgrounds
+- ✓ Natural lighting with soft shadows
+- ✓ Expressive but subtle expressions
+- ✓ Professional aesthetic
+- ✓ Clean speech bubbles
+
+## Compatibility
+
+| Tone | Fit | Notes |
+|------|-----|-------|
+| neutral | ✓✓ | Professional content |
+| warm | ✓✓ | Nostalgic stories |
+| dramatic | ✓✓ | High drama |
+| vintage | ✓✓ | Period pieces |
+| romantic | ✗ | Style mismatch |
+| energetic | ✗ | Too refined |
+| action | ✓ | Serious action |
+
+## Best For
+
+Professional topics (wine, food, business), lifestyle content, adult narratives, documentary-style, mature educational guides
@@ -0,0 +1,71 @@
+# Auto Selection
+
+Content signals determine default art + tone + layout (or preset).
+
+## Content Signal Matrix
+
+| Content Signals | Art Style | Tone | Layout | Preset |
+|-----------------|-----------|------|--------|--------|
+| Tutorial, how-to, beginner | manga | neutral | webtoon | **ohmsha** |
+| Computing, AI, programming | manga | neutral | dense | **ohmsha** |
+| Technical explanation, educational | manga | neutral | webtoon | **ohmsha** |
+| Pre-1950, classical, ancient | realistic | vintage | cinematic | - |
+| Personal story, mentor | ligne-claire | warm | standard | - |
+| Psychology, motivation, self-help, coaching | manga | warm | standard | **concept-story** |
+| Business narrative, management, leadership | manga | warm | standard | **concept-story** |
+| Conflict, breakthrough | (inherit) | dramatic | splash | - |
+| Wine, food, lifestyle | realistic | neutral | cinematic | - |
+| Martial arts, wuxia, xianxia | ink-brush | action | splash | **wuxia** |
+| Romance, love, school life | manga | romantic | standard | **shoujo** |
+| Business allegory, fable, parable, short insight, 四格 | minimalist | neutral | four-panel | **four-panel** |
+| Biography, balanced | ligne-claire | neutral | mixed | - |
+
+## Preset Recommendation Rules
+
+**When preset is recommended**: Load `presets/{preset}.md` and apply all special rules.
+
+### ohmsha
+- **Triggers**: Tutorial, technical, educational, computing, programming, how-to, beginner
+- **Special rules**: Visual metaphors, NO talking heads, gadget reveals, Doraemon-style characters
+- **Base**: manga + neutral + webtoon/dense
+
+### wuxia
+- **Triggers**: Martial arts, wuxia, xianxia, cultivation, swordplay
+- **Special rules**: Qi effects, combat visuals, atmospheric elements
+- **Base**: ink-brush + action + splash
+
+### shoujo
+- **Triggers**: Romance, love story, school life, emotional drama
+- **Special rules**: Decorative elements, eye details, romantic beats
+- **Base**: manga + romantic + standard
+
+### concept-story
+- **Triggers**: Psychology, motivation, self-help, business narrative, management, leadership, personal growth, coaching, soft skills, abstract concept through story
+- **Special rules**: Visual symbol system, growth arc, dialogue+action balance, original characters
+- **Base**: manga + warm + standard
+
+### four-panel
+- **Triggers**: Business allegory, fable, parable, short insight, four-panel, 四格, 四格漫画, single-page comic, minimalist comic strip
+- **Special rules**: Strict 起承转合 4-panel structure, B&W + spot color, simplified stick-figure characters, single-page story
+- **Base**: minimalist + neutral + four-panel
+
+## Compatibility Matrix
+
+Art Style × Tone combinations work best when matched appropriately:
+
+| Art Style | ✓✓ Best | ✓ Works | ✗ Avoid |
+|-----------|---------|---------|---------|
+| ligne-claire | neutral, warm | dramatic, vintage, energetic | romantic, action |
+| manga | neutral, romantic, energetic, action | warm, dramatic | vintage |
+| realistic | neutral, warm, dramatic, vintage | action | romantic, energetic |
+| ink-brush | neutral, dramatic, action, vintage | warm | romantic, energetic |
+| chalk | neutral, warm, energetic | vintage | dramatic, action, romantic |
+| minimalist | neutral | warm, energetic | dramatic, vintage, romantic, action |
+
+**Note**: Art Style × Tone × Layout can be freely combined. Incompatible combinations work but may produce unexpected results.
+
+## Priority Order
+
+1. User-specified options (art / tone / style)
+2. Content signal analysis → auto-selection
+3. Fallback: ligne-claire + neutral + standard
@@ -0,0 +1,98 @@
+Create a knowledge biography comic page following these guidelines:
+
+## Image Specifications
+
+- **Type**: Comic book page with multiple panels
+- **Orientation**: Portrait (vertical)
+- **Aspect Ratio**: 2:3
+- **Style**: See style-specific reference for visual guidelines
+
+## Panel Structure
+
+### Panel Borders
+- Clean black lines (1-2px) around each panel
+- White gutters between panels (8-12px)
+- Panels arranged for clear reading flow
+- Variety in panel sizes for visual rhythm
+
+### Panel Composition
+- Clear focal points in each panel
+- Proper use of foreground, midground, background
+- Camera angles vary: eye level, bird's eye, low angle, close-up, wide shot
+- Action flows logically between panels
+- Negative space used intentionally
+
+## Text Elements
+
+### Speech Bubbles
+- **Dialogue**: Oval/elliptical bubbles with pointed tails
+- White fill with thin black outline
+- Tail points clearly to speaker
+- Hand-lettered style font (not computer-generated)
+
+### Narrator Boxes
+- **Fourth Wall/Narrator**: Rectangular boxes
+- Often positioned at panel edges (top or bottom)
+- Slightly different fill color (cream or light yellow)
+- Used for commentary, time jumps, explanations
+
+### Thought Bubbles
+- Cloud-shaped with bubble trail leading to thinker
+- Softer outline than speech bubbles
+- For internal monologue
+
+### Caption Bars
+- Rectangular bars at panel edges
+- Time and place information
+- "Meanwhile...", "Three years later..." type transitions
+- Darker fill with white text, or vice versa
+
+### Typography
+- Hand-drawn lettering style throughout
+- Bold for emphasis and key terms
+- Consistent letter sizing
+- Chinese text: use full-width punctuation ""，。！
+- Clear hierarchy: titles > dialogue > captions
+
+## Scientific/Concept Visualization
+
+When depicting abstract concepts:
+
+| Concept | Visual Metaphor |
+|---------|----------------|
+| Neural networks | Glowing nodes connected by clean lines |
+| Data flow | Luminous particles along simple paths |
+| Algorithms | Geometric patterns, building blocks |
+| Logic/proof | Interlocking puzzle pieces |
+| Discovery | Light breaking through darkness |
+| Uncertainty | Forking paths, question marks |
+| Time | Clock motifs, calendar pages |
+
+- Integrate diagrams naturally into narrative panels
+- Use inset panels or thought-bubble style for explanations
+- Simplified iconography over realistic depiction
+
+## Fourth Wall / Narrator Character
+
+When depicting narrator characters addressing the reader:
+- Character may look directly out of panel
+- Can appear in "present day" framing scenes
+- Distinct visual treatment from main timeline
+- Often at page edges or in dedicated panels
+- May comment on or question the events shown
+
+## Historical Accuracy
+
+- Research period-specific details: costumes, technology, architecture
+- Show aging naturally for characters across time periods
+- Iconic items and locations rendered recognizably
+- Balance accuracy with stylization
+
+## Language
+
+- All text in Chinese (中文) unless source material is in another language
+- Use Chinese full-width punctuation: ""，。！
+
+---
+
+Please generate the comic page based on the content provided below:
@@ -0,0 +1,180 @@
+# Character Definition Template
+
+## Character Document Format
+
+Create `characters/characters.md` with the following structure:
+
+```markdown
+# Character Definitions - [Comic Title]
+
+**Style**: [selected style]
+**Art Direction**: [Ligne Claire / Manga / etc.]
+
+---
+
+## Character 1: [Name]
+
+**Role**: [Protagonist / Mentor / Antagonist / Narrator]
+**Age**: [approximate age or age range in story]
+
+**Appearance**:
+- Face shape: [oval/square/round]
+- Hair: [color, style, length]
+- Eyes: [color, shape, distinctive features]
+- Build: [height, body type]
+- Distinguishing features: [glasses, beard, scar, etc.]
+
+**Costume**:
+- Default outfit: [detailed description]
+- Color palette: [primary colors for this character]
+- Accessories: [hat, bag, tools, etc.]
+
+**Expression Range**:
+- Neutral: [description]
+- Happy/Excited: [description]
+- Thinking/Confused: [description]
+- Determined: [description]
+
+**Visual Reference Notes**:
+[Any specific artistic direction]
+
+---
+
+## Character 2: [Name]
+...
+```
+
+## Reference Sheet Image Prompt
+
+After character definitions, include a prompt for generating the reference sheet:
+
+```markdown
+## Reference Sheet Prompt
+
+Character reference sheet in [style] style, clean lines, flat colors:
+
+[ROW 1 - Character Name]:
+- Front view: [detailed description]
+- 3/4 view: [description]
+- Expression sheet: Neutral | Happy | Focused | Worried
+
+[ROW 2 - Character Name]:
+...
+
+COLOR PALETTE:
+- [Character 1]: [colors]
+- [Character 2]: [colors]
+
+White background, clear labels under each character.
+```
+
+## Example: Turing Biography
+
+```markdown
+# Character Definitions - The Imitation Game
+
+**Style**: classic (Ligne Claire)
+**Art Direction**: Clean lines, muted colors, period-accurate details
+
+---
+
+## Character 1: Alan Turing
+
+**Role**: Protagonist
+**Age**: 25-40 (varies across story)
+
+**Appearance**:
+- Face shape: Oval, slightly angular
+- Hair: Dark brown, wavy, slightly disheveled
+- Eyes: Deep-set, intense gaze
+- Build: Tall, lean, slightly awkward posture
+- Distinguishing features: Prominent brow, thoughtful expression
+
+**Costume**:
+- Default outfit: Tweed jacket with elbow patches, white shirt, no tie
+- Color palette: Muted browns, navy blue, cream
+- Accessories: Occasionally a pipe, papers/notebooks
+
+**Expression Range**:
+- Neutral: Thoughtful, slightly distant
+- Happy/Excited: Eureka moment, eyes bright, subtle smile
+- Thinking/Confused: Furrowed brow, looking at abstract space
+- Determined: Jaw set, focused eyes
+
+---
+
+## Character 2: The Bombe Machine
+
+**Role**: Supporting (anthropomorphized)
+**Appearance**:
+- Large brass and wood cabinet
+- Dial "eyes" that can express states
+- Paper tape "mouth"
+- Indicator lights for emotions
+
+**Expression Range**:
+- Processing: Spinning dials, humming
+- Success: Lights up warmly
+- Stuck: Smoke wisps, stuttering
+
+---
+
+## Reference Sheet Prompt
+
+Character reference sheet in Ligne Claire style, clean lines, flat colors:
+
+TOP ROW - Alan Turing:
+- Front view: Young man, 30s, short dark wavy hair, thoughtful expression, wearing tweed jacket with elbow patches, white shirt
+- 3/4 view: Same character, slight smile, showing profile of nose
+- Expression sheet: Neutral | Excited (eureka moment) | Focused (working) | Worried
+
+BOTTOM ROW - The Bombe Machine (anthropomorphized):
+- Bombe machine as character: Large, brass and wood, dial "eyes", paper tape "mouth"
+- Expressions: Processing (spinning dials) | Success (lights up) | Stuck (smoke wisps)
+
+COLOR PALETTE:
+- Turing: Muted browns (#8B7355), navy blue (#2C3E50), cream (#F5F5DC)
+- Machine: Brass (#B5A642), mahogany (#4E2728), emerald indicators (#2ECC71)
+
+White background, clear labels under each character.
+```
+
+## Handling Age Variants
+
+For biographies spanning many years, define age variants:
+
+```markdown
+## Alan Turing - Age Variants
+
+### Young (1920s, age 10-18)
+- Boyish features, round face
+- School uniform (Sherborne)
+- Curious, eager expression
+
+### Adult (1930s-40s, age 25-35)
+- Angular face, defined jaw
+- Tweed jacket, rumpled appearance
+- Intense, focused expression
+
+### Later (1950s, age 40+)
+- Slightly weathered
+- More casual dress
+- Thoughtful, sometimes melancholic
+```
+
+## Best Practices
+
+| Practice | Description |
+|----------|-------------|
+| Be specific | "Short dark wavy hair, parted left" not just "dark hair" |
+| Use distinguishing features | Glasses, scars, accessories that identify character |
+| Define color codes | Use specific color names or hex codes |
+| Include age markers | Wrinkles, posture, clothing style matching era |
+| Reference real people | For historical figures, note "based on 1940s photographs" |
+
+## Why Character Reference Matters
+
+Without unified character definition, AI generates inconsistent appearances. The reference sheet provides:
+1. Visual anchors for consistent features
+2. Color palettes for consistent coloring
+3. Expression documentation for emotional portrayals
@@ -0,0 +1,23 @@
+# cinematic
+
+Wide panels, filmic feel
+
+## Panel Structure
+
+- **Panels per page**: 2-4
+- **Structure**: Horizontal emphasis, wide aspect panels
+- **Gutters**: Generous spacing (12-15px)
+
+## Grid Configuration
+
+- 1-2 columns, horizontal emphasis
+- Panel sizes: Wide aspect ratios (3:1, 4:1)
+- Reading flow: Horizontal sweep, filmic rhythm
+
+## Best For
+
+Establishing shots, dramatic moments, landscapes
+
+## Best Style Pairings
+
+dramatic, classic, sepia
@@ -0,0 +1,23 @@
+# dense
+
+Information-rich, educational focus
+
+## Panel Structure
+
+- **Panels per page**: 6-9
+- **Structure**: Compact grid, smaller panels
+- **Gutters**: Tight spacing (4-6px)
+
+## Grid Configuration
+
+- 3 columns × 3 rows
+- Panel sizes: Compact, uniform
+- Reading flow: Rapid progression, information-rich
+
+## Best For
+
+Technical explanations, complex narratives, timelines
+
+## Best Style Pairings
+
+ohmsha, vibrant
@@ -0,0 +1,40 @@
+# four-panel
+
+四格漫画 - Strict 2×2 grid, single-page story
+
+## Panel Structure
+
+- **Panels per page**: 4 (exactly, no variation)
+- **Structure**: Strict 2×2 equal grid
+- **Gutters**: Consistent white space (8-10px), uniform on all sides
+
+## Grid Configuration
+
+- 2 columns × 2 rows, all panels identical size
+- Panel sizes: Exactly equal (each panel = 25% of content area)
+- Reading flow: Z-pattern — Panel 1 (top-left) → Panel 2 (top-right) → Panel 3 (bottom-left) → Panel 4 (bottom-right)
+
+## Narrative Structure
+
+Each panel serves a specific narrative role (起承转合 / kishōtenketsu):
+
+| Panel | Position | Role | Purpose |
+|-------|----------|------|---------|
+| 1 | Top-left | 起 Setup | Establish situation, introduce characters/problem |
+| 2 | Top-right | 承 Development | Build on setup, add complication or attempt |
+| 3 | Bottom-left | 转 Turn | Twist, key insight, or reversal — the pivotal moment |
+| 4 | Bottom-right | 合 Conclusion | Resolution, punchline, or takeaway |
+
+## Aspect Ratio
+
+- Recommended page aspect: **4:3** (landscape)
+- Landscape gives each panel a comfortable wide rectangle
+- Portrait (3:4) makes panels tall and narrow — avoid for this layout
+
+## Best For
+
+Business allegory, quick-insight education, social media comics, fables, parables, single-concept explanation
+
+## Best Style Pairings
+
+minimalist, ligne-claire, chalk
@@ -0,0 +1,23 @@
+# mixed
+
+Dynamic, varied rhythm
+
+## Panel Structure
+
+- **Panels per page**: 3-7 (varies)
+- **Structure**: Intentionally varied for pacing
+- **Gutters**: Dynamic spacing
+
+## Grid Configuration
+
+- Intentionally irregular
+- Panel sizes: Varied for pacing and emphasis
+- Reading flow: Guides eye through varied rhythm
+
+## Best For
+
+Action sequences, emotional arcs, complex stories
+
+## Best Style Pairings
+
+dramatic, vibrant, ohmsha
@@ -0,0 +1,23 @@
+# splash
+
+Impact-focused, key moments
+
+## Panel Structure
+
+- **Panels per page**: 1-2 large + 2-3 small
+- **Structure**: Dominant splash with supporting panels
+- **Gutters**: Varied for emphasis
+
+## Grid Configuration
+
+- 1 dominant panel + 2-3 supporting
+- Panel sizes: 50-70% splash, remainder small
+- Reading flow: Splash dominates, supporting panels accent
+
+## Best For
+
+Revelations, breakthroughs, chapter openings
+
+## Best Style Pairings
+
+dramatic, classic, vibrant
@@ -0,0 +1,23 @@
+# standard
+
+Classic comic grid, versatile
+
+## Panel Structure
+
+- **Panels per page**: 4-6
+- **Structure**: Regular grid with occasional variation
+- **Gutters**: Consistent white space (8-10px)
+
+## Grid Configuration
+
+- 2-3 columns × 2-3 rows
+- Panel sizes: Mostly equal, occasional variation
+- Reading flow: Left→right, top→bottom (Z-pattern)
+
+## Best For
+
+Narrative flow, dialogue scenes
+
+## Best Style Pairings
+
+classic, warm, sepia
@@ -0,0 +1,30 @@
+# webtoon
+
+Vertical scrolling comic (竖版条漫)
+
+## Panel Structure
+
+- **Panels per page**: 3-5 vertically stacked
+- **Structure**: Single column, vertical flow optimized for scrolling
+- **Gutters**: Generous vertical spacing (20-40px), panels often bleed horizontally
+
+## Grid Configuration
+
+- Single column, vertical stack
+- Panel sizes: Full width, variable height (1:1 to 1:2 aspect)
+- Reading flow: Top→bottom continuous scroll
+
+## Special Features
+
+- Panels can extend beyond frame for dramatic effect
+- Generous whitespace between beats
+- Character close-ups alternate with wide explanation panels
+- "Float" effect - elements can exist between panels
+
+## Best For
+
+Ohmsha-style tutorials, mobile reading, step-by-step guides
+
+## Best Style Pairings
+
+ohmsha, vibrant
@@ -0,0 +1,85 @@
+# Ohmsha Manga Guide Style
+
+Guidelines for educational manga comics using the `ohmsha` preset.
+
+## Character Setup
+
+| Role | Default | Traits |
+|------|---------|--------|
+| Student (Role A) | 大雄 | Confused, asks basic but crucial questions, represents reader |
+| Mentor (Role B) | 哆啦A梦 | Knowledgeable, patient, uses gadgets as technical metaphors |
+| Antagonist (Role C, optional) | 胖虎 | Represents misunderstanding, or "noise" in the data |
+
+Custom characters: ask the user for role → name mappings (e.g., `Student:小明, Mentor:教授, Antagonist:Bug怪`).
+
+## Character Reference Sheet Style
+
+For Ohmsha style, use manga/anime style with:
+- Exaggerated expressions for educational clarity
+- Simple, distinctive silhouettes
+- Bright, saturated color palettes
+- Chibi/SD (super-deformed) variants for comedic reactions
+
+## Outline Spec Block
+
+Every ohmsha outline must start with:
+
+```markdown
+【漫画规格单】
+- Language: [Same as input content]
+- Style: Ohmsha (Manga Guide), Full Color
+- Layout: Vertical Scrolling Comic (竖版条漫)
+- Characters: [List character names and roles]
+- Character Reference: characters/characters.png
+- Page Limit: ≤20 pages
+```
+
+## Visual Metaphor Rules (Critical)
+
+**NEVER** create "talking heads" panels. Every technical concept must become:
+
+1. **A tangible gadget/prop** - Something characters can hold, use, demonstrate
+2. **An action scene** - Characters doing something that illustrates the concept
+3. **A visual environment** - Stepping into a metaphorical space
+
+### Examples
+
+| Concept | Bad (Talking Heads) | Good (Visual Metaphor) |
+|---------|---------------------|------------------------|
+| Word embeddings | Characters discussing vectors | 哆啦A梦拿出"词向量压缩机"，把书本压缩成彩色小球 |
+| Gradient descent | Explaining math formula | 大雄在山谷地形上滚球，寻找最低点 |
+| Neural network | Diagram on whiteboard | 角色走进由发光节点组成的网络迷宫 |
+
+## Page Title Convention
+
+Avoid AI-style "Title: Subtitle" format. Use narrative descriptions:
+
+- ❌ "Page 3: Introduction to Neural Networks"
+- ✓ "Page 3: 大雄被海量单词淹没，哆啦A梦拿出'词向量压缩机'"
+
+## Ending Requirements
+
+- NO generic endings ("What will you choose?", "Thanks for reading")
+- End with: Technical summary moment OR character achieving a small goal
+- Final panel: Sense of accomplishment, not open-ended question
+
+### Good Endings
+
+- Student successfully applies learned concept
+- Visual callback to opening problem, now solved
+- Mentor gives summary while student demonstrates understanding
+
+### Bad Endings
+
+- "What do you think?" open questions
+- "Thanks for reading this tutorial"
+- Cliffhanger without resolution
+
+## Layout Preference
+
+Ohmsha style typically uses:
+- `webtoon` (vertical scrolling) - Primary choice
+- `dense` - For information-heavy sections
+- `mixed` - For varied pacing
+
+Avoid `cinematic` and `splash` for educational content.
@@ -0,0 +1,106 @@
+# Partial Workflows
+
+Options to run specific parts of the workflow. Trigger these via natural language (e.g., "just the storyboard", "regenerate page 3").
+
+## Options Summary
+
+| Option | Steps Executed | Output |
+|--------|----------------|--------|
+| Storyboard only | 1-3 | `storyboard.md` + `characters/` |
+| Prompts only | 1-5 | + `prompts/*.md` |
+| Images only | 7-8 | + images |
+| Regenerate N | 7 (partial) | Specific page(s) |
+
+---
+
+## Storyboard-only
+
+Generate storyboard and characters without prompts or images.
+
+**User cue**: "storyboard only", "just the outline", "don't generate images yet".
+
+**Workflow**: Steps 1-3 only (stop after storyboard + characters)
+
+**Output**:
+- `analysis.md`
+- `storyboard.md`
+- `characters/characters.md`
+
+**Use case**: Review and edit the storyboard before generating images. Useful for:
+- Getting feedback on the narrative structure
+- Making manual adjustments to panel layouts
+- Defining custom characters
+
+---
+
+## Prompts-only
+
+Generate storyboard, characters, and prompts without images.
+
+**User cue**: "prompts only", "write the prompts but don't generate yet".
+
+**Workflow**: Steps 1-5 (generate prompts, skip images)
+
+**Output**:
+- `analysis.md`
+- `storyboard.md`
+- `characters/characters.md`
+- `prompts/*.md`
+
+**Use case**: Review and edit prompts before image generation. Useful for:
+- Fine-tuning image generation prompts
+- Ensuring visual consistency before committing to generation
+- Making style adjustments at the prompt level
+
+---
+
+## Images-only
+
+Generate images from existing prompts (starts at Step 7).
+
+**User cue**: "generate images from existing prompts", "run the images now" (pointing at an existing `comic/topic-slug/` directory).
+
+**Workflow**: Skip to Step 7, then 8
+
+**Prerequisites** (must exist in directory):
+- `prompts/` directory with page prompt files
+- `storyboard.md` with style information
+- `characters/characters.md` with character definitions
+
+**Output**:
+- `characters/characters.png` (if not exists)
+- `NN-{cover|page}-[slug].png` images
+
+**Use case**: Re-generate images after editing prompts. Useful for:
+- Recovering from failed image generation
+- Trying different image generation settings
+- Regenerating after manual prompt edits
+
+---
+
+## Regenerate
+
+Regenerate specific pages only.
+
+**User cue**: "regenerate page 3", "redo pages 2, 5, 8", "regenerate the cover".
+
+**Workflow**:
+1. Read existing prompts for specified pages
+2. Regenerate images only for those pages via `image_generate`
+3. Download each returned URL and overwrite the existing PNG
+
+**Prerequisites** (must exist):
+- `prompts/NN-{cover|page}-[slug].md` for specified pages
+- `characters/characters.md` (for agent-side consistency checks, if it was used originally)
+
+**Output**:
+- Regenerated `NN-{cover|page}-[slug].png` for specified pages
+
+**Use case**: Fix specific pages without regenerating entire comic. Useful for:
+- Fixing a single problematic page
+- Iterating on specific visuals
+- Regenerating pages after prompt edits
+
+**Page numbering**:
+- `0` = Cover page
+- `1-N` = Content pages
@@ -0,0 +1,121 @@
+# concept-story
+
+概念故事预设 - Narrative comics that visualize abstract concepts through character-driven stories
+
+## Base Configuration
+
+| Dimension | Value |
+|-----------|-------|
+| Art Style | manga |
+| Tone | warm |
+| Layout | standard (default) |
+
+Equivalent to: art=manga, tone=warm
+
+## Unique Rules
+
+This preset includes special rules beyond the art+tone combination. When the `concept-story` preset is selected, ALL rules below must be applied.
+
+### Concept Visualization System (CRITICAL)
+
+Each major abstract concept SHOULD have a recurring visual symbol/metaphor:
+
+| Concept Type | Visualization Approach |
+|-------------|----------------------|
+| Psychological need | Tangible object character holds or discovers (e.g., glowing energy ball = competence) |
+| Management principle | Environmental metaphor character navigates (e.g., ship wheel = autonomy) |
+| Growth/development | Living organic symbol that transforms (e.g., seed → flowering plant = relatedness) |
+| Abstract framework | Spatial structure characters can enter or observe |
+| Emotional state | Color/lighting shift in the scene atmosphere |
+
+**Unlike ohmsha**: Dialogue panels are allowed and expected. The goal is to COMBINE visual metaphors WITH dialogue, not replace dialogue entirely.
+
+**Pattern**: "Dialogue introduces idea" → "Visual metaphor illustrates it" → "Character reacts/applies it"
+
+### Visual Symbol Continuity
+
+Symbols must persist across the story:
+
+| Stage | Treatment |
+|-------|-----------|
+| Introduction | Symbol appears with soft glow effect when concept is first mentioned |
+| Recurrence | Same symbol reappears in background or character interaction when concept is referenced |
+| Resolution | ALL symbols gather in the final composition, showing integration of learned concepts |
+
+**Storyboard requirement**: Include a Symbol Mapping Table defining concept → visual symbol before panel breakdown.
+
+### Character Archetypes (Flexible)
+
+Create original characters based on content domain. No fixed defaults:
+
+| Role | Archetype | Visual Cues |
+|------|-----------|------------|
+| Protagonist | Learner/worker facing a challenge | Modern professional or student, relatable, starts with constrained posture |
+| Mentor | Experienced guide who teaches through experience | Slightly older, calm demeanor, warm color accents |
+| Catalyst | Person or event that triggers transformation | Can be a colleague, situation, challenge, or opportunity |
+
+**IMPORTANT**: Characters are created fresh each time based on the source content's domain (business, psychology, education, etc.). No default character set.
+
+### Narrative Arc Structure
+
+Enforce a five-stage growth arc:
+
+| Act | Structure | Visual Tone |
+|-----|-----------|------------|
+| Opening | Protagonist stuck in routine, faces frustration | Muted warm tones, tight framing, constrained compositions |
+| Inciting moment | Mentor appears or opportunity arrives | Brightness increases, panels open up |
+| Learning | Concepts introduced through visual metaphors | Rich warm palette, symbols introduced one by one |
+| Turning point | Protagonist applies knowledge, faces test | Contrast increases, dynamic compositions |
+| Transformation | Growth demonstrated, new understanding visible | Full warm palette, expansive composition, all symbols present |
+
+### Dialogue + Action Balance
+
+- Dialogue is encouraged and expected (unlike ohmsha's NO talking heads rule)
+- Every page should combine at least one dialogue panel with at least one visual/action panel
+- Avoid pure "lecture" pages where a character explains for 4+ panels straight
+- When a character explains a concept verbally, the NEXT panel should visualize it
+
+**Wrong approach**: Four consecutive panels of mentor lecturing at protagonist
+**Right approach**: Mentor introduces concept → visual metaphor panel → protagonist reacts → applies understanding
+
+### Scene Atmosphere Rules
+
+| Scene Type | Atmosphere |
+|------------|-----------|
+| Problem/frustration | Cool muted tones over warm base, tight framing, cluttered environment |
+| Mentoring moment | Golden hour lighting, open composition, warm indoor glow |
+| Concept visualization | Soft glow effects, clean simplified backgrounds, symbol spotlight |
+| Growth/transformation | Warm light expanding outward, character posture opening up |
+| Resolution | Full warm palette, spacious composition, all visual symbols visible |
+
+### Ending Requirements
+
+Final page MUST include:
+
+1. Protagonist demonstrating transformed understanding (not just being told)
+2. Visual callback showing contrast with opening state (e.g., wilted plant → thriving plant)
+3. All concept symbols visible together in the composition
+4. A forward-looking element suggesting ongoing growth (not a closed ending)
+
+### Page Title Convention
+
+Every page MUST have a narrative title:
+
+**Wrong**: "Chapter 3: Self-Determination Theory"
+**Right**: "The Day Xiao Ming Found His Own Engine"
+
+## Quality Markers
+
+- ✓ Each major concept has a recurring visual symbol
+- ✓ Dialogue and visual metaphors work together (not one replacing the other)
+- ✓ Clear growth arc from problem to transformation
+- ✓ Original characters suited to the content domain
+- ✓ Warm, professional atmosphere throughout
+- ✓ Visual symbols recur and accumulate through the story
+- ✓ Final page integrates all concept symbols with transformation callback
+
+## Best For
+
+Psychology concepts, business/management principles, motivation theory, personal development,
+self-help content, leadership frameworks, coaching narratives, soft skill education,
+abstract concept explanation through character-driven stories
@@ -0,0 +1,107 @@
+# four-panel
+
+四格漫画预设 - Minimalist four-panel business allegory comics
+
+## Base Configuration
+
+| Dimension | Value |
+|-----------|-------|
+| Art Style | minimalist |
+| Tone | neutral |
+| Layout | four-panel (default) |
+| Aspect | 4:3 (landscape) |
+
+Equivalent to: art=minimalist, tone=neutral, layout=four-panel, aspect=4:3
+
+## Unique Rules
+
+This preset includes special rules beyond the art+tone combination. When the `four-panel` preset is selected, ALL rules below must be applied.
+
+### 起承转合 Narrative Structure (CRITICAL)
+
+Every comic MUST follow the four-panel 起承转合 structure:
+
+| Panel | Role | Requirements |
+|-------|------|-------------|
+| 1 (起 Setup) | Introduce the situation | Show character(s) in a recognizable context. Establish the "normal" state or problem |
+| 2 (承 Development) | Build on the setup | Add complication, show an attempt, or introduce the concept. Stakes become clearer |
+| 3 (转 Turn) | The twist or key insight | **Most important panel.** Show the unexpected reversal, contrast, or "aha" moment that makes the allegory work |
+| 4 (合 Conclusion) | Resolution and takeaway | Show the result, consequence, or lesson learned. Can be a visual punchline or summary |
+
+**CRITICAL**: Do NOT deviate from exactly 4 panels. No 5th panel, no title panel, no footer panel within the image.
+
+### Single-Page Story Rule (CRITICAL)
+
+- The entire story is told in ONE page with exactly 4 panels
+- Page count: always 1 (plus optional cover)
+- No multi-page four-panel stories — if content requires more, create multiple separate four-panel comics
+- Storyboard structure: Cover (optional) + 1 page
+
+### Accent Color System
+
+- The image is primarily black-and-white line art
+- Use exactly 1-2 spot colors per strip (default: orange `#FF6B35`)
+- Rules:
+  - Key concept label or object: filled with accent color or outlined in accent
+  - Panel 3 (转 Turn) should have the strongest color emphasis
+  - Characters remain B&W — color is for concepts/objects/labels only
+  - Consistent accent color across all 4 panels (do not switch colors between panels)
+
+### Character Design Rules
+
+- Simplified stick-figure-like characters
+- Distinguish characters through simple props: ties, glasses, hats, briefcases, aprons
+- No detailed faces — dot eyes, line mouth at most
+- Characters should be generic enough to represent archetypes (the manager, the employee, the customer)
+- Maximum 2-3 characters per strip
+
+### Text in Panels
+
+- Chinese text for dialogue and labels (or match source language)
+- Keep text minimal — 1-2 short lines per panel maximum
+- Key concept terms can be highlighted with accent color background
+- No narrator boxes — dialogue and labels only
+- Speech bubbles: simple rectangles or ovals, thin black outline
+
+### Optional Title & Caption
+
+- A brief descriptive title above the 4 panels
+- An optional one-line caption/moral below the panels
+- These are part of the page composition, not separate panels
+
+### Character Archetypes (Flexible)
+
+Create simple stick-figure characters based on content. No fixed defaults:
+
+| Role | Archetype | Visual Cues |
+|------|-----------|------------|
+| Protagonist | Worker/employee facing a situation | Simple figure, minimal distinguishing feature (glasses, tie) |
+| Authority | Boss/manager/expert | Slightly larger figure, or prop like pointer/clipboard |
+| Object | The concept itself | Labeled object, icon, or highlighted text with accent color |
+
+### Prompt Template
+
+When generating image prompts for four-panel comics, include these keywords:
+
+> A minimalist, clean line art digital comic strip in a four-panel grid layout (2×2). The style is simplified cartoon illustration with clear black outlines and a minimal color palette of black, white, and specific spot [accent color] for key concepts.
+
+Each panel description should specify:
+- Panel position (Top Left / Top Right / Bottom Left / Bottom Right)
+- Character poses and gestures (simple, stick-figure style)
+- Dialogue text in Chinese (hand-drawn style)
+- Any accent-colored elements (concept labels, key objects)
+
+## Quality Markers
+
+- ✓ Exactly 4 panels in strict 2×2 grid
+- ✓ 起承转合 narrative arc clearly present
+- ✓ 90%+ black-and-white with strategic spot color
+- ✓ Simplified stick-figure characters
+- ✓ Key concept visually highlighted with accent color
+- ✓ Text is minimal and in Chinese (or source language)
+- ✓ Single complete story in one page
+- ✓ Panel 3 delivers a clear "turn" or insight
+
+## Best For
+
+Business allegory, management fables, short insights, workplace parables, concept contrasts, social media educational content, quick-read comics
@@ -0,0 +1,114 @@
+# ohmsha
+
+Ohmsha预设 - Educational manga with visual metaphors
+
+## Base Configuration
+
+| Dimension | Value |
+|-----------|-------|
+| Art Style | manga |
+| Tone | neutral |
+| Layout | webtoon (default) |
+
+Equivalent to: art=manga, tone=neutral
+
+## Unique Rules
+
+This preset includes special rules beyond the art+tone combination. When the `ohmsha` preset is selected, ALL rules below must be applied.
+
+### Visual Metaphor Requirements (CRITICAL)
+
+Every technical concept MUST be visualized as a metaphor:
+
+| Concept Type | Visualization Approach |
+|-------------|----------------------|
+| Algorithm | Gadget/machine that demonstrates the process |
+| Data structure | Physical space characters can enter/explore |
+| Mathematical formula | Transformation visible in environment |
+| Abstract process | Tangible flow of particles/objects |
+
+**Wrong approach**: Character points at blackboard explaining
+**Right approach**: Character uses "Concept Visualizer" gadget, steps into metaphorical space
+
+### Visual Metaphor Examples
+
+| Concept | Wrong (Talking Head) | Right (Visual Metaphor) |
+|---------|---------------------|------------------------|
+| Attention mechanism | Character points at formula on blackboard | "Attention Flashlight" gadget illuminates key words in dark room |
+| Gradient descent | "The algorithm minimizes loss" | Character rides ball rolling down mountain valley |
+| Neural network | Diagram with arrows | Living network of glowing creatures passing messages |
+| Overfitting | "The model memorized the data" | Character wearing clothes that fit only one specific pose |
+
+### Character Roles (Required)
+
+**DEFAULT: Use Doraemon characters** unless user explicitly specifies custom characters.
+
+| Role | Default Character | Visual | Traits |
+|------|-------------------|--------|--------|
+| Student (Role A) | 大雄 (Nobita) | Boy, 10yo, round glasses, black hair, yellow shirt, navy shorts | Confused, asks basic but crucial questions, represents reader |
+| Mentor (Role B) | 哆啦A梦 (Doraemon) | Blue robot cat, white belly, 4D pocket, red nose, golden bell | Knowledgeable, patient, uses gadgets as technical metaphors |
+| Challenge (Role C) | 胖虎 (Gian) | Stocky boy, small eyes, orange shirt | Represents misunderstanding, or "noise" in the data |
+| Support (Role D) | 静香 (Shizuka) | Cute girl, black short hair, pink dress | Asks clarifying questions, provides alternative perspectives |
+
+**IMPORTANT**: These Doraemon characters ARE the default for ohmsha preset. Generate character definitions using these exact characters unless user requests otherwise.
+
+To use custom characters: ask the user to provide role → character mappings (e.g., `Student:小明, Mentor:教授`).
+
+### Page Title Convention
+
+Every page MUST have a narrative title (not section header):
+
+**Wrong**: "Chapter 1: Introduction to Transformers"
+**Right**: "The Day Nobita Couldn't Understand Anyone"
+
+### Gadget Reveal Pattern
+
+When introducing a concept:
+
+1. Student expresses confusion with visual indicator (？, spiral eyes)
+2. Mentor dramatically produces gadget with sparkle effects
+3. Gadget name announced in bold with explanation
+4. Demonstration begins - student enters metaphorical space
+
+### Ending Requirements
+
+Final page MUST include:
+
+1. Student demonstrating understanding (applying the concept)
+2. Callback to opening problem (now resolved)
+3. Mentor's satisfied expression
+4. Optional: hint at next topic
+
+### NO Talking Heads Rule
+
+**Critical**: Characters must DO things, not just explain.
+
+Every panel should show:
+- Action being performed
+- Metaphor being demonstrated
+- Character interaction with concept-space
+- NOT: two characters facing each other talking
+
+### Special Visual Elements
+
+| Element | Usage |
+|---------|-------|
+| Gadget reveals | Dramatic unveiling with sparkle effects |
+| Concept spaces | Rounded borders, glowing edges for "imagination mode" |
+| Information displays | Holographic UI style for technical details |
+| Aha moments | Radial lines, light burst effects |
+| Confusion | Spiral eyes, question marks floating above head |
+
+## Quality Markers
+
+- ✓ Every concept is a visual metaphor
+- ✓ Characters are DOING things, not just talking
+- ✓ Clear student/mentor dynamic
+- ✓ Gadgets and props drive the explanation
+- ✓ Expressive manga-style emotions
+- ✓ Information density through visual design, not text walls
+- ✓ Narrative page titles
+
+## Reference
+
+For complete guidelines, see `references/ohmsha-guide.md`
@@ -0,0 +1,116 @@
+# shoujo
+
+少女预设 - Classic shoujo manga with romantic aesthetics
+
+## Base Configuration
+
+| Dimension | Value |
+|-----------|-------|
+| Art Style | manga |
+| Tone | romantic |
+| Layout | standard (default) |
+
+Equivalent to: art=manga, tone=romantic
+
+## Unique Rules
+
+This preset includes special rules beyond the art+tone combination. When the `shoujo` preset is selected, ALL rules below must be applied.
+
+### Decorative Elements (Required)
+
+Every emotional moment must include decorative elements:
+
+| Emotion | Required Decorations |
+|---------|---------------------|
+| Love | Floating hearts, sparkles, rose petals |
+| Longing | Feathers, bubbles, distant sparkles |
+| Joy | Flowers blooming, light bursts, stars |
+| Sadness | Falling petals, fading sparkles |
+| Shyness | Soft sparkles, floating bubbles |
+| Realization | Radiating lines with sparkles |
+
+### Eye Detail Requirements
+
+Eyes are critical in shoujo style:
+
+| Aspect | Treatment |
+|--------|-----------|
+| Size | Larger than standard manga (1.2x) |
+| Highlights | Multiple (3-5), placed for emotion |
+| Reflection | Scene reflection in emotional moments |
+| Sparkle | Built-in sparkle effects |
+| Tears | Crystalline, detailed teardrops |
+
+### Character Beauty Standards
+
+| Feature | Treatment |
+|---------|-----------|
+| Hair | Flowing, detailed strands, shine highlights |
+| Skin | Porcelain, soft blush on cheeks |
+| Lips | Soft, slightly glossy |
+| Hands | Elegant, expressive gestures |
+| Posture | Graceful, elegant poses |
+
+### Background Effects
+
+**Abstract backgrounds** for emotional moments:
+
+| Moment Type | Background |
+|-------------|-----------|
+| Love confession | Soft gradient + floating flowers |
+| Shock | Screen tone speed lines + sparkles |
+| Memory | Dreamy blur + scattered petals |
+| Realization | Radial lines + light burst |
+| Intimate | Soft focus + floating elements |
+
+### Panel Flow
+
+- Overlap panels for intimate moments
+- Break panel borders for emotional impact
+- Float decorative elements between panels
+- Use screen tone gradients for mood
+- Irregular panel shapes for drama
+
+### Emotional Beat Timing
+
+Slow down pacing for emotional impact:
+
+| Scene Type | Panel Treatment |
+|------------|-----------------|
+| Confession | Multiple small panels, then splash |
+| Eye contact | Close-up sequence |
+| Touch | Slow-motion panel breakdown |
+| Realization | Build-up panels then impact |
+
+### Color Palette Application
+
+| Scene Type | Palette |
+|------------|---------|
+| Romantic | Pink, lavender, rose gold |
+| Happy | Soft yellow, peach, sky blue |
+| Sad | Pale blue, silver, gray lavender |
+| Dramatic | Deep rose, purple, contrast |
+
+### Screen Tone Usage
+
+| Mood | Tone Pattern |
+|------|-------------|
+| Neutral | Clean, minimal |
+| Romantic | Soft gradient overlays |
+| Dramatic | Heavy contrast tones |
+| Dreamy | Soft dot patterns |
+
+## Quality Markers
+
+- ✓ Large, sparkling detailed eyes
+- ✓ Decorative elements in emotional moments
+- ✓ Flowing, beautiful character designs
+- ✓ Soft, pastel color palette
+- ✓ Elegant panel compositions
+- ✓ Screen tone mood effects
+- ✓ Romantic atmosphere throughout
+- ✓ Beautiful, expressive poses
+
+## Best For
+
+Romance stories, coming-of-age, friendship narratives, school life, emotional drama, love stories
@@ -0,0 +1,110 @@
+# wuxia
+
+武侠预设 - Hong Kong martial arts comic style
+
+## Base Configuration
+
+| Dimension | Value |
+|-----------|-------|
+| Art Style | ink-brush |
+| Tone | action |
+| Layout | splash (default) |
+
+Equivalent to: art=ink-brush, tone=action
+
+## Unique Rules
+
+This preset includes special rules beyond the art+tone combination. When the `wuxia` preset is selected, ALL rules below must be applied.
+
+### Qi/Energy Effects (Required)
+
+Martial arts power must be visible through qi effects:
+
+| Effect Type | Visual Treatment |
+|-------------|-----------------|
+| Internal qi | Glowing aura around character |
+| External qi | Visible energy projection |
+| Qi clash | Radiating impact waves |
+| Qi absorption | Flowing particles toward character |
+| Hidden power | Subtle glow in eyes/fists |
+
+### Energy Colors
+
+| Qi Type | Color |
+|---------|-------|
+| Righteous | Blue (#4299E1), Gold (#FFD700) |
+| Fierce | Red (#DC2626), Orange (#EA580C) |
+| Evil | Purple (#7C3AED), Green (#16A34A) |
+| Pure | White, Silver |
+| Ancient | Gold with particles |
+
+### Combat Visual Language
+
+**Impact moments** must include:
+
+1. Speed lines radiating from impact point
+2. Flying debris (stone, wood, cloth)
+3. Shockwave rings
+4. Dust/energy clouds
+5. Hair and clothing blown back
+
+### Movement Depiction
+
+| Speed Level | Visual Treatment |
+|-------------|-----------------|
+| Normal | Standard pose |
+| Fast | Motion blur, speed lines |
+| Lightning | Afterimages, multiple positions |
+| Teleport | Fade effect, particle trail |
+
+### Environmental Integration
+
+Backgrounds must support action:
+
+| Environment | Combat Enhancement |
+|-------------|-------------------|
+| Mountains | Crumbling peaks from impacts |
+| Forest | Exploding trees, flying leaves |
+| Water | Dramatic splashes, walking on water |
+| Temple | Breaking pillars, flying tiles |
+| Cliff | Dramatic falls, wind effects |
+
+### Character Pose Guidelines
+
+- Dynamic warrior stances with weight distribution
+- Flowing robes and hair showing movement
+- Muscle tension visible in action
+- Feet planted or in dynamic motion
+- Traditional martial arts postures
+
+### Weapon Effects
+
+| Weapon | Visual Treatment |
+|--------|-----------------|
+| Sword | Trailing light arc, blade glow |
+| Palm | Qi projection, wind effect |
+| Staff | Spinning blur, impact ripples |
+| Whip | Flowing energy trail |
+
+### Atmospheric Elements
+
+Always include:
+- Floating particles (leaves, petals, dust)
+- Ink wash mist for depth
+- Wind direction indicators
+- Dramatic sky/weather when appropriate
+
+## Quality Markers
+
+- ✓ Dynamic action poses with sense of motion
+- ✓ Ink brush aesthetic in line work
+- ✓ Visible qi/energy effects
+- ✓ High contrast dramatic lighting
+- ✓ Atmospheric backgrounds with Chinese elements
+- ✓ Flowing fabric and hair movement
+- ✓ Impactful combat moments
+- ✓ Speed lines and impact effects
+
+## Best For
+
+Martial arts stories, Chinese historical fiction, wuxia/xianxia adaptations, action-heavy narratives
@@ -0,0 +1,143 @@
+# Storyboard Template
+
+## Storyboard Document Format
+
+```markdown
+---
+title: "[Comic Title]"
+topic: "[topic description]"
+time_span: "[e.g., 1912-1954]"
+narrative_approach: "[chronological/thematic/character-focused]"
+recommended_style: "[style name]"
+recommended_layout: "[layout name or varies]"
+aspect_ratio: "3:4"    # 3:4 (portrait), 4:3 (landscape), 16:9 (widescreen)
+language: "[zh/en/ja/etc.]"
+page_count: [N]
+generated: "YYYY-MM-DD HH:mm"
+---
+
+# [Comic Title] - Knowledge Comic Storyboard
+
+**Character Reference**: characters/characters.png
+
+---
+
+## Cover
+
+**Filename**: 00-cover-[slug].png
+**Core Message**: [one-liner]
+
+**Visual Design**:
+- Title typography style
+- Main visual composition
+- Color scheme
+- Subtitle / time span notation
+
+**Visual Prompt**:
+[Detailed image generation prompt]
+
+---
+
+## Page 1 / N
+
+**Filename**: 01-page-[slug].png
+**Layout**: [standard/cinematic/dense/splash/mixed]
+**Narrative Layer**: [Main narrative / Narrator layer / Mixed]
+**Core Message**: [What this page conveys]
+
+### Panel Layout
+
+**Panel Count**: X
+**Layout Type**: [grid/irregular/splash]
+
+#### Panel 1 (Size: 1/3 page, Position: Top)
+
+**Scene**: [Time, location]
+**Image Description**:
+- Camera angle: [bird's eye / low angle / eye level / close-up / wide shot]
+- Characters: [pose, expression, action]
+- Environment: [scene details, period markers]
+- Lighting: [atmosphere description]
+- Color tone: [palette reference]
+
+**Text Elements**:
+- Dialogue bubble (oval): "Character line"
+- Narrator box (rectangular): 「Narrator commentary」
+- Caption bar: [Background info text]
+
+#### Panel 2...
+
+**Page Hook**: [Cliffhanger or transition at page end]
+
+**Visual Prompt**:
+[Full page image generation prompt]
+
+---
+
+## Page 2 / N
+...
+```
+
+## Cover Design Principles
+
+- Academic gravitas with visual appeal
+- Title typography reflecting knowledge/science theme
+- Composition hinting at core theme (character silhouette, iconic symbol, concept diagram)
+- Subtitle or time span for epic scope
+
+## Panel Composition Guidelines
+
+| Panel Type | Recommended Count | Usage |
+|-----------|-------------------|-------|
+| Main narrative | 3-5 per page | Story progression |
+| Concept diagram | 1-2 per page | Visualize abstractions |
+| Narrator panel | 0-1 per page | Commentary, transition |
+| Splash (full/half) | Occasional | Major moments |
+
+## Panel Size Reference
+
+- **Full page (Splash)**: Major moments, key breakthroughs
+- **Half page**: Important scenes, turning points
+- **1/3 page**: Standard narrative panels
+- **1/4 or smaller**: Quick progression, sequential action
+
+## Concept Visualization Techniques
+
+Transform abstract concepts into concrete visuals:
+
+| Abstract Concept | Visual Approach |
+|-----------------|-----------------|
+| Neural network | Glowing nodes with connecting lines |
+| Gradient descent | Ball rolling down valley terrain |
+| Data flow | Luminous particles flowing through pipes |
+| Algorithm iteration | Ascending spiral staircase |
+| Breakthrough moment | Shattering barrier, piercing light |
+| Logical proof | Building blocks assembling |
+| Uncertainty | Forking paths, fog, multiple shadows |
+
+## Text Element Design
+
+| Text Type | Style | Usage |
+|-----------|-------|-------|
+| Character dialogue | Oval speech bubble | Main narrative speech |
+| Narrator commentary | Rectangular box | Explanation, commentary |
+| Caption bar | Edge-mounted rectangle | Time, location info |
+| Thought bubble | Cloud shape | Character inner monologue |
+| Term label | Bold / special color | First appearance of technical terms |
+
+## Prompt Structure for Consistency
+
+Each page prompt should include character reference:
+
+```
+[CHARACTER REFERENCE]
+(Key details from characters.md for characters in this page)
+
+[PAGE CONTENT]
+(Specific scene, panel layout, and visual elements)
+
+[CONSISTENCY REMINDER]
+Maintain exact character appearances as defined in character reference.
+- [Character A]: [key identifying features]
+- [Character B]: [key identifying features]
+```
@@ -0,0 +1,110 @@
+# action
+
+动作基调 - Speed, impact, power
+
+## Overview
+
+High-impact action atmosphere with dynamic movement, combat effects, and powerful visual energy. Creates visceral, exciting sequences.
+
+## Mood Characteristics
+
+- Speed and motion
+- Power and impact
+- Combat intensity
+- Physical energy
+- Visceral excitement
+
+## Color Modifiers
+
+When applied to any art style:
+
+| Adjustment | Direction |
+|------------|-----------|
+| Saturation | High contrast |
+| Contrast | Maximum |
+| Temperature | Variable per effect |
+| Brightness | Dynamic range |
+
+## Action Effects
+
+**Combat/motion effects** (apply liberally):
+
+| Effect | Usage |
+|--------|-------|
+| Speed lines | Motion, velocity |
+| Impact bursts | Hits, collisions |
+| Shockwaves | Powerful impacts |
+| Flying debris | Environmental destruction |
+| Dust clouds | Ground impacts |
+| Motion blur | Fast movement |
+| Afterimages | Super speed |
+
+## Special Effects
+
+| Effect Type | Visual Approach |
+|------------|-----------------|
+| Energy attacks | Glowing, radiating |
+| Physical impacts | Radiating lines, debris |
+| Movement | Speed lines, blur |
+| Atmosphere | Flying particles, wind |
+
+## Effect Colors
+
+| Effect | Color | Hex |
+|--------|-------|-----|
+| Energy glow | Blue | #4299E1 |
+| Fire/power | Gold | #FFD700 |
+| Impact | White burst | #FFFFFF |
+| Blood/intensity | Deep red | #8B0000 |
+
+## Lighting
+
+- Dynamic, shifting
+- Impact flashes
+- Energy glow sources
+- Rim lighting on figures
+- Dramatic contrast
+
+## Emotional Range
+
+| Emotion | Expression |
+|---------|-----------|
+| Determination | Fierce focus |
+| Rage | Intense, powerful |
+| Triumph | Victorious pose |
+| Struggle | Strained effort |
+
+## Composition
+
+- Dynamic angles
+- Extreme perspectives
+- Panel-breaking layouts
+- Asymmetric designs
+- Impact-focused framing
+
+## Pose Guidelines
+
+- Dynamic warrior poses
+- Weight and momentum visible
+- Muscle tension shown
+- Flow of movement captured
+- Impact points emphasized
+
+## Best For
+
+- Martial arts combat
+- Action sequences
+- Sports moments
+- Physical challenges
+- Battle scenes
+- Climactic confrontations
+
+## Combination Notes
+
+Works especially well with:
+- ink-brush: wuxia combat
+- manga: shonen battles
+
+Avoid with:
+- chalk: style mismatch
+- ligne-claire: style mismatch (too static)
@@ -0,0 +1,95 @@
+# dramatic
+
+戏剧基调 - High contrast, intense, powerful moments
+
+## Overview
+
+High-impact dramatic tone for pivotal moments, conflicts, and breakthroughs. Uses strong contrast and intense compositions to create emotional power.
+
+## Mood Characteristics
+
+- Tension and intensity
+- Pivotal moments
+- Conflict and resolution
+- Breakthrough discoveries
+- Emotional climaxes
+
+## Color Modifiers
+
+When applied to any art style:
+
+| Adjustment | Direction |
+|------------|-----------|
+| Saturation | High (vibrant or deep) |
+| Contrast | Maximum |
+| Temperature | Varies for effect |
+| Brightness | Strong highlights, deep shadows |
+
+## Contrast Approach
+
+- Sharp light/dark divisions
+- Minimal mid-tones
+- Stark compositions
+- Silhouette potential
+- Rim lighting effects
+
+## Accent Colors
+
+- Deep navy (#1A365D)
+- Crimson (#9B2C2C)
+- Stark white
+- Heavy blacks
+- Limited palette per scene
+
+## Lighting
+
+- Dramatic single-source
+- High contrast shadows
+- Rim lighting on characters
+- Spotlight effects
+- Chiaroscuro influence
+
+## Emotional Range
+
+| Emotion | Expression |
+|---------|-----------|
+| Anger | Intense, defined features |
+| Determination | Strong, focused gaze |
+| Shock | Wide eyes, stark lighting |
+| Triumph | Powerful, elevated pose |
+
+## Composition
+
+- Angular, dynamic layouts
+- Dramatic camera angles
+- Low/high viewpoints
+- Diagonal compositions
+- Negative space for impact
+
+## Visual Elements
+
+- Speed lines for tension
+- Impact effects
+- Dramatic backgrounds (storms, fire)
+- Silhouettes
+- Light burst effects
+- Environmental drama
+
+## Best For
+
+- Pivotal discoveries
+- Conflict scenes
+- Climactic moments
+- Breakthrough realizations
+- Emotional confrontations
+- Historical turning points
+
+## Combination Notes
+
+Works especially well with:
+- realistic: powerful drama
+- ink-brush: martial arts climax
+- ligne-claire: historical pivots
+- manga: shonen battles
+
+Avoid with: chalk (style mismatch)
@@ -0,0 +1,105 @@
+# energetic
+
+活力基调 - Bright, dynamic, exciting
+
+## Overview
+
+High-energy atmosphere for exciting, discovery-filled content. Bright colors, dynamic compositions, and movement create engaging visuals for younger audiences.
+
+## Mood Characteristics
+
+- Excitement and wonder
+- Discovery and learning
+- Energy and enthusiasm
+- Movement and action
+- Youthful spirit
+
+## Color Modifiers
+
+When applied to any art style:
+
+| Adjustment | Direction |
+|------------|-----------|
+| Saturation | High (vibrant) |
+| Contrast | Medium-high |
+| Temperature | Variable, punchy |
+| Brightness | Bright, clean |
+
+## Color Palette
+
+Shift toward vibrant tones:
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Primary Red | Bright red | #F56565 |
+| Primary Yellow | Sunny yellow | #F6E05E |
+| Primary Blue | Sky blue | #63B3ED |
+| Accent 1 | Magenta | #D53F8C |
+| Accent 2 | Lime green | #68D391 |
+| Background | Clean white | #FFFFFF |
+| Background Alt | Bright pastels | Various |
+
+## Lighting
+
+- Bright, clear lighting
+- Clean shadows
+- High energy
+- Spotlight effects for emphasis
+- Dynamic light sources
+
+## Dynamic Elements
+
+**Energy effects** (add to compositions):
+
+| Element | Usage |
+|---------|-------|
+| Speed lines | Motion, excitement |
+| Sparkles | Discoveries |
+| Burst effects | Aha moments |
+| Motion blur | Fast action |
+| Star bursts | Emphasis |
+| Sweat drops | Effort/surprise |
+
+## Emotional Range
+
+| Emotion | Expression |
+|---------|-----------|
+| Excitement | Wide eyes, big smile |
+| Surprise | Dramatic reaction |
+| Determination | Intense focus |
+| Wonder | Sparkling eyes |
+
+## Composition
+
+- Dynamic angles
+- Action-oriented layouts
+- Movement emphasis
+- Clean, punchy designs
+- Energy flows
+
+## Visual Style
+
+- Expressive, animated characters
+- Wide eyes, big reactions
+- Dynamic poses
+- Motion and action focus
+- Simplified backgrounds for energy
+
+## Best For
+
+- Science explanations
+- "Aha" moments
+- Young audience content
+- Discovery narratives
+- Learning adventures
+- Action tutorials
+
+## Combination Notes
+
+Works especially well with:
+- manga: shonen energy
+- chalk: fun education
+
+Avoid with:
+- realistic: style mismatch
+- ink-brush: style mismatch
@@ -0,0 +1,63 @@
+# neutral
+
+中性基调 - Balanced, rational, educational
+
+## Overview
+
+Default balanced tone suitable for educational and informative content. Neither overly emotional nor cold - creates accessible, professional atmosphere.
+
+## Mood Characteristics
+
+- Balanced emotional register
+- Clear, rational presentation
+- Educational focus
+- Professional but approachable
+- Objective storytelling
+
+## Color Modifiers
+
+When applied to any art style:
+
+| Adjustment | Direction |
+|------------|-----------|
+| Saturation | Standard (no shift) |
+| Contrast | Balanced |
+| Temperature | Neutral |
+| Brightness | Slightly bright |
+
+## Lighting
+
+- Even, clear lighting
+- Minimal dramatic shadows
+- Consistent across panels
+- Natural light sources
+- No extreme contrast
+
+## Emotional Range
+
+| Emotion | Expression Level |
+|---------|-----------------|
+| Joy | Moderate smile |
+| Concern | Thoughtful expression |
+| Surprise | Mild widening of eyes |
+| Frustration | Slight frown |
+
+## Composition
+
+- Balanced panel layouts
+- Clear focal points
+- Readable hierarchies
+- Standard framing
+- Functional compositions
+
+## Best For
+
+- Educational content
+- Technical tutorials
+- Informative biographies
+- Documentary style
+- Professional topics
+
+## Usage Notes
+
+Neutral is the default tone. Combine with any art style for baseline professional output. Most versatile tone option.
@@ -0,0 +1,100 @@
+# romantic
+
+浪漫基调 - Soft, beautiful, emotionally delicate
+
+## Overview
+
+Soft, dreamy atmosphere for romantic and emotionally delicate content. Features decorative elements, sparkles, and beautiful compositions that emphasize feeling and beauty.
+
+## Mood Characteristics
+
+- Romance and love
+- Beauty and elegance
+- Emotional delicacy
+- Dreams and hopes
+- Youth and idealism
+
+## Color Modifiers
+
+When applied to any art style:
+
+| Adjustment | Direction |
+|------------|-----------|
+| Saturation | Soft pastels |
+| Contrast | Low, gentle |
+| Temperature | Slightly warm pink |
+| Brightness | Soft, glowing |
+
+## Color Palette
+
+Shift toward romantic tones:
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Primary | Soft pink | #FFB6C1 |
+| Secondary | Lavender | #E6E6FA |
+| Accent | Rose | #FF69B4 |
+| Highlight | Pearl white | #FFFAF0 |
+| Gold | Gold sparkle | #FFD700 |
+| Skin | Porcelain | #FFF5EE |
+| Blush | Soft blush | #FFE4E1 |
+| Background | Soft cream | #FFF8DC |
+
+## Lighting
+
+- Soft, diffused light
+- Glowing effects
+- Backlighting halos
+- Sparkle highlights
+- Dreamy atmospheres
+
+## Decorative Elements
+
+**Essential decorations** (add to compositions):
+
+| Element | Usage |
+|---------|-------|
+| Flower petals | Floating, framing |
+| Sparkles | Emotional highlights |
+| Bubbles | Dreamy moments |
+| Feathers | Gentle floating |
+| Stars | Night scenes, wonder |
+| Hearts | Love emphasis |
+| Light halos | Character highlights |
+
+## Emotional Range
+
+| Emotion | Expression |
+|---------|-----------|
+| Love | Soft gaze, blush |
+| Longing | Distant, beautiful sadness |
+| Joy | Radiant smile, sparkles |
+| Shyness | Downcast eyes, blush |
+
+## Composition
+
+- Elegant, flowing layouts
+- Soft focus backgrounds
+- Characters framed by decorations
+- Beautiful angles (3/4 profiles)
+- Screen tone gradients
+
+## Best For
+
+- Romance stories
+- Coming-of-age
+- Friendship narratives
+- Emotional drama
+- School life
+- Beautiful moments
+
+## Combination Notes
+
+Works especially well with:
+- manga: classic shoujo style
+
+Avoid with:
+- realistic: style mismatch
+- ink-brush: style mismatch
+- ligne-claire: style mismatch
+- chalk: style mismatch
@@ -0,0 +1,104 @@
+# vintage
+
+复古基调 - Historical, aged, period authenticity
+
+## Overview
+
+Historical atmosphere with aged paper effects and period-appropriate aesthetics. Creates sense of time, authenticity, and historical distance.
+
+## Mood Characteristics
+
+- Historical authenticity
+- Period distance
+- Archival quality
+- Time and memory
+- Classical elegance
+
+## Color Modifiers
+
+When applied to any art style:
+
+| Adjustment | Direction |
+|------------|-----------|
+| Saturation | Reduced, muted |
+| Contrast | Medium, aged |
+| Temperature | Sepia shift |
+| Brightness | Slightly faded |
+
+## Color Palette
+
+Shift toward aged tones:
+
+| Role | Color | Hex |
+|------|-------|-----|
+| Primary | Sepia brown | #8B7355 |
+| Background | Aged paper | #F5E6D3 |
+| Accent 1 | Faded teal | #6B8E8E |
+| Accent 2 | Muted burgundy | #7B3F3F |
+| Ink | Aged black | #3D3D3D |
+| Yellowed | Paper yellow | #F5DEB3 |
+
+## Visual Effects
+
+**Aging effects** (apply subtly):
+
+| Effect | Application |
+|--------|-------------|
+| Paper aging | Background texture |
+| Faded edges | Vignette effect |
+| Dust specks | Subtle overlay |
+| Yellowing | Color shift |
+| Wear marks | Corner/edge details |
+
+## Period Elements
+
+- Historical typography
+- Period-accurate details
+- Archival presentation
+- Classical compositions
+- Formal framing
+
+## Lighting
+
+- Natural, period-appropriate
+- Oil lamp/candle warmth
+- Soft, diffused light
+- Indoor historical lighting
+- Photographic quality
+
+## Emotional Range
+
+| Emotion | Expression |
+|---------|-----------|
+| Dignity | Formal, composed |
+| Sorrow | Restrained, elegant |
+| Pride | Classical posture |
+| Wisdom | Aged grace |
+
+## Composition
+
+- Classical framing
+- Formal compositions
+- Period-appropriate staging
+- Documentary style
+- Historical accuracy priority
+
+## Best For
+
+- Pre-1950s stories
+- Classical science history
+- Historical biographies
+- Period pieces
+- Documentary comics
+- Archival narratives
+
+## Combination Notes
+
+Works especially well with:
+- realistic: period drama
+- ligne-claire: historical adventure
+- ink-brush: classical Asian stories
+
+Avoid with:
+- manga: style mismatch (too modern)
+- chalk: style mismatch (modern educational)
@@ -0,0 +1,94 @@
+# warm
+
+温馨基调 - Nostalgic, personal, comforting
+
+## Overview
+
+Warm, inviting atmosphere for personal stories and nostalgic content. Creates emotional connection through cozy aesthetics and comforting visuals.
+
+## Mood Characteristics
+
+- Nostalgic feeling
+- Personal, intimate atmosphere
+- Comforting and healing
+- Memory and reflection
+- Gentle emotional warmth
+
+## Color Modifiers
+
+When applied to any art style:
+
+| Adjustment | Direction |
+|------------|-----------|
+| Saturation | Slightly reduced |
+| Contrast | Softer |
+| Temperature | Warm shift (+15%) |
+| Brightness | Soft, golden |
+
+## Color Temperature
+
+Shift palette toward warm tones:
+
+| Original | Warm Shift |
+|----------|-----------|
+| Cool blue | Soft teal |
+| Pure white | Cream |
+| Gray | Warm gray |
+| Black | Soft charcoal |
+
+## Accent Colors
+
+- Golden yellow (#D69E2E)
+- Soft orange (#DD6B20)
+- Warm brown (#8B6F47)
+- Sunset tones
+
+## Lighting
+
+- Golden hour lighting
+- Soft, diffused light
+- Warm indoor glow
+- Candle/lamp warmth
+- Gentle shadows
+
+## Emotional Range
+
+| Emotion | Expression |
+|---------|-----------|
+| Joy | Genuine warm smile |
+| Sadness | Gentle melancholy |
+| Love | Soft, tender expressions |
+| Memory | Distant, reflective gaze |
+
+## Composition
+
+- Intimate framing
+- Cozy environments
+- Soft focus backgrounds
+- Welcoming spaces
+- Personal moments highlighted
+
+## Visual Elements
+
+- Warm light rays
+- Soft edges
+- Nostalgic props (old photos, keepsakes)
+- Comfort objects (blankets, tea cups)
+- Nature elements (autumn leaves, sunset)
+
+## Best For
+
+- Personal stories
+- Childhood memories
+- Mentorship narratives
+- Family histories
+- Gentle biographies
+- Healing journeys
+
+## Combination Notes
+
+Works especially well with:
+- ligne-claire: nostalgic European comics
+- realistic: touching human stories
+- manga: slice-of-life warmth
+- chalk: nostalgic education
@@ -0,0 +1,401 @@
+# Complete Workflow
+
+Full workflow for generating knowledge comics.
+
+## Progress Checklist
+
+Copy and track progress:
+
+```
+Comic Progress:
+- [ ] Step 1: Setup & Analyze
+  - [ ] 1.1 Analyze content
+  - [ ] 1.2 Check existing ⚠️ REQUIRED
+- [ ] Step 2: Confirmation - Style & options ⚠️ REQUIRED
+- [ ] Step 3: Generate storyboard + characters
+- [ ] Step 4: Review outline (conditional)
+- [ ] Step 5: Generate prompts
+- [ ] Step 6: Review prompts (conditional)
+- [ ] Step 7: Generate images
+  - [ ] 7.1 Character sheet (if needed)
+  - [ ] 7.2 Generate pages
+- [ ] Step 8: Completion report
+```
+
+## Flow Diagram
+
+```
+Input → Analyze → [Check Existing?] → [Confirm: Style + Reviews] → Storyboard → [Review Outline?] → Prompts → [Review Prompts?] → Images → Complete
+```
+
+---
+
+## Step 1: Setup & Analyze
+
+### 1.1 Analyze Content → `analysis.md`
+
+Read source content, save it if needed, and perform deep analysis.
+
+**Actions**:
+1. **Save source content** (if not already a file):
+   - If user provides a file path: use as-is
+   - If user pastes content: save to `source-{slug}.md` in the target directory using `write_file`, where `{slug}` is the kebab-case topic slug used for the output directory
+   - **Backup rule**: If `source-{slug}.md` already exists, rename it to `source-{slug}-backup-YYYYMMDD-HHMMSS.md` before writing
+2. Read source content
+3. **Deep analysis** following `analysis-framework.md`:
+   - Target audience identification
+   - Value proposition for readers
+   - Core themes and narrative potential
+   - Key figures and their story arcs
+4. Detect source language
+5. **Determine language**:
+   - If user specified a language → use it
+   - Else → use detected source language or user's conversation language
+6. Determine recommended page count:
+   - Short story: 5-8 pages
+   - Medium complexity: 9-15 pages
+   - Full biography: 16-25 pages
+7. Analyze content signals for art/tone/layout recommendations
+8. **Save to `analysis.md`** using `write_file`
+
+**analysis.md Format**: YAML front matter (title, topic, time_span, source_language, user_language, aspect_ratio, recommended_page_count, recommended_art, recommended_tone) + sections for Target Audience, Value Proposition, Core Themes, Key Figures & Story Arcs, Content Signals, Recommended Approaches. See `analysis-framework.md` for full template.
+
+### 1.2 Check Existing Content ⚠️ REQUIRED
+
+**MUST execute before proceeding to Step 2.**
+
+Check if the output directory exists (e.g., via `test -d "comic/{topic-slug}"`).
+
+**If directory exists**, use `clarify`:
+
+```
+question: "Existing content found at comic/{topic-slug}. How to proceed?"
+options:
+  - "Regenerate storyboard — Keep images, regenerate storyboard and characters only"
+  - "Regenerate images — Keep storyboard, regenerate images only"
+  - "Backup and regenerate — Backup to {slug}-backup-{timestamp}, then regenerate all"
+  - "Exit — Cancel, keep existing content unchanged"
+```
+
+Save result and handle accordingly:
+- **Regenerate storyboard**: Skip to Step 3, preserve `prompts/` and images
+- **Regenerate images**: Skip to Step 7, use existing prompts
+- **Backup and regenerate**: Move directory, start fresh from Step 2
+- **Exit**: End workflow immediately
+
+---
+
+## Step 2: Confirmation - Style & Options ⚠️
+
+**Purpose**: Select visual style + decide whether to review outline before generation. **Do NOT skip.**
+
+**Display summary first**:
+- Content type + topic identified
+- Key figures extracted
+- Time span detected
+- Recommended page count
+- Language (detected or user-specified)
+- **Recommended style**: [art] + [tone] (based on content signals)
+
+**Use `clarify` one question at a time**, in priority order:
+
+> **Timeout handling (CRITICAL)**: if `clarify` returns `"The user did not provide a response within the time limit. Use your best judgement..."`, that is a per-question default, NOT blanket consent. Continue to the next question in the sequence — do not bail out of Step 2. Then, in your next user-visible message, explicitly surface every default that was taken (e.g. `"Defaulted style → ohmsha, narrative focus → concept explanation, audience → developers (clarify timed out on all three). Say the word to redirect."`). An unreported default is indistinguishable to the user from "the agent never asked."
+
+### Question 1: Visual Style
+
+If a preset is recommended (see `auto-selection.md`), show it first:
+
+```
+question: "Which visual style for this comic?"
+options:
+  - "[preset name] preset (Recommended) — [preset description] with special rules"
+  - "[recommended art] + [recommended tone] (Recommended) — Best match for your content"
+  - "ligne-claire + neutral — Classic educational, Logicomix style"
+  - "ohmsha preset — Educational manga with visual metaphors, gadgets, NO talking heads"
+  - "Custom — Specify your own art + tone or preset"
+```
+
+**Preset vs Art+Tone**: Presets include special rules beyond art+tone. `ohmsha` = manga + neutral + visual metaphor rules + character roles + NO talking heads. Plain `manga + neutral` does NOT include these rules.
+
+### Question 2: Narrative Focus
+
+```
+question: "What should the comic emphasize? (Pick the primary focus; mention others in a follow-up if needed)"
+options:
+  - "Biography/life story — Follow a person's journey through key life events"
+  - "Concept explanation — Break down complex ideas visually"
+  - "Historical event — Dramatize important historical moments"
+  - "Tutorial/how-to — Step-by-step educational guide"
+```
+
+### Question 3: Target Audience
+
+```
+question: "Who is the primary reader?"
+options:
+  - "General readers — Broad appeal, accessible content"
+  - "Students/learners — Educational focus, clear explanations"
+  - "Industry professionals — Technical depth, domain knowledge"
+  - "Children/young readers — Simplified language, engaging visuals"
+```
+
+### Question 4: Outline Review
+
+```
+question: "Do you want to review the outline before image generation?"
+options:
+  - "Yes, let me review (Recommended) — Review storyboard and characters before generating images"
+  - "No, generate directly — Skip outline review, start generating immediately"
+```
+
+### Question 5: Prompt Review
+
+```
+question: "Review prompts before generating images?"
+options:
+  - "Yes, review prompts (Recommended) — Review image generation prompts before generating"
+  - "No, skip prompt review — Proceed directly to image generation"
+```
+
+**After responses**:
+1. Update `analysis.md` with user preferences
+2. **Store `skip_outline_review`** flag based on Question 4 response
+3. **Store `skip_prompt_review`** flag based on Question 5 response
+4. → Step 3
+
+---
+
+## Step 3: Generate Storyboard + Characters
+
+Create storyboard and character definitions using the confirmed style from Step 2.
+
+**Loading Style References**:
+- Art style: `art-styles/{art}.md`
+- Tone: `tones/{tone}.md`
+- If preset (ohmsha/wuxia/shoujo/concept-story/four-panel): also load `presets/{preset}.md`
+
+**Generate**:
+
+1. **Storyboard** (`storyboard.md`):
+   - YAML front matter with art_style, tone, layout, aspect_ratio
+   - Cover design
+   - Each page: layout, panel breakdown, visual prompts
+   - **Written in user's preferred language** (from Step 1)
+   - Reference: `storyboard-template.md`
+   - **If using preset**: Load and apply preset rules from `presets/`
+
+2. **Character definitions** (`characters/characters.md`):
+   - Visual specs matching the art style (in user's preferred language)
+   - Include Reference Sheet Prompt for later image generation
+   - Reference: `character-template.md`
+   - **If using ohmsha preset**: Use default Doraemon characters (see below)
+
+**Ohmsha Default Characters** (use these unless user specifies custom characters):
+
+| Role | Character | Visual Description |
+|------|-----------|-------------------|
+| Student | 大雄 (Nobita) | Japanese boy, 10yo, round glasses, black hair parted in middle, yellow shirt, navy shorts |
+| Mentor | 哆啦 A 梦 (Doraemon) | Round blue robot cat, big white eyes, red nose, whiskers, white belly with 4D pocket, golden bell, no ears |
+| Challenge | 胖虎 (Gian) | Stocky boy, rough features, small eyes, orange shirt |
+| Support | 静香 (Shizuka) | Cute girl, black short hair, pink dress, gentle expression |
+
+These are the canonical ohmsha-style characters. Do NOT create custom characters for ohmsha unless explicitly requested.
+
+**After generation**:
+- If `skip_outline_review` is true → Skip Step 4, go directly to Step 5
+- If `skip_outline_review` is false → Continue to Step 4
+
+---
+
+## Step 4: Review Outline (Conditional)
+
+**Skip this step** if user selected "No, generate directly" in Step 2.
+
+**Purpose**: User reviews and confirms storyboard + characters before generation.
+
+**Display**:
+- Page count and structure
+- Art style + Tone combination
+- Page-by-page summary (Cover → P1 → P2...)
+- Character list with brief descriptions
+
+**Use `clarify`**:
+
+```
+question: "Ready to generate images with this outline?"
+options:
+  - "Yes, proceed (Recommended) — Generate character sheet and comic pages"
+  - "Edit storyboard first — I'll modify storyboard.md before continuing"
+  - "Edit characters first — I'll modify characters/characters.md before continuing"
+  - "Edit both — I'll modify both files before continuing"
+```
+
+**After response**:
+1. If user wants to edit → Wait for user to finish editing, then ask again
+2. If user confirms → Continue to Step 5
+
+---
+
+## Step 5: Generate Prompts
+
+Create image generation prompts for all pages.
+
+**Style Reference Loading**:
+- Read `art-styles/{art}.md` for rendering guidelines
+- Read `tones/{tone}.md` for mood/color adjustments
+- If preset: Read `presets/{preset}.md` for special rules
+
+**For each page (cover + pages)**:
+1. Create prompt following art style + tone guidelines
+2. **Embed character descriptions** inline (copy relevant traits from `characters/characters.md`) — `image_generate` is prompt-only, so the prompt text is the sole vehicle for character consistency
+3. Save to `prompts/NN-{cover|page}-[slug].md` using `write_file`
+   - **Backup rule**: If prompt file exists, rename to `prompts/NN-{cover|page}-[slug]-backup-YYYYMMDD-HHMMSS.md`
+
+**Prompt File Format**:
+```markdown
+# Page NN: [Title]
+
+## Visual Style
+Art: [art style] | Tone: [tone] | Layout: [layout type]
+
+## Character Reference (embedded inline — maintain exact traits below)
+- [Character A]: [detailed visual traits from characters/characters.md]
+- [Character B]: [detailed visual traits from characters/characters.md]
+
+## Panel Breakdown
+[From storyboard.md - panel descriptions, actions, dialogue]
+
+## Generation Prompt
+[Combined prompt passed to image_generate]
+```
+
+**After generation**:
+- If `skip_prompt_review` is true → Skip Step 6, go directly to Step 7
+- If `skip_prompt_review` is false → Continue to Step 6
+
+---
+
+## Step 6: Review Prompts (Conditional)
+
+**Skip this step** if user selected "No, skip prompt review" in Step 2.
+
+**Purpose**: User reviews and confirms prompts before image generation.
+
+**Display prompt summary table**:
+
+| Page | Title | Key Elements |
+|------|-------|--------------|
+| Cover | [title] | [main visual] |
+| P1 | [title] | [key elements] |
+| ... | ... | ... |
+
+**Use `clarify`**:
+
+```
+question: "Ready to generate images with these prompts?"
+options:
+  - "Yes, proceed (Recommended) — Generate all comic page images"
+  - "Edit prompts first — I'll modify prompts/*.md before continuing"
+  - "Regenerate prompts — Regenerate all prompts with different approach"
+```
+
+**After response**:
+1. If user wants to edit → Wait for user to finish editing, then ask again
+2. If user wants to regenerate → Go back to Step 5
+3. If user confirms → Continue to Step 7
+
+---
+
+## Step 7: Generate Images
+
+With confirmed prompts from Step 5/6, use the `image_generate` tool. The tool accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`) and **returns a URL** — it does not accept reference images and does not write local files. Every invocation must be followed by a download step.
+
+**Aspect ratio mapping** — map the storyboard's `aspect_ratio` to the tool's enum:
+
+| Storyboard ratio | `image_generate` format |
+|------------------|-------------------------|
+| `3:4`, `9:16`, `2:3` | `portrait` |
+| `4:3`, `16:9`, `3:2` | `landscape` |
+| `1:1` | `square` |
+
+**Download procedure** (run after every successful `image_generate` call):
+
+1. Extract the `url` field from the tool result
+2. Fetch it to disk, e.g. `curl -fsSL "<url>" -o comic/{slug}/<target>.png`
+3. Verify the file is non-empty (`test -s <target>.png`); on failure, retry the generation once
+
+### 7.1 Generate Character Reference Sheet (conditional)
+
+Character sheet is recommended for multi-page comics with recurring characters, but **NOT required** for all presets.
+
+**When to generate**:
+
+| Condition | Action |
+|-----------|--------|
+| Multi-page comic with detailed/recurring characters | Generate character sheet (recommended) |
+| Preset with simplified characters (e.g., four-panel minimalist) | Skip — prompt descriptions are sufficient |
+| Single-page comic | Skip unless characters are complex |
+
+**When generating**:
+1. Use Reference Sheet Prompt from `characters/characters.md`
+2. **Backup rule**: If `characters/characters.png` exists, rename to `characters/characters-backup-YYYYMMDD-HHMMSS.png`
+3. Call `image_generate` with `landscape` format
+4. Download the returned URL → save to `characters/characters.png`
+
+**Important**: the downloaded sheet is a **human-facing review artifact** (so the user can visually verify character design) and a reference for later regenerations or manual prompt edits. It does **not** drive Step 7.2 — page prompts were already written in Step 5 from the text descriptions in `characters/characters.md`. `image_generate` cannot accept images as visual input, so the text is the sole cross-page consistency mechanism.
+
+### 7.2 Generate Comic Pages
+
+**Before generating any page**:
+1. Confirm each prompt file exists at `prompts/NN-{cover|page}-[slug].md`
+2. Confirm that each prompt has character descriptions embedded inline (see Step 5). `image_generate` is prompt-only, so the prompt text is the sole consistency mechanism.
+
+**Page Generation Strategy**: every page prompt must embed character descriptions (sourced from `characters/characters.md`) inline. This is done during Step 5, uniformly whether or not the PNG sheet was produced in 7.1 — the PNG is only a review/regeneration aid, never a generation input.
+
+**Example embedded prompt** (`prompts/01-page-xxx.md`):
+
+```markdown
+# Page 01: [Title]
+
+## Character Reference (embedded inline — maintain consistency)
+- 大雄：Japanese boy, round glasses, yellow shirt, navy shorts, worried expression...
+- 哆啦 A 梦：Round blue robot cat, white belly, red nose, golden bell, 4D pocket...
+
+## Page Content
+[Original page prompt body — panels, dialogue, visual metaphors]
+```
+
+**For each page (cover + pages)**:
+1. Read prompt from `prompts/NN-{cover|page}-[slug].md`
+2. **Backup rule**: If image file exists, rename to `NN-{cover|page}-[slug]-backup-YYYYMMDD-HHMMSS.png`
+3. Call `image_generate` with the prompt text and mapped aspect ratio
+4. Download the returned URL → save to `NN-{cover|page}-[slug].png`
+5. Report progress after each generation: "Generated X/N: [page title]"
+
+---
+
+## Step 8: Completion Report
+
+```
+Comic Complete!
+Title: [title] | Art: [art] | Tone: [tone] | Pages: [count] | Aspect: [ratio] | Language: [lang]
+Location: [path]
+✓ source-{slug}.md (if content was pasted)
+✓ analysis.md
+✓ characters.png (if generated)
+✓ 00-cover-[slug].png ... NN-page-[slug].png
+```
+
+---
+
+## Page Modification
+
+| Action | Steps |
+|--------|-------|
+| **Edit** | Update prompt → Regenerate image → Download new PNG |
+| **Add** | Create prompt at position → Generate image → Download PNG → Renumber subsequent (NN+1) → Update storyboard |
+| **Delete** | Remove files → Renumber subsequent (NN-1) → Update storyboard |
+
+**File naming**: `NN-{cover|page}-[slug].png` (e.g., `03-page-enigma-machine.png`)
+- Slugs: kebab-case, unique, derived from content
+- Renumbering: Update NN prefix only, slugs unchanged
@@ -483,6 +483,7 @@ class TestNousAuxiliaryRefresh:
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous
@@ -491,10 +492,60 @@ class TestNousAuxiliaryRefresh:
            client, model = _try_nous()

        assert client is not None
+        # No Portal recommendation → falls back to the hardcoded default.
        assert model == "google/gemini-3-flash-preview"
        assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
        assert mock_openai.call_args.kwargs["base_url"] == fresh_base

+    def test_try_nous_uses_portal_recommendation_for_text(self):
+        """When the Portal recommends a compaction model, _try_nous honors it."""
+        fresh_base = "https://inference-api.nousresearch.com/v1"
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec,
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+        ):
+            from agent.auxiliary_client import _try_nous
+
+            mock_openai.return_value = MagicMock()
+            client, model = _try_nous(vision=False)
+
+        assert client is not None
+        assert model == "minimax/minimax-m2.7"
+        assert mock_rec.call_args.kwargs["vision"] is False
+
+    def test_try_nous_uses_portal_recommendation_for_vision(self):
+        """Vision tasks should ask for the vision-specific recommendation."""
+        fresh_base = "https://inference-api.nousresearch.com/v1"
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec,
+            patch("agent.auxiliary_client.OpenAI"),
+        ):
+            from agent.auxiliary_client import _try_nous
+            client, model = _try_nous(vision=True)
+
+        assert client is not None
+        assert model == "google/gemini-3-flash-preview"
+        assert mock_rec.call_args.kwargs["vision"] is True
+
+    def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
+        """If the Portal lookup throws, we must still return a usable model."""
+        fresh_base = "https://inference-api.nousresearch.com/v1"
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
+            patch("agent.auxiliary_client.OpenAI"),
+        ):
+            from agent.auxiliary_client import _try_nous
+            client, model = _try_nous()
+
+        assert client is not None
+        assert model == "google/gemini-3-flash-preview"
+
    def test_call_llm_retries_nous_after_401(self):
        class _Auth401(Exception):
            status_code = 401
@@ -253,6 +253,35 @@ class TestSummaryPrefixNormalization:


 class TestCompressWithClient:
+    def test_system_content_list_gets_compression_note_without_crashing(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [
+            {"role": "system", "content": [{"type": "text", "text": "system prompt"}]},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs)
+
+        assert isinstance(result[0]["content"], list)
+        assert any(
+            isinstance(block, dict)
+            and "compacted into a handoff summary" in block.get("text", "")
+            for block in result[0]["content"]
+        )
+
    def test_summarization_path(self):
        mock_client = MagicMock()
        mock_response = MagicMock()
@@ -460,6 +489,41 @@ class TestCompressWithClient:
        assert len(first_tail) == 1
        assert "summary text" in first_tail[0]["content"]

+    def test_double_collision_merges_summary_into_list_tail_content(self):
+        """Structured tail content should accept a merged summary without TypeError."""
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=3)
+
+        msgs = [
+            {"role": "system", "content": "system prompt"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "user", "content": [{"type": "text", "text": "msg 6"}]},
+            {"role": "assistant", "content": "msg 7"},
+            {"role": "user", "content": "msg 8"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs)
+
+        merged_tail = next(
+            m for m in result
+            if m.get("role") == "user" and isinstance(m.get("content"), list)
+        )
+        assert isinstance(merged_tail["content"], list)
+        assert "summary text" in merged_tail["content"][0]["text"]
+        assert any(
+            isinstance(block, dict) and block.get("text") == "msg 6"
+            for block in merged_tail["content"]
+        )
+
    def test_double_collision_user_head_assistant_tail(self):
        """Reverse double collision: head ends with 'user', tail starts with 'assistant'.
        summary='assistant' collides with tail, 'user' collides with head → merge."""
@@ -298,9 +298,15 @@ class TestClassifyApiError:
        assert result.retryable is False

    def test_404_generic(self):
+        # Generic 404 with no "model not found" signal — common for local
+        # llama.cpp/Ollama/vLLM endpoints with slightly wrong paths.  Treat
+        # as unknown (retryable) so the real error surfaces, rather than
+        # claiming the model is missing and silently falling back.
        e = MockAPIError("Not Found", status_code=404)
        result = classify_api_error(e)
-        assert result.reason == FailoverReason.model_not_found
+        assert result.reason == FailoverReason.unknown
+        assert result.retryable is True
+        assert result.should_fallback is False

    # ── Payload too large ──

@@ -943,3 +949,94 @@ class TestAdversarialEdgeCases:
        e = MockAPIError("server error", status_code=500, body={"message": None})
        result = classify_api_error(e)
        assert result is not None
+
+
+# ── Test: SSL/TLS transient errors ─────────────────────────────────────
+
+class TestSSLTransientPatterns:
+    """SSL/TLS alerts mid-stream should retry as timeout, not unknown, and
+    should NOT trigger context compression even on a large session.
+
+    Motivation: OpenSSL 3.x changed TLS alert error code format
+    (`SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+    breaking string-exact matching in downstream retry logic.  We match
+    stable substrings instead.
+    """
+
+    def test_bad_record_mac_classifies_as_timeout(self):
+        """OpenSSL 3.x mid-stream bad record mac alert."""
+        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac (_ssl.c:2580)")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_openssl_3x_format_classifies_as_timeout(self):
+        """New format `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC` still matches
+        because we key on both space- and underscore-separated forms of
+        the stable `bad_record_mac` token."""
+        e = Exception("ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC during streaming")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_tls_alert_internal_error_classifies_as_timeout(self):
+        e = Exception("[SSL: TLSV1_ALERT_INTERNAL_ERROR] tlsv1 alert internal error")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_ssl_handshake_failure_classifies_as_timeout(self):
+        e = Exception("ssl handshake failure during mid-stream")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+
+    def test_ssl_prefix_classifies_as_timeout(self):
+        """Python's generic '[SSL: XYZ]' prefix from the ssl module."""
+        e = Exception("[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+
+    def test_ssl_alert_on_large_session_does_not_compress(self):
+        """Critical: SSL alerts on big contexts must NOT trigger context
+        compression — compression is expensive and won't fix a transport
+        hiccup.  This is why _SSL_TRANSIENT_PATTERNS is separate from
+        _SERVER_DISCONNECT_PATTERNS.
+        """
+        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac")
+        result = classify_api_error(
+            e,
+            approx_tokens=180000,      # 90% of a 200k-context window
+            context_length=200000,
+            num_messages=300,
+        )
+        assert result.reason == FailoverReason.timeout
+        assert result.should_compress is False
+
+    def test_plain_disconnect_on_large_session_still_compresses(self):
+        """Regression guard: the context-overflow-via-disconnect path
+        (non-SSL disconnects on large sessions) must still trigger
+        compression.  Only SSL-specific disconnects skip it.
+        """
+        e = Exception("Server disconnected without sending a response")
+        result = classify_api_error(
+            e,
+            approx_tokens=180000,
+            context_length=200000,
+            num_messages=300,
+        )
+        assert result.reason == FailoverReason.context_overflow
+        assert result.should_compress is True
+
+    def test_real_ssl_error_type_classifies_as_timeout(self):
+        """Real ssl.SSLError instance — the type name alone (not message)
+        should route to the transport bucket."""
+        import ssl
+        e = ssl.SSLError("arbitrary ssl error")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
@@ -0,0 +1,111 @@
+"""Tests for agent/image_gen_registry.py — provider registration & active lookup."""
+
+from __future__ import annotations
+
+import pytest
+
+from agent import image_gen_registry
+from agent.image_gen_provider import ImageGenProvider
+
+
+class _FakeProvider(ImageGenProvider):
+    def __init__(self, name: str, available: bool = True):
+        self._name = name
+        self._available = available
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def is_available(self) -> bool:
+        return self._available
+
+    def generate(self, prompt, aspect_ratio="landscape", **kw):
+        return {"success": True, "image": f"{self._name}://{prompt}"}
+
+
+@pytest.fixture(autouse=True)
+def _reset_registry():
+    image_gen_registry._reset_for_tests()
+    yield
+    image_gen_registry._reset_for_tests()
+
+
+class TestRegisterProvider:
+    def test_register_and_lookup(self):
+        provider = _FakeProvider("fake")
+        image_gen_registry.register_provider(provider)
+        assert image_gen_registry.get_provider("fake") is provider
+
+    def test_rejects_non_provider(self):
+        with pytest.raises(TypeError):
+            image_gen_registry.register_provider("not a provider")  # type: ignore[arg-type]
+
+    def test_rejects_empty_name(self):
+        class Empty(ImageGenProvider):
+            @property
+            def name(self) -> str:
+                return ""
+
+            def generate(self, prompt, aspect_ratio="landscape", **kw):
+                return {}
+
+        with pytest.raises(ValueError):
+            image_gen_registry.register_provider(Empty())
+
+    def test_reregister_overwrites(self):
+        a = _FakeProvider("same")
+        b = _FakeProvider("same")
+        image_gen_registry.register_provider(a)
+        image_gen_registry.register_provider(b)
+        assert image_gen_registry.get_provider("same") is b
+
+    def test_list_is_sorted(self):
+        image_gen_registry.register_provider(_FakeProvider("zeta"))
+        image_gen_registry.register_provider(_FakeProvider("alpha"))
+        names = [p.name for p in image_gen_registry.list_providers()]
+        assert names == ["alpha", "zeta"]
+
+
+class TestGetActiveProvider:
+    def test_single_provider_autoresolves(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        image_gen_registry.register_provider(_FakeProvider("solo"))
+        active = image_gen_registry.get_active_provider()
+        assert active is not None and active.name == "solo"
+
+    def test_fal_preferred_on_multi_without_config(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        image_gen_registry.register_provider(_FakeProvider("fal"))
+        image_gen_registry.register_provider(_FakeProvider("openai"))
+        active = image_gen_registry.get_active_provider()
+        assert active is not None and active.name == "fal"
+
+    def test_explicit_config_wins(self, tmp_path, monkeypatch):
+        import yaml
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "config.yaml").write_text(
+            yaml.safe_dump({"image_gen": {"provider": "openai"}})
+        )
+        image_gen_registry.register_provider(_FakeProvider("fal"))
+        image_gen_registry.register_provider(_FakeProvider("openai"))
+        active = image_gen_registry.get_active_provider()
+        assert active is not None and active.name == "openai"
+
+    def test_missing_configured_provider_falls_back(self, tmp_path, monkeypatch):
+        import yaml
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "config.yaml").write_text(
+            yaml.safe_dump({"image_gen": {"provider": "replicate"}})
+        )
+        # Only FAL is registered — configured provider doesn't exist
+        image_gen_registry.register_provider(_FakeProvider("fal"))
+        active = image_gen_registry.get_active_provider()
+        # Falls back to FAL preference (legacy default) rather than None
+        assert active is not None and active.name == "fal"
+
+    def test_none_when_empty(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        assert image_gen_registry.get_active_provider() is None
@@ -0,0 +1,115 @@
+"""Regression guard: don't send Anthropic ``thinking`` to Kimi's /coding endpoint.
+
+Kimi's ``api.kimi.com/coding`` endpoint speaks the Anthropic Messages protocol
+but has its own thinking semantics.  When ``thinking.enabled`` is present in
+the request, Kimi validates the message history and requires every prior
+assistant tool-call message to carry OpenAI-style ``reasoning_content``.
+
+The Anthropic path never populates that field, and
+``convert_messages_to_anthropic`` strips Anthropic thinking blocks on
+third-party endpoints — so after one turn with tool calls the next request
+fails with HTTP 400::
+
+    thinking is enabled but reasoning_content is missing in assistant
+    tool call message at index N
+
+Kimi on the chat_completions route handles ``thinking`` via ``extra_body`` in
+``ChatCompletionsTransport`` (#13503).  On the Anthropic route the right
+thing to do is drop the parameter entirely and let Kimi drive reasoning
+server-side.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+
+class TestKimiCodingSkipsAnthropicThinking:
+    """build_anthropic_kwargs must not inject ``thinking`` for Kimi /coding."""
+
+    @pytest.mark.parametrize(
+        "base_url",
+        [
+            "https://api.kimi.com/coding",
+            "https://api.kimi.com/coding/v1",
+            "https://api.kimi.com/coding/anthropic",
+            "https://api.kimi.com/coding/",
+        ],
+    )
+    def test_kimi_coding_endpoint_omits_thinking(self, base_url: str) -> None:
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "medium"},
+            base_url=base_url,
+        )
+        assert "thinking" not in kwargs, (
+            "Anthropic thinking must not be sent to Kimi /coding — "
+            "endpoint requires reasoning_content on history we don't preserve."
+        )
+        assert "output_config" not in kwargs
+
+    def test_kimi_coding_with_explicit_disabled_also_omits(self) -> None:
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": False},
+            base_url="https://api.kimi.com/coding",
+        )
+        assert "thinking" not in kwargs
+
+    def test_non_kimi_third_party_still_gets_thinking(self) -> None:
+        """MiniMax and other third-party Anthropic endpoints must retain thinking."""
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="MiniMax-M2.7",
+            messages=[{"role": "user", "content": "hello"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "medium"},
+            base_url="https://api.minimax.io/anthropic",
+        )
+        assert "thinking" in kwargs
+        assert kwargs["thinking"]["type"] == "enabled"
+
+    def test_native_anthropic_still_gets_thinking(self) -> None:
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-20250514",
+            messages=[{"role": "user", "content": "hello"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "medium"},
+            base_url=None,
+        )
+        assert "thinking" in kwargs
+
+    def test_kimi_root_endpoint_unaffected(self) -> None:
+        """Only the /coding route is special-cased — plain api.kimi.com is not.
+
+        ``api.kimi.com`` without ``/coding`` uses the chat_completions transport
+        (see runtime_provider._detect_api_mode_for_url); build_anthropic_kwargs
+        should never see it, but if it somehow does we should not suppress
+        thinking there — that path has different semantics.
+        """
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        kwargs = build_anthropic_kwargs(
+            model="kimi-k2.5",
+            messages=[{"role": "user", "content": "hello"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "medium"},
+            base_url="https://api.kimi.com/v1",
+        )
+        assert "thinking" in kwargs
@@ -106,3 +106,25 @@ class TestIsLocalEndpoint:
    ])
    def test_remote_endpoints(self, url):
        assert is_local_endpoint(url) is False
+
+    @pytest.mark.parametrize("url", [
+        "http://100.64.0.0:11434",            # lower bound of CGNAT block
+        "http://100.64.0.1:11434/v1",         # lower bound +1
+        "http://100.77.243.5:11434",          # representative Tailscale host
+        "https://100.100.100.100:443",        # Tailscale MagicDNS anchor
+        "https://100.127.255.254:443",        # upper bound -1
+        "http://100.127.255.255:11434",       # upper bound of CGNAT block
+    ])
+    def test_tailscale_cgnat_is_local(self, url):
+        """Tailscale 100.64.0.0/10 should be treated as local for timeout bumps."""
+        assert is_local_endpoint(url) is True
+
+    @pytest.mark.parametrize("url", [
+        "http://100.63.255.255:11434",        # just below CGNAT block
+        "http://100.128.0.1:11434",           # just above CGNAT block
+        "http://100.200.0.1:11434",           # well outside CGNAT
+        "http://99.64.0.1:11434",             # first octet wrong
+    ])
+    def test_near_but_not_cgnat_is_remote(self, url):
+        """Hosts adjacent to but outside 100.64.0.0/10 must not match."""
+        assert is_local_endpoint(url) is False
@@ -79,6 +79,28 @@ class TestMemoryManagerUserIdThreading:
        assert p._init_kwargs.get("platform") == "telegram"
        assert p._init_session_id == "sess-123"

+    def test_chat_context_forwarded_to_provider(self):
+        mgr = MemoryManager()
+        p = RecordingProvider()
+        mgr.add_provider(p)
+
+        mgr.initialize_all(
+            session_id="sess-chat",
+            platform="discord",
+            user_id="discord_u_7",
+            user_name="fakeusername",
+            chat_id="1485316232612941897",
+            chat_name="fakeassistantname-forums",
+            chat_type="thread",
+            thread_id="1491249007475949698",
+        )
+
+        assert p._init_kwargs.get("user_name") == "fakeusername"
+        assert p._init_kwargs.get("chat_id") == "1485316232612941897"
+        assert p._init_kwargs.get("chat_name") == "fakeassistantname-forums"
+        assert p._init_kwargs.get("chat_type") == "thread"
+        assert p._init_kwargs.get("thread_id") == "1491249007475949698"
+
    def test_no_user_id_when_cli(self):
        """CLI sessions should not have user_id in kwargs."""
        mgr = MemoryManager()
@@ -334,3 +356,4 @@ class TestAIAgentUserIdPropagation:
            agent = object.__new__(AIAgent)
            agent._user_id = None
            assert agent._user_id is None
+
@@ -385,6 +385,7 @@ class TestStripProviderPrefix:
        assert _strip_provider_prefix("local:my-model") == "my-model"
        assert _strip_provider_prefix("openrouter:anthropic/claude-sonnet-4") == "anthropic/claude-sonnet-4"
        assert _strip_provider_prefix("anthropic:claude-sonnet-4") == "claude-sonnet-4"
+        assert _strip_provider_prefix("stepfun:step-3.5-flash") == "step-3.5-flash"

    def test_ollama_model_tag_preserved(self):
        """Ollama model:tag format must NOT be stripped."""
--- a/Show More
+++ b/Show More