feat(tui): add model picker and approval/clarify prompt workflows

Three new static prompt components for showroom capture (no useInput):
- ApprovalPromptStatic: double-bordered warning box with command preview, 4 options (allow once/session/always/deny), ▸ selection, footer hints
- ClarifyPromptStatic: heading + numbered choices with 'Other' option
- ModelPickerStatic: double-bordered popup with provider/model lists, current model header, persist toggle, quick-pick numbers

New workflows:
- interactive-prompts: approval → clarify → deploy result
- model-picker: provider stage → model stage → switch result
feat(tui): add interactive prompts workflow to showroom
2026-04-26 01:17:21 -05:00 · 2026-04-26 01:15:42 -05:00 · 2026-04-26 01:11:27 -05:00 · 2026-04-26 01:02:32 -05:00 · 2026-04-26 00:44:36 -05:00 · 2026-04-25 23:39:06 -05:00
127 changed files with 8151 additions and 2745 deletions
@@ -1680,9 +1680,9 @@ def build_anthropic_kwargs(

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
-    # Callers (auxiliary_client, flush_memories, etc.) may set these for
-    # older models; drop them here as a safety net so upstream 4.6 → 4.7
-    # migrations don't require coordinated edits everywhere.
+    # Callers (auxiliary_client, etc.) may set these for older models;
+    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
+    # don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)
@@ -390,7 +390,7 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

-        # Tools support for flush_memories and similar callers
+        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
            converted = []
@@ -1349,6 +1349,49 @@ def _is_auth_error(exc: Exception) -> bool:
    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()


+def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
+    """Detect, from the error text, provider 400s for an unsupported request parameter.
+
+    Different OpenAI-compatible endpoints phrase the same class of error a few
+    ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
+    ``param`` field, ``X is not supported``, ``unknown parameter: X``,
+    ``unrecognized request argument: X``.  We do a case-insensitive substring
+    match on ``str(exc)`` that requires both the parameter name and one of the
+    generic markers below (an empty ``param`` never matches), so call sites can
+    retry without the offending key instead of surfacing a noisy auxiliary failure.
+
+    Generalizes the temperature-specific detector that originally shipped
+    with PR #15621 so the same retry strategy can cover ``max_tokens``,
+    ``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
+    for the generalization pattern.
+    """
+    param_lower = (param or "").lower()
+    if not param_lower:
+        return False
+    err_lower = str(exc).lower()
+    if param_lower not in err_lower:
+        return False
+    return any(marker in err_lower for marker in (
+        "unsupported parameter",
+        "unsupported_parameter",
+        "not supported",
+        "does not support",
+        "unknown parameter",
+        "unrecognized request argument",
+        "unrecognized parameter",
+        "invalid parameter",
+    ))
+
+
+def _is_unsupported_temperature_error(exc: Exception) -> bool:
+    """Back-compat wrapper: detect API errors where the model rejects ``temperature``.
+
+    Delegates to :func:`_is_unsupported_parameter_error`; kept as a separate
+    module-level name because existing tests and call sites import it by name.
+    """
+    return _is_unsupported_parameter_error(exc, "temperature")
+
+
 def _evict_cached_clients(provider: str) -> None:
    """Drop cached auxiliary clients for a provider so fresh creds are used."""
    normalized = _normalize_aux_provider(provider)
@@ -2760,8 +2803,8 @@ def _build_call_kwargs(
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
-    # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
-    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
+    # drop here so auxiliary callers that hardcode temperature (e.g. 0 on
+    # structured-JSON extraction) don't 400 the moment
    # the aux model is flipped to 4.7.
    if temperature is not None:
        from agent.anthropic_adapter import _forbids_sampling_params
@@ -2849,7 +2892,7 @@ def call_llm(

    Args:
        task: Auxiliary task name ("compression", "vision", "web_extract",
-              "session_search", "skills_hub", "mcp", "flush_memories").
+              "session_search", "skills_hub", "mcp", "title_generation").
              Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
@@ -2952,13 +2995,45 @@ def call_llm(
    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

-    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
+    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
+    # then payment fallback.
    try:
        return _validate_llm_response(
            client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s: provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                # If retry still fails, fall through to the max_tokens /
+                # payment / auth chains below using the temperature-stripped
+                # kwargs.  Re-raise only if the retry hit something those
+                # chains won't handle.
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -3221,8 +3296,35 @@ async def async_call_llm(
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s (async): provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    await client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -44,22 +44,31 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
 # Multimodal content helpers
 # ---------------------------------------------------------------------------

-def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
+def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
    """Convert chat-style multimodal content to Responses API input parts.

    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
+    Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
+
+    The ``role`` parameter controls the text content type:
+    - ``"user"`` (default) → ``"input_text"``
+    - ``"assistant"`` → ``"output_text"``
+
+    The Responses API rejects ``input_text`` inside assistant messages and
+    ``output_text`` inside user messages, so callers MUST pass the correct
+    role for the message being converted.

    Returns an empty list when ``content`` is not a list or contains no
    recognized parts — callers fall back to the string path.
    """
+    text_type = "output_text" if role == "assistant" else "input_text"
    if not isinstance(content, list):
        return []
    converted: List[Dict[str, Any]] = []
    for part in content:
        if isinstance(part, str):
            if part:
-                converted.append({"type": "input_text", "text": part})
+                converted.append({"type": text_type, "text": part})
            continue
        if not isinstance(part, dict):
            continue
@@ -67,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
        if ptype in {"text", "input_text", "output_text"}:
            text = part.get("text")
            if isinstance(text, str) and text:
-                converted.append({"type": "input_text", "text": text})
+                converted.append({"type": text_type, "text": text})
            continue
        if ptype in {"image_url", "input_image"}:
            image_ref = part.get("image_url")
@@ -233,9 +242,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
        if role in {"user", "assistant"}:
            content = msg.get("content", "")
            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content)
+                content_parts = _chat_content_to_responses_parts(content, role=role)
+                text_type = "output_text" if role == "assistant" else "input_text"
                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
+                    p.get("text", "") for p in content_parts if p.get("type") == text_type
                )
            else:
                content_parts = []
@@ -429,13 +439,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                content = ""
            if isinstance(content, list):
                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``input_image``).
-                # Validate each part and pass through.
+                # is already in Responses format (``input_text`` / ``output_text``
+                # / ``input_image``).  Validate each part and pass through.
+                # Use the correct text type for the role — ``output_text`` for
+                # assistant messages, ``input_text`` for user messages.
+                text_type = "output_text" if role == "assistant" else "input_text"
                validated: List[Dict[str, Any]] = []
                for part_idx, part in enumerate(content):
                    if isinstance(part, str):
                        if part:
-                            validated.append({"type": "input_text", "text": part})
+                            validated.append({"type": text_type, "text": part})
                        continue
                    if not isinstance(part, dict):
                        raise ValueError(
@@ -446,7 +459,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                        text = part.get("text", "")
                        if not isinstance(text, str):
                            text = str(text or "")
-                        validated.append({"type": "input_text", "text": text})
+                        validated.append({"type": text_type, "text": text})
                    elif ptype in {"input_image", "image_url"}:
                        image_ref = part.get("image_url", "")
                        detail = part.get("detail")
@@ -318,6 +318,13 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        # Recalculate token budgets for the new context length so the
+        # compressor stays calibrated after a model switch (e.g. 200K → 32K).
+        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
+        self.tail_token_budget = target_tokens
+        self.max_summary_tokens = min(
+            int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
+        )

    def __init__(
        self,
@@ -796,6 +796,10 @@ delegation:
                                              # Raise to 2 to allow workers to spawn their own subagents.
                                              # Requires role="orchestrator" on intermediate agents.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  # subagent_auto_approve: false              # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
+                                              # or auto-approve "once" (true) instead of blocking on stdin.
+                                              # The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
+                                              # Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
@@ -3176,7 +3176,14 @@ class HermesCLI:
        # the configured model (e.g. "qwen3.6-plus"), causing 400 errors.
        runtime_model = runtime.get("model")
        if runtime_model and isinstance(runtime_model, str):
-            self.model = runtime_model
+            # Only use runtime model if: model is unset, or model equals the provider slug or display name
+            should_use_runtime_model = (
+                not self.model or  # No model configured yet
+                self.model == self.provider or  # Model is the provider slug
+                self.model == runtime.get("name")  # Model matches provider display name
+            )
+            if should_use_runtime_model:
+                self.model = runtime_model

        # If model is still empty (e.g. user ran `hermes auth add openai-codex`
        # without `hermes model`), fall back to the provider's first catalog
@@ -4311,7 +4318,7 @@ class HermesCLI:

        _cprint(f"\n  {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
        _cprint(f"  {_DIM}Multi-line: Alt+Enter for a new line{_RST}")
-        _cprint(f"  {_DIM}Draft editor: Ctrl+G{_RST}")
+        _cprint(f"  {_DIM}Draft editor: Ctrl+G (Alt+G in VSCode/Cursor){_RST}")
        if _is_termux_environment():
            _cprint(f"  {_DIM}Attach image: /image {_termux_example_image_path()} or start your prompt with a local image path{_RST}\n")
        else:
@@ -4661,10 +4668,6 @@ class HermesCLI:
    def new_session(self, silent=False):
        """Start a fresh session with a new session ID and cleared agent state."""
        if self.agent and self.conversation_history:
-            try:
-                self.agent.flush_memories(self.conversation_history)
-            except (Exception, KeyboardInterrupt):
-                pass
            # Trigger memory extraction on the old session before session_id rotates.
            self.agent.commit_memory_session(self.conversation_history)
            self._notify_session_boundary("on_session_finalize")
@@ -9305,14 +9308,18 @@ class HermesCLI:
            """Ctrl+Enter (c-j) inserts a newline. Most terminals send c-j for Ctrl+Enter."""
            event.current_buffer.insert_text('\n')

-        @kb.add(
-            'c-g',
-            filter=Condition(
-                lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
-            ),
+        # VSCode/Cursor bind Ctrl+G to "Find Next" at the editor level, so
+        # the keystroke never reaches the embedded terminal. Alt+G is unbound
+        # in those IDEs and arrives here as ('escape', 'g') — register it as
+        # a fallback so the editor handoff works inside Cursor/VSCode too.
+        _editor_filter = Condition(
+            lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
        )
+
+        @kb.add('c-g', filter=_editor_filter)
+        @kb.add('escape', 'g', filter=_editor_filter)
        def handle_open_in_editor(event):
-            """Ctrl+G opens the current draft in an external editor."""
+            """Ctrl+G (or Alt+G in VSCode/Cursor) opens the current draft in an external editor."""
            cli_ref._open_external_editor(event.current_buffer)

        @kb.add('tab', eager=True)
@@ -9776,6 +9783,11 @@ class HermesCLI:
                completer=_completer,
            ),
        )
+        # Keep prompt_toolkit on its simple tempfile path. Setting
+        # buffer.tempfile = "prompt.md" triggers its complex-tempfile branch,
+        # which tries to mkdir() the mkdtemp() directory again and raises
+        # EEXIST. The suffix keeps markdown highlighting without that bug.
+        input_area.buffer.tempfile_suffix = '.md'

        # Dynamic height: accounts for both explicit newlines AND visual
        # wrapping of long lines so the input area always fits its content.
@@ -10781,12 +10793,6 @@ class HermesCLI:
                    self.agent.interrupt()
                except Exception:
                    pass
-            # Flush memories before exit (only for substantial conversations)
-            if self.agent and self.conversation_history:
-                try:
-                    self.agent.flush_memories(self.conversation_history)
-                except (Exception, KeyboardInterrupt):
-                    pass
            # Shut down voice recorder (release persistent audio stream)
            if hasattr(self, '_voice_recorder') and self._voice_recorder:
                try:
@@ -16,7 +16,7 @@ import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Optional, Dict, List, Any
+from typing import Optional, Dict, List, Any, Union

 logger = logging.getLogger(__name__)

@@ -417,6 +417,7 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
+    context_from: Optional[Union[str, List[str]]] = None,
    enabled_toolsets: Optional[List[str]] = None,
    workdir: Optional[str] = None,
 ) -> Dict[str, Any]:
@@ -438,6 +439,9 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
+        context_from: Optional job ID (or list of job IDs) whose most recent output
+                      is injected into the prompt as context before each run.
+                      Useful for chaining cron jobs: job A finds data, job B processes it.
        enabled_toolsets: Optional list of toolset names to restrict the agent to.
                          When set, only tools from these toolsets are loaded, reducing
                          token overhead. When omitted, all default tools are loaded.
@@ -481,6 +485,14 @@ def create_job(
    normalized_toolsets = normalized_toolsets or None
    normalized_workdir = _normalize_workdir(workdir)

+    # Normalize context_from: accept str or list of str, store as list or None
+    if isinstance(context_from, str):
+        context_from = [context_from.strip()] if context_from.strip() else None
+    elif isinstance(context_from, list):
+        context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
+    else:
+        context_from = None
+
    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
        "id": job_id,
@@ -492,6 +504,7 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
+        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -671,6 +671,47 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                f"{prompt}"
            )

+    # Inject output from referenced cron jobs as context.
+    context_from = job.get("context_from")
+    if context_from:
+        from cron.jobs import OUTPUT_DIR
+        if isinstance(context_from, str):
+            context_from = [context_from]
+        for source_job_id in context_from:
+            # Guard against path traversal — accept only lowercase-hex IDs (valid job IDs are 12-char hex strings; length is not checked here)
+            if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
+                logger.warning("context_from: skipping invalid job_id %r", source_job_id)
+                continue
+            try:
+                job_output_dir = OUTPUT_DIR / source_job_id
+                if not job_output_dir.exists():
+                    continue  # silent skip — no output yet
+                output_files = sorted(
+                    job_output_dir.glob("*.md"),
+                    key=lambda f: f.stat().st_mtime,
+                    reverse=True,
+                )
+                if not output_files:
+                    continue  # silent skip — no output yet
+                latest_output = output_files[0].read_text(encoding="utf-8").strip()
+                # Truncate to 8K characters to avoid prompt bloat
+                _MAX_CONTEXT_CHARS = 8000
+                if len(latest_output) > _MAX_CONTEXT_CHARS:
+                    latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
+                if latest_output:
+                    prompt = (
+                        f"## Output from job '{source_job_id}'\n"
+                        "The following is the most recent output from a preceding "
+                        "cron job. Use it as context for your analysis.\n\n"
+                        f"```\n{latest_output}\n```\n\n"
+                        f"{prompt}"
+                    )
+                else:
+                    continue  # silent skip — empty output
+            except (OSError, PermissionError) as e:
+                logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
+                # silent skip — do not pollute the prompt with error messages
+
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
@@ -2543,6 +2543,9 @@ class BasePlatformAdapter(ABC):
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
        is_bot: bool = False,
+        guild_id: Optional[str] = None,
+        parent_chat_id: Optional[str] = None,
+        message_id: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2560,6 +2563,9 @@ class BasePlatformAdapter(ABC):
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
            is_bot=is_bot,
+            guild_id=str(guild_id) if guild_id else None,
+            parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
+            message_id=str(message_id) if message_id else None,
        )
    
    @abstractmethod
@@ -3261,6 +3261,7 @@ class DiscordAdapter(BasePlatformAdapter):
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
+                    parent_channel_id = str(message.channel.id)
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
@@ -3320,6 +3321,9 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
+            guild_id=str(message.guild.id) if message.guild else None,
+            parent_chat_id=parent_channel_id,
+            message_id=str(message.id),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -524,7 +524,7 @@ def _load_gateway_config() -> dict:
 def _resolve_gateway_model(config: dict | None = None) -> str:
    """Read model from config.yaml — single source of truth.

-    Without this, temporary AIAgent instances (memory flush, /compress) fall
+    Without this, temporary AIAgent instances (e.g. /compress) fall
    back to the hardcoded default which fails when the active provider is
    openai-codex.
    """
@@ -915,129 +915,6 @@ class GatewayRunner:
                e,
            )

-    # -----------------------------------------------------------------
-
-    def _flush_memories_for_session(
-        self,
-        old_session_id: str,
-        session_key: Optional[str] = None,
-    ):
-        """Prompt the agent to save memories/skills before context is lost.
-
-        Synchronous worker — meant to be called via run_in_executor from
-        an async context so it doesn't block the event loop.
-        """
-        # Skip cron sessions — they run headless with no meaningful user
-        # conversation to extract memories from.
-        if old_session_id and old_session_id.startswith("cron_"):
-            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
-            return
-
-        try:
-            history = self.session_store.load_transcript(old_session_id)
-            if not history or len(history) < 4:
-                return
-
-            from run_agent import AIAgent
-            model, runtime_kwargs = self._resolve_session_agent_runtime(
-                session_key=session_key,
-            )
-            if not runtime_kwargs.get("api_key"):
-                return
-
-            tmp_agent = AIAgent(
-                **runtime_kwargs,
-                model=model,
-                max_iterations=8,
-                quiet_mode=True,
-                skip_memory=True,  # Flush agent — no memory provider
-                enabled_toolsets=["memory", "skills"],
-                session_id=old_session_id,
-            )
-            try:
-                # Fully silence the flush agent — quiet_mode only suppresses init
-                # messages; tool call output still leaks to the terminal through
-                # _safe_print → _print_fn.  Set a no-op to prevent that.
-                tmp_agent._print_fn = lambda *a, **kw: None
-
-                # Build conversation history from transcript
-                msgs = [
-                    {"role": m.get("role"), "content": m.get("content")}
-                    for m in history
-                    if m.get("role") in ("user", "assistant") and m.get("content")
-                ]
-
-                # Read live memory state from disk so the flush agent can see
-                # what's already saved and avoid overwriting newer entries.
-                _current_memory = ""
-                try:
-                    from tools.memory_tool import get_memory_dir
-                    _mem_dir = get_memory_dir()
-                    for fname, label in [
-                        ("MEMORY.md", "MEMORY (your personal notes)"),
-                        ("USER.md", "USER PROFILE (who the user is)"),
-                    ]:
-                        fpath = _mem_dir / fname
-                        if fpath.exists():
-                            content = fpath.read_text(encoding="utf-8").strip()
-                            if content:
-                                _current_memory += f"\n\n## Current {label}:\n{content}"
-                except Exception:
-                    pass  # Non-fatal — flush still works, just without the guard
-
-                # Give the agent a real turn to think about what to save
-                flush_prompt = (
-                    "[System: This session is about to be automatically reset due to "
-                    "inactivity or a scheduled daily reset. The conversation context "
-                    "will be cleared after this turn.\n\n"
-                    "Review the conversation above and:\n"
-                    "1. Save any important facts, preferences, or decisions to memory "
-                    "(user profile or your notes) that would be useful in future sessions.\n"
-                    "2. If you discovered a reusable workflow or solved a non-trivial "
-                    "problem, consider saving it as a skill.\n"
-                    "3. If nothing is worth saving, that's fine — just skip.\n\n"
-                )
-
-                if _current_memory:
-                    flush_prompt += (
-                        "IMPORTANT — here is the current live state of memory. Other "
-                        "sessions, cron jobs, or the user may have updated it since this "
-                        "conversation ended. Do NOT overwrite or remove entries unless "
-                        "the conversation above reveals something that genuinely "
-                        "supersedes them. Only add new information that is not already "
-                        "captured below."
-                        f"{_current_memory}\n\n"
-                    )
-
-                flush_prompt += (
-                    "Do NOT respond to the user. Just use the memory and skill_manage "
-                    "tools if needed, then stop.]"
-                )
-
-                tmp_agent.run_conversation(
-                    user_message=flush_prompt,
-                    conversation_history=msgs,
-                )
-            finally:
-                self._cleanup_agent_resources(tmp_agent)
-            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
-        except Exception as e:
-            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
-
-    async def _async_flush_memories(
-        self,
-        old_session_id: str,
-        session_key: Optional[str] = None,
-    ):
-        """Run the sync memory flush in a thread pool so it won't block the event loop."""
-        loop = asyncio.get_running_loop()
-        await loop.run_in_executor(
-            None,
-            self._flush_memories_for_session,
-            old_session_id,
-            session_key,
-        )
-
    @property
    def should_exit_cleanly(self) -> bool:
        return self._exit_cleanly
@@ -1103,7 +980,7 @@ class GatewayRunner:
            if override_runtime.get("api_key"):
                logger.debug(
                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
-                    (resolved_session_key or "")[:30], model, override_model,
+                    resolved_session_key or "", model, override_model,
                    override_runtime.get("provider"),
                )
                return override_model, override_runtime
@@ -1111,12 +988,12 @@ class GatewayRunner:
            # resolution and apply model/provider from the override on top.
            logger.debug(
                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
-                (resolved_session_key or "")[:30], model, override_model,
+                resolved_session_key or "", model, override_model,
            )
        else:
            logger.debug(
                "No session model override: session=%s config_model=%s override_keys=%s",
-                (resolved_session_key or "")[:30], model,
+                resolved_session_key or "", model,
                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
            )

@@ -1687,7 +1564,7 @@ class GatewayRunner:
                continue
            try:
                agent.interrupt(reason)
-                logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
+                logger.debug("Interrupted running agent for session %s during shutdown", session_key)
            except Exception as e:
                logger.debug("Failed interrupting agent during shutdown: %s", e)

@@ -1859,7 +1736,7 @@ class GatewayRunner:
                    logger.warning(
                        "Auto-suspended stuck session %s (active across %d "
                        "consecutive restarts — likely a stuck loop)",
-                        session_key[:30], counts[session_key],
+                        session_key, counts[session_key],
                    )
            except Exception:
                pass
@@ -2272,7 +2149,7 @@ class GatewayRunner:
        except Exception as e:
            logger.error("Recovered watcher setup error: %s", e)

-        # Start background session expiry watcher for proactive memory flushing
+        # Start background session expiry watcher to finalize expired sessions
        asyncio.create_task(self._session_expiry_watcher())

        # Start background reconnection watcher for platforms that failed at startup
@@ -2289,25 +2166,24 @@ class GatewayRunner:
        return True
    
    async def _session_expiry_watcher(self, interval: int = 300):
-        """Background task that proactively flushes memories for expired sessions.
-        
-        Runs every `interval` seconds (default 5 min).  For each session that
-        has expired according to its reset policy, flushes memories in a thread
-        pool and marks the session so it won't be flushed again.
+        """Background task that finalizes expired sessions.

-        This means memories are already saved by the time the user sends their
-        next message, so there's no blocking delay.
+        Runs every ``interval`` seconds (default 5 min).  For each session
+        whose reset policy has expired, invokes ``on_session_finalize``
+        hooks, cleans up the cached AIAgent's tool resources, evicts the
+        cache entry so it can be garbage-collected, and marks the session
+        so it won't be finalized again.
        """
        await asyncio.sleep(60)  # initial delay — let the gateway fully start
-        _flush_failures: dict[str, int] = {}  # session_id -> consecutive failure count
-        _MAX_FLUSH_RETRIES = 3
+        _finalize_failures: dict[str, int] = {}  # session_id -> consecutive failure count
+        _MAX_FINALIZE_RETRIES = 3
        while self._running:
            try:
                self.session_store._ensure_loaded()
                # Collect expired sessions first, then log a single summary.
                _expired_entries = []
                for key, entry in list(self.session_store._entries.items()):
-                    if entry.memory_flushed:
+                    if entry.expiry_finalized:
                        continue
                    if not self.session_store._is_session_expired(entry):
                        continue
@@ -2325,13 +2201,12 @@ class GatewayRunner:
                        f"{p}:{c}" for p, c in sorted(_platforms.items())
                    )
                    logger.info(
-                        "Session expiry: %d sessions to flush (%s)",
+                        "Session expiry: %d sessions to finalize (%s)",
                        len(_expired_entries), _plat_summary,
                    )

                for key, entry in _expired_entries:
                    try:
-                        await self._async_flush_memories(entry.session_id, key)
                        try:
                            from hermes_cli.plugins import invoke_hook as _invoke_hook
                            _parts = key.split(":")
@@ -2363,48 +2238,48 @@ class GatewayRunner:
                        # be garbage-collected.  Otherwise the cache grows
                        # unbounded across the gateway's lifetime.
                        self._evict_cached_agent(key)
-                        # Mark as flushed and persist to disk so the flag
+                        # Mark as finalized and persist to disk so the flag
                        # survives gateway restarts.
                        with self.session_store._lock:
-                            entry.memory_flushed = True
+                            entry.expiry_finalized = True
                            self.session_store._save()
                        logger.debug(
-                            "Memory flush completed for session %s",
+                            "Session expiry finalized for %s",
                            entry.session_id,
                        )
-                        _flush_failures.pop(entry.session_id, None)
+                        _finalize_failures.pop(entry.session_id, None)
                    except Exception as e:
-                        failures = _flush_failures.get(entry.session_id, 0) + 1
-                        _flush_failures[entry.session_id] = failures
-                        if failures >= _MAX_FLUSH_RETRIES:
+                        failures = _finalize_failures.get(entry.session_id, 0) + 1
+                        _finalize_failures[entry.session_id] = failures
+                        if failures >= _MAX_FINALIZE_RETRIES:
                            logger.warning(
-                                "Memory flush gave up after %d attempts for %s: %s. "
-                                "Marking as flushed to prevent infinite retry loop.",
+                                "Session finalize gave up after %d attempts for %s: %s. "
+                                "Marking as finalized to prevent infinite retry loop.",
                                failures, entry.session_id, e,
                            )
                            with self.session_store._lock:
-                                entry.memory_flushed = True
+                                entry.expiry_finalized = True
                                self.session_store._save()
-                            _flush_failures.pop(entry.session_id, None)
+                            _finalize_failures.pop(entry.session_id, None)
                        else:
                            logger.debug(
-                                "Memory flush failed (%d/%d) for %s: %s",
-                                failures, _MAX_FLUSH_RETRIES, entry.session_id, e,
+                                "Session finalize failed (%d/%d) for %s: %s",
+                                failures, _MAX_FINALIZE_RETRIES, entry.session_id, e,
                            )

                if _expired_entries:
-                    _flushed = sum(
-                        1 for _, e in _expired_entries if e.memory_flushed
+                    _done = sum(
+                        1 for _, e in _expired_entries if e.expiry_finalized
                    )
-                    _failed = len(_expired_entries) - _flushed
+                    _failed = len(_expired_entries) - _done
                    if _failed:
                        logger.info(
-                            "Session expiry done: %d flushed, %d pending retry",
-                            _flushed, _failed,
+                            "Session expiry done: %d finalized, %d pending retry",
+                            _done, _failed,
                        )
                    else:
                        logger.info(
-                            "Session expiry done: %d flushed", _flushed,
+                            "Session expiry done: %d finalized", _done,
                        )

                # Sweep agents that have been idle beyond the TTL regardless
@@ -2681,7 +2556,7 @@ class GatewayRunner:
                    except Exception as _e:
                        logger.debug(
                            "mark_resume_pending failed for %s: %s",
-                            _sk[:20], _e,
+                            _sk, _e,
                        )
                self._interrupt_running_agents(
                    _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
@@ -3347,7 +3222,7 @@ class GatewayRunner:
                logger.warning(
                    "Evicting stale _running_agents entry for %s "
                    "(age: %.0fs, idle: %.0fs, timeout: %.0fs)%s",
-                    _quick_key[:30], _stale_age, _stale_idle,
+                    _quick_key, _stale_age, _stale_idle,
                    _raw_stale_timeout, _stale_detail,
                )
                self._invalidate_session_run_generation(
@@ -3383,7 +3258,7 @@ class GatewayRunner:
                    interrupt_reason=_INTERRUPT_REASON_STOP,
                    invalidation_reason="stop_command",
                )
-                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
+                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key)
                return "⚡ Stopped. You can continue this session."

            # /reset and /new must bypass the running-agent guard so they
@@ -3449,7 +3324,7 @@ class GatewayRunner:
                    try:
                        accepted = running_agent.steer(steer_text)
                    except Exception as exc:
-                        logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
+                        logger.warning("Steer failed for session %s: %s", _quick_key, exc)
                        return f"⚠️ Steer failed: {exc}"
                    if accepted:
                        preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
@@ -3532,7 +3407,7 @@ class GatewayRunner:
                )

            if event.message_type == MessageType.PHOTO:
-                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
+                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key)
                adapter = self.adapters.get(source.platform)
                if adapter:
                    merge_pending_message_event(adapter._pending_messages, _quick_key, event)
@@ -3552,7 +3427,7 @@ class GatewayRunner:
                logger.debug(
                    "Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
                    time.time() - _started_at,
-                    _quick_key[:20],
+                    _quick_key,
                )
                adapter = self.adapters.get(source.platform)
                if adapter:
@@ -3570,7 +3445,7 @@ class GatewayRunner:
                if event.get_command() == "stop":
                    # Force-clean the sentinel so the session is unlocked.
                    self._release_running_agent_state(_quick_key)
-                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
+                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key)
                    return "⚡ Force-stopped. The agent was still starting — session unlocked."
                # Queue the message so it will be picked up after the
                # agent starts.
@@ -3592,10 +3467,10 @@ class GatewayRunner:
                    else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
                )
            if self._busy_input_mode == "queue":
-                logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20])
+                logger.debug("PRIORITY queue follow-up for session %s", _quick_key)
                self._queue_or_replace_pending_event(_quick_key, event)
                return None
-            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
+            logger.debug("PRIORITY interrupt for session %s", _quick_key)
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
                self._pending_messages[_quick_key] += "\n" + event.text
@@ -4593,7 +4468,7 @@ class GatewayRunner:
            if not self._is_session_run_current(_quick_key, run_generation):
                logger.info(
                    "Discarding stale agent result for %s — generation %d is no longer current",
-                    _quick_key[:20] if _quick_key else "?",
+                    _quick_key or "?",
                    run_generation,
                )
                _stale_adapter = self.adapters.get(source.platform)
@@ -4644,7 +4519,7 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug(
                        "clear_resume_pending failed for %s: %s",
-                        session_key[:20], _e,
+                        session_key, _e,
                    )

            # Surface error details when the agent failed silently (final_response=None)
@@ -5021,19 +4896,11 @@ class GatewayRunner:
        # Get existing session key
        session_key = self._session_key_for_source(source)
        self._invalidate_session_run_generation(session_key, reason="session_reset")
-        
-        # Flush memories in the background (fire-and-forget) so the user
-        # gets the "Session reset!" response immediately.
-        try:
-            old_entry = self.session_store._entries.get(session_key)
-            if old_entry:
-                _flush_task = asyncio.create_task(
-                    self._async_flush_memories(old_entry.session_id, session_key)
-                )
-                self._background_tasks.add(_flush_task)
-                _flush_task.add_done_callback(self._background_tasks.discard)
-        except Exception as e:
-            logger.debug("Gateway memory flush on reset failed: %s", e)
+
+        # Snapshot the old entry so on_session_finalize can report the
+        # expiring session id before reset_session() rotates it.
+        old_entry = self.session_store._entries.get(session_key)
+
        # Close tool resources on the old agent (terminal sandboxes, browser
        # daemons, background processes) before evicting from cache.
        # Guard with getattr because test fixtures may skip __init__.
@@ -5291,7 +5158,7 @@ class GatewayRunner:
                interrupt_reason=_INTERRUPT_REASON_STOP,
                invalidation_reason="stop_command_pending",
            )
-            logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
+            logger.info("STOP (pending) for session %s — sentinel cleared", session_key)
            return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
        if agent:
            # Force-clean the session lock so a truly hung agent doesn't
@@ -7252,16 +7119,6 @@ class GatewayRunner:
        if current_entry.session_id == target_id:
            return f"📌 Already on session **{name}**."

-        # Flush memories for current session before switching
-        try:
-            _flush_task = asyncio.create_task(
-                self._async_flush_memories(current_entry.session_id, session_key)
-            )
-            self._background_tasks.add(_flush_task)
-            _flush_task.add_done_callback(self._background_tasks.discard)
-        except Exception as e:
-            logger.debug("Memory flush on resume failed: %s", e)
-
        # Clear any running agent for this session key
        self._release_running_agent_state(session_key)

@@ -8798,7 +8655,7 @@ class GatewayRunner:
        if reason:
            logger.info(
                "Invalidated run generation for %s → %d (%s)",
-                session_key[:20],
+                session_key,
                generation,
                reason,
            )
@@ -9205,7 +9062,7 @@ class GatewayRunner:
                        if not _run_still_current():
                            logger.info(
                                "Discarding stale proxy stream for %s — generation %d is no longer current",
-                                session_key[:20] if session_key else "?",
+                                session_key or "?",
                                run_generation or 0,
                            )
                            return {
@@ -9269,7 +9126,7 @@ class GatewayRunner:
        if not _run_still_current():
            logger.info(
                "Discarding stale proxy result for %s — generation %d is no longer current",
-                session_key[:20] if session_key else "?",
+                session_key or "?",
                run_generation or 0,
            )
            return {
@@ -9711,7 +9568,7 @@ class GatewayRunner:
                )
                logger.debug(
                    "run_agent resolved: model=%s provider=%s session=%s",
-                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
+                    model, runtime_kwargs.get("provider"), session_key or "",
                )
            except Exception as exc:
                return {
@@ -10322,7 +10179,7 @@ class GatewayRunner:
            ):
                logger.info(
                    "Skipping stale agent promotion for %s — generation %s is no longer current",
-                    (session_key or "")[:20],
+                    session_key or "",
                    run_generation,
                )
                return
@@ -10469,7 +10326,7 @@ class GatewayRunner:
                            logger.info(
                                "Backup interrupt detected for session %s "
                                "(monitor task state: %s)",
-                                session_key[:20],
+                                session_key,
                                "done" if interrupt_monitor.done() else "running",
                            )
                            _backup_agent.interrupt(_bp_text)
@@ -10529,7 +10386,7 @@ class GatewayRunner:
                            logger.info(
                                "Backup interrupt detected for session %s "
                                "(monitor task state: %s)",
-                                session_key[:20],
+                                session_key,
                                "done" if interrupt_monitor.done() else "running",
                            )
                            _backup_agent.interrupt(_bp_text)
@@ -10631,7 +10488,7 @@ class GatewayRunner:
                    if _is_control_interrupt_message(interrupt_message):
                        logger.info(
                            "Ignoring control interrupt message for session %s: %s",
-                            session_key[:20] if session_key else "?",
+                            session_key or "?",
                            interrupt_message,
                        )
                    else:
@@ -10675,7 +10532,7 @@ class GatewayRunner:
            if self._draining and (pending_event or pending):
                logger.info(
                    "Discarding pending follow-up for session %s during gateway %s",
-                    session_key[:20] if session_key else "?",
+                    session_key or "?",
                    self._status_action_label(),
                )
                pending_event = None
@@ -10732,7 +10589,7 @@ class GatewayRunner:
                        try:
                            logger.info(
                                "Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
-                                session_key[:20] if session_key else "?",
+                                session_key or "?",
                            )
                            await adapter.send(
                                source.chat_id,
@@ -10744,7 +10601,7 @@ class GatewayRunner:
                    elif first_response:
                        logger.info(
                            "Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
-                            session_key[:20] if session_key else "?",
+                            session_key or "?",
                        )
                    # Release deferred bg-review notifications now that the
                    # first response has been delivered.  Pop from the
@@ -10879,7 +10736,7 @@ class GatewayRunner:
            if not _is_empty_sentinel and (_streamed or _previewed):
                logger.info(
                    "Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
-                    session_key[:20] if session_key else "?",
+                    session_key or "?",
                    _streamed,
                    _previewed,
                )
@@ -87,6 +87,9 @@ class SessionSource:
    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
+    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
+    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
+    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
@@ -124,8 +127,14 @@ class SessionSource:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
+        if self.guild_id:
+            d["guild_id"] = self.guild_id
+        if self.parent_chat_id:
+            d["parent_chat_id"] = self.parent_chat_id
+        if self.message_id:
+            d["message_id"] = self.message_id
        return d
-    
+
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
@@ -139,6 +148,9 @@ class SessionSource:
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
+            guild_id=data.get("guild_id"),
+            parent_chat_id=data.get("parent_chat_id"),
+            message_id=data.get("message_id"),
        )
    

@@ -190,6 +202,31 @@ that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 and the LLM needs the real ID to tag users."""


+def _discord_tools_loaded() -> bool:
+    """True iff the agent will actually have Discord tools this session.
+
+    Two conditions must hold:
+      1. The `discord` or `discord_admin` toolset is enabled for the
+         Discord platform via `hermes tools` (opt-in, default OFF).
+      2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
+         at registry time, so the toolset being enabled in config is not
+         enough if the token isn't configured.
+
+    Returns False (the safe default, which keeps the "no Discord API access"
+    disclaimer) on any error so a bad config can't promise tools the agent lacks.
+    """
+    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
+        return False
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.tools_config import _get_platform_tools
+        cfg = load_config()
+        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
+        return "discord" in enabled or "discord_admin" in enabled
+    except Exception:
+        return False
+
+
 def build_session_context_prompt(
    context: SessionContext,
    *,
@@ -277,14 +314,33 @@ def build_session_context_prompt(
            "that you can only read messages sent directly to you and respond."
        )
    elif context.source.platform == Platform.DISCORD:
-        lines.append("")
-        lines.append(
-            "**Platform notes:** You are running inside Discord. "
-            "You do NOT have access to Discord-specific APIs — you cannot search "
-            "channel history, pin messages, manage roles, or list server members. "
-            "Do not promise to perform these actions. If the user asks, explain "
-            "that you can only read messages sent directly to you and respond."
-        )
+        # Inject the Discord IDs block only when the agent actually has
+        # Discord tools loaded this session — i.e. the user opted into
+        # `discord` / `discord_admin` via `hermes tools` AND the bot
+        # token is configured.  Otherwise keep the "no Discord API access"
+        # disclaimer so we never promise tools the agent lacks.
+        if _discord_tools_loaded():
+            src = context.source
+            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
+            if src.guild_id:
+                id_lines.append(f"  - Guild: `{src.guild_id}`")
+            if src.thread_id and src.parent_chat_id:
+                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
+                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
+            else:
+                id_lines.append(f"  - Channel: `{src.chat_id}`")
+            if src.message_id:
+                id_lines.append(f"  - Triggering message: `{src.message_id}`")
+            lines.extend(id_lines)
+        else:
+            lines.append("")
+            lines.append(
+                "**Platform notes:** You are running inside Discord. "
+                "You do NOT have access to Discord-specific APIs — you cannot search "
+                "channel history, pin messages, manage roles, or list server members. "
+                "Do not promise to perform these actions. If the user asks, explain "
+                "that you can only read messages sent directly to you and respond."
+            )
    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
@@ -383,11 +439,11 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
-    # Set by the background expiry watcher after it successfully flushes
-    # memories for this session.  Persisted to sessions.json so the flag
-    # survives gateway restarts (the old in-memory _pre_flushed_sessions
-    # set was lost on restart, causing redundant re-flushes).
-    memory_flushed: bool = False
+    # Set by the background expiry watcher after it finalizes an expired
+    # session (invoking on_session_finalize hooks and evicting the cached
+    # agent).  Persisted to sessions.json so the flag survives gateway
+    # restarts — prevents redundant finalization runs.
+    expiry_finalized: bool = False

    # When True the next call to get_or_create_session() will auto-reset
    # this session (create a new session_id) so the user starts fresh.
@@ -423,7 +479,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
-            "memory_flushed": self.memory_flushed,
+            "expiry_finalized": self.expiry_finalized,
            "suspended": self.suspended,
            "resume_pending": self.resume_pending,
            "resume_reason": self.resume_reason,
@@ -475,7 +531,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
-            memory_flushed=data.get("memory_flushed", False),
+            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
            suspended=data.get("suspended", False),
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
@@ -103,7 +103,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    # Configuration
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
-    CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
+    CommandDef("model", "Switch model for this session", "Configuration",
+               aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
               cli_only=True),

@@ -612,14 +612,6 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
        },
-        "flush_memories": {
-            "provider": "auto",
-            "model": "",
-            "base_url": "",
-            "api_key": "",
-            "timeout": 30,
-            "extra_body": {},
-        },
        "title_generation": {
            "provider": "auto",
            "model": "",
@@ -783,6 +775,15 @@ DEFAULT_CONFIG = {
        # warning log if out of range.
        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
+        # When a subagent hits a dangerous-command approval prompt, the parent's
+        # prompt_toolkit TUI owns stdin — a thread-local input() call from the
+        # subagent worker would deadlock the parent UI. To avoid the deadlock,
+        # subagent threads ALWAYS resolve approvals non-interactively:
+        #   false (default) → auto-deny with a logger.warning audit line (safe)
+        #   true             → auto-approve "once" with a logger.warning audit line
+        # Flip to true only if you trust delegated work to run dangerous cmds
+        # without human review (cron pipelines, batch automation, etc.).
+        "subagent_auto_approve": False,
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -839,7 +840,7 @@ DEFAULT_CONFIG = {
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-channel ephemeral system prompts (forum parents apply to child threads)
-        # discord_server tool: restrict which actions the agent may call.
+        # discord / discord_admin tools: restrict which actions the agent may call.
        # Default (empty) = all actions allowed (subject to bot privileged intents).
        # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
        # or YAML list. Unknown names are dropped with a warning at load time.
@@ -839,6 +839,8 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:


 def _tui_build_needed(tui_dir: Path) -> bool:
+    if _hermes_ink_bundle_stale(tui_dir):
+        return True
    entry = tui_dir / "dist" / "entry.js"
    if not entry.exists():
        return True
@@ -1026,7 +1028,12 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
    return [node, str(root / "dist" / "entry.js")], root


-def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
+def _launch_tui(
+    resume_session_id: Optional[str] = None,
+    tui_dev: bool = False,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+):
    """Replace current process with the TUI."""
    tui_dir = PROJECT_ROOT / "ui-tui"

@@ -1036,6 +1043,12 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
    )
    env.setdefault("HERMES_PYTHON", sys.executable)
    env.setdefault("HERMES_CWD", os.getcwd())
+    if model:
+        env["HERMES_MODEL"] = model
+        env["HERMES_INFERENCE_MODEL"] = model
+    if provider:
+        env["HERMES_TUI_PROVIDER"] = provider
+        env["HERMES_INFERENCE_PROVIDER"] = provider
    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
    # large transcripts / reasoning blobs. Token-level merge: respect any
@@ -1174,6 +1187,8 @@ def cmd_chat(args):
        _launch_tui(
            getattr(args, "resume", None),
            tui_dev=getattr(args, "tui_dev", False),
+            model=getattr(args, "model", None),
+            provider=getattr(args, "provider", None),
        )

    # Import and run the CLI
@@ -1707,7 +1722,6 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
    ("session_search",   "Session search",   "past-conversation recall"),
    ("approval",         "Approval",         "smart command approval"),
    ("mcp",              "MCP",              "MCP tool reasoning"),
-    ("flush_memories",   "Flush memories",   "memory consolidation"),
    ("title_generation", "Title generation", "session titles"),
    ("skills_hub",       "Skills hub",       "skills search/install"),
 ]
@@ -6046,6 +6060,75 @@ def _cmd_update_impl(args, gateway_mode: bool):
            )
            import signal as _signal

+            def _wait_for_service_active(
+                scope_cmd_: list, svc_name_: str, timeout: float = 10.0,
+            ) -> bool:
+                """Poll ``systemctl is-active`` until the unit reports active.
+
+                systemd's Stopped -> Started transition after a graceful exit
+                (or a hard restart) is not instantaneous; a one-shot check
+                races that window and falsely reports the unit as down.
+                Poll every 0.5s up to ``timeout`` seconds before giving up.
+                """
+                deadline = _time.monotonic() + max(timeout, 0.5)
+                while True:
+                    try:
+                        _verify = subprocess.run(
+                            scope_cmd_ + ["is-active", svc_name_],
+                            capture_output=True, text=True, timeout=5,
+                        )
+                        if _verify.stdout.strip() == "active":
+                            return True
+                    except (FileNotFoundError, subprocess.TimeoutExpired):
+                        pass
+                    if _time.monotonic() >= deadline:
+                        return False
+                    _time.sleep(0.5)
+
+            def _service_restart_sec(
+                scope_cmd_: list, svc_name_: str, default: float = 0.0,
+            ) -> float:
+                """Read the unit's ``RestartUSec`` (RestartSec) in seconds.
+
+                After a graceful exit-75, systemd waits ``RestartSec`` before
+                respawning the unit.  Callers that poll for ``is-active``
+                must use a timeout >= ``RestartSec`` + transition slack, or
+                they'll give up *during* the cooldown window and wrongly
+                conclude the unit didn't relaunch.
+                """
+                try:
+                    _show = subprocess.run(
+                        scope_cmd_ + [
+                            "show", svc_name_,
+                            "--property=RestartUSec", "--value",
+                        ],
+                        capture_output=True, text=True, timeout=5,
+                    )
+                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    return default
+                raw = (_show.stdout or "").strip()
+                # systemd emits values like "30s", "100ms", "1min 30s", or
+                # "infinity".  Parse conservatively; on any miss return default.
+                if not raw or raw == "infinity":
+                    return default
+                total = 0.0
+                matched = False
+                for part in raw.split():
+                    for _suf, _mult in (
+                        ("ms", 0.001),
+                        ("us", 0.000001),
+                        ("min", 60.0),
+                        ("s", 1.0),
+                    ):
+                        if part.endswith(_suf):
+                            try:
+                                total += float(part[: -len(_suf)]) * _mult
+                                matched = True
+                            except ValueError:
+                                pass
+                            break
+                return total if matched else default
+
            # Drain budget for graceful SIGUSR1 restarts.  The gateway drains
            # for up to ``agent.restart_drain_timeout`` (default 60s) before
            # exiting with code 75; we wait slightly longer so the drain
@@ -6152,14 +6235,23 @@ def _cmd_update_impl(args, gateway_mode: bool):

                            if _graceful_ok:
                                # Gateway exited 75; systemd should relaunch
-                                # via Restart=on-failure.  Verify the new
-                                # process came up.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
-                                    capture_output=True, text=True, timeout=5,
+                                # via Restart=on-failure.  The unit's
+                                # RestartSec (default 30s on ours) gates the
+                                # respawn — poll past that + slack so we
+                                # don't give up mid-cooldown and falsely
+                                # print "drained but didn't relaunch".  For
+                                # units without RestartSec set we fall back
+                                # to the original 10s budget.
+                                _restart_sec = _service_restart_sec(
+                                    scope_cmd, svc_name, default=0.0,
                                )
-                                if verify.stdout.strip() == "active":
+                                _post_drain_timeout = max(
+                                    10.0, _restart_sec + 10.0,
+                                )
+                                if _wait_for_service_active(
+                                    scope_cmd, svc_name,
+                                    timeout=_post_drain_timeout,
+                                ):
                                    restarted_services.append(svc_name)
                                    continue
                                # Process exited but wasn't respawned (older
@@ -6185,14 +6277,9 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                # Verify the service actually survived the
                                # restart.  systemctl restart returns 0 even
                                # if the new process crashes immediately.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
-                                    capture_output=True,
-                                    text=True,
-                                    timeout=5,
-                                )
-                                if verify.stdout.strip() == "active":
+                                if _wait_for_service_active(
+                                    scope_cmd, svc_name, timeout=10.0,
+                                ):
                                    restarted_services.append(svc_name)
                                else:
                                    # Retry once — transient startup failures
@@ -6207,14 +6294,9 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                        text=True,
                                        timeout=15,
                                    )
-                                    _time.sleep(3)
-                                    verify2 = subprocess.run(
-                                        scope_cmd + ["is-active", svc_name],
-                                        capture_output=True,
-                                        text=True,
-                                        timeout=5,
-                                    )
-                                    if verify2.stdout.strip() == "active":
+                                    if _wait_for_service_active(
+                                        scope_cmd, svc_name, timeout=10.0,
+                                    ):
                                        restarted_services.append(svc_name)
                                        print(f"  ✓ {svc_name} recovered on retry")
                                    else:
@@ -6821,6 +6903,40 @@ For more help on a command:
    parser.add_argument(
        "--version", "-V", action="store_true", help="Show version and exit"
    )
+    parser.add_argument(
+        "-z",
+        "--oneshot",
+        metavar="PROMPT",
+        default=None,
+        help=(
+            "One-shot mode: send a single prompt and print ONLY the final "
+            "response text to stdout. No banner, no spinner, no tool "
+            "previews, no session_id line. Tools, memory, rules, and "
+            "AGENTS.md in the CWD are loaded as normal; approvals are "
+            "auto-bypassed. Intended for scripts / pipes."
+        ),
+    )
+    # --model / --provider are accepted at the top level so they can pair
+    # with -z without needing the `chat` subcommand.  If neither -z nor a
+    # subcommand consumes them, they fall through harmlessly as None.
+    # Mirrors `hermes chat --model ... --provider ...` semantics.
+    parser.add_argument(
+        "-m",
+        "--model",
+        default=None,
+        help=(
+            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
+        ),
+    )
+    parser.add_argument(
+        "--provider",
+        default=None,
+        help=(
+            "Provider override for this invocation (e.g. openrouter, anthropic). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
+        ),
+    )
    parser.add_argument(
        "--resume",
        "-r",
@@ -9101,6 +9217,17 @@ Examples:
                exc_info=True,
            )

+    # Handle top-level --oneshot / -z: single-shot mode, stdout = final
+    # response only, nothing else. Bypasses cli.py entirely.
+    if getattr(args, "oneshot", None):
+        from hermes_cli.oneshot import run_oneshot
+
+        sys.exit(run_oneshot(
+            args.oneshot,
+            model=getattr(args, "model", None),
+            provider=getattr(args, "provider", None),
+        ))
+
    # Handle top-level --resume / --continue as shortcut to chat
    if (args.resume or args.continue_last) and args.command is None:
        args.command = "chat"
@@ -1379,27 +1379,93 @@ def curated_models_for_provider(
    return [(m, "") for m in models]


-def detect_provider_for_model(
+def _provider_keys(provider: str) -> set[str]:
+    key = (provider or "").strip().lower()
+    normalized = normalize_provider(provider)
+    return {k for k in (key, normalized) if k}
+
+
+def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
+    return any(
+        name_lower == model.lower()
+        for provider in providers
+        for model in _PROVIDER_MODELS.get(provider, [])
+    )
+
+
+_AGGREGATOR_PROVIDERS = frozenset(
+    {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
+)
+
+
+def _resolve_static_model_alias(
+    name_lower: str,
+    current_keys: set[str],
+) -> Optional[tuple[str, str]]:
+    """Resolve short aliases (e.g. sonnet/opus) using static catalogs only."""
+    try:
+        from hermes_cli.model_switch import MODEL_ALIASES
+    except Exception:
+        return None
+
+    identity = MODEL_ALIASES.get(name_lower)
+    if identity is None:
+        return None
+
+    vendor = identity.vendor
+    family = identity.family
+
+    def _match(provider: str) -> Optional[str]:
+        models = _PROVIDER_MODELS.get(provider, [])
+        if not models:
+            return None
+        prefix = (
+            f"{vendor}/{family}"
+            if provider in _AGGREGATOR_PROVIDERS
+            else family
+        ).lower()
+        for model in models:
+            if model.lower().startswith(prefix):
+                return model
+        return None
+
+    for provider in current_keys:
+        if matched := _match(provider):
+            return provider, matched
+
+    for provider in _PROVIDER_MODELS:
+        if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
+            continue
+        if matched := _match(provider):
+            return provider, matched
+
+    for provider in _AGGREGATOR_PROVIDERS:
+        if provider in current_keys and (matched := _match(provider)):
+            return provider, matched
+
+    return None
+
+
+def detect_static_provider_for_model(
    model_name: str,
    current_provider: str,
 ) -> Optional[tuple[str, str]]:
-    """Auto-detect the best provider for a model name.
+    """Auto-detect a provider from static catalogs only.

-    Returns ``(provider_id, model_name)`` — the model name may be remapped
-    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
+    Returns ``(provider_id, model_name)``. The model name may be remapped
+    when a static alias or bare provider name resolves to a catalog default.
    Returns ``None`` when no confident match is found.
-
-    Priority:
-    0. Bare provider name → switch to that provider's default model
-    1. Direct provider with credentials (highest)
-    2. Direct provider without credentials → remap to OpenRouter slug
-    3. OpenRouter catalog match
    """
    name = (model_name or "").strip()
    if not name:
        return None

    name_lower = name.lower()
+    current_keys = _provider_keys(current_provider)
+
+    alias_match = _resolve_static_model_alias(name_lower, current_keys)
+    if alias_match:
+        return alias_match

    # --- Step 0: bare provider name typed as model ---
    # If someone types `/model nous` or `/model anthropic`, treat it as a
@@ -1412,64 +1478,49 @@ def detect_provider_for_model(
        if (
            resolved_provider in _PROVIDER_LABELS
            and default_models
-            and resolved_provider != normalize_provider(current_provider)
+            and resolved_provider not in current_keys
        ):
            return (resolved_provider, default_models[0])

    # Aggregators list other providers' models — never auto-switch TO them
-    _AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
-
    # If the model belongs to the current provider's catalog, don't suggest switching
-    current_models = _PROVIDER_MODELS.get(current_provider, [])
-    if any(name_lower == m.lower() for m in current_models):
+    if _model_in_provider_catalog(name_lower, current_keys):
        return None

    # --- Step 1: check static provider catalogs for a direct match ---
-    direct_match: Optional[str] = None
    for pid, models in _PROVIDER_MODELS.items():
-        if pid == current_provider or pid in _AGGREGATORS:
+        if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
            continue
        if any(name_lower == m.lower() for m in models):
-            direct_match = pid
-            break
+            return (pid, name)

-    if direct_match:
-        # Check if we have credentials for this provider — env vars,
-        # credential pool, or auth store entries.
-        has_creds = False
-        try:
-            from hermes_cli.auth import PROVIDER_REGISTRY
-            pconfig = PROVIDER_REGISTRY.get(direct_match)
-            if pconfig:
-                for env_var in pconfig.api_key_env_vars:
-                    if os.getenv(env_var, "").strip():
-                        has_creds = True
-                        break
-        except Exception:
-            pass
-        # Also check credential pool and auth store — covers OAuth,
-        # Claude Code tokens, and other non-env-var credentials (#10300).
-        if not has_creds:
-            try:
-                from agent.credential_pool import load_pool
-                pool = load_pool(direct_match)
-                if pool.has_credentials():
-                    has_creds = True
-            except Exception:
-                pass
-        if not has_creds:
-            try:
-                from hermes_cli.auth import _load_auth_store
-                store = _load_auth_store()
-                if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
-                    has_creds = True
-            except Exception:
-                pass
+    return None

-        # Always return the direct provider match.  If credentials are
-        # missing, the client init will give a clear error rather than
-        # silently routing through the wrong provider (#10300).
-        return (direct_match, name)
+
+def detect_provider_for_model(
+    model_name: str,
+    current_provider: str,
+) -> Optional[tuple[str, str]]:
+    """Auto-detect the best provider for a model name.
+
+    Returns ``(provider_id, model_name)`` — the model name may be remapped
+    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
+    Returns ``None`` when no confident match is found.
+
+    Priority:
+    0. Static alias (e.g. sonnet/opus) or bare provider name → catalog model
+    1. Direct provider static catalog match
+    2. OpenRouter catalog match
+    """
+    name = (model_name or "").strip()
+    if not name:
+        return None
+
+    static_match = detect_static_provider_for_model(name, current_provider)
+    if static_match:
+        return static_match
+    if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
+        return None

    # --- Step 2: check OpenRouter catalog ---
    # First try exact match (handles provider/model format)
@@ -0,0 +1,202 @@
+"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
+
+Bypasses cli.py entirely.  No banner, no spinner, no session_id line,
+no stderr chatter.  Just the agent's final text to stdout.
+
+Toolsets = whatever the user has configured for "cli" in `hermes tools`.
+Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
+Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
+Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
+
+Model / provider selection mirrors `hermes chat`:
+    - Both optional. If omitted, use the user's configured default.
+    - If both given, pair them exactly as given.
+    - If only --model given, auto-detect the provider that serves it.
+    - If only --provider given, error out (ambiguous — caller must pick a model).
+
+Env var fallbacks (used when the corresponding arg is not passed):
+    - HERMES_INFERENCE_MODEL
+    - HERMES_INFERENCE_PROVIDER  (already read by resolve_runtime_provider)
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from contextlib import redirect_stderr, redirect_stdout
+from typing import Optional
+
+
+def run_oneshot(
+    prompt: str,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+) -> int:
+    """Execute a single prompt and print only the final content block.
+
+    Args:
+        prompt: The user message to send.
+        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
+            env var, then config.yaml's model.default / model.model.
+        provider: Optional provider override. Falls back to
+            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
+            then "auto".
+
+    Returns the exit code.  Caller should sys.exit() with the return.
+    """
+    # Silence every stdlib logger for the duration.  AIAgent, tools, and
+    # provider adapters all log to stderr through the root logger.  Note
+    # that logging.disable() is a process-wide override applied before any
+    # handler runs, so file handlers added by setup_logging() receive
+    # nothing either: CRITICAL and below are dropped everywhere.
+    logging.disable(logging.CRITICAL)
+
+    # --provider without --model is ambiguous: carrying the user's configured
+    # model across to a different provider is usually wrong (that provider may
+    # not host it), and silently picking the provider's catalog default hides
+    # the mismatch.  Require the caller to be explicit.  Validate BEFORE the
+    # stderr redirect so the message actually reaches the terminal.
+    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
+    if provider and not ((model or "").strip() or env_model_early):
+        sys.stderr.write(
+            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
+            "Pass both explicitly, or neither to use your configured defaults.\n"
+        )
+        return 2
+
+    # Auto-approve any shell / tool approvals.  Non-interactive by
+    # definition — a prompt would hang forever.
+    os.environ["HERMES_YOLO_MODE"] = "1"
+    os.environ["HERMES_ACCEPT_HOOKS"] = "1"
+
+    # Redirect stderr AND stdout to devnull for the entire call tree.
+    # We'll print the final response to the real stdout at the end.
+    real_stdout = sys.stdout
+    devnull = open(os.devnull, "w")
+
+    try:
+        with redirect_stdout(devnull), redirect_stderr(devnull):
+            response = _run_agent(prompt, model=model, provider=provider)
+    finally:
+        try:
+            devnull.close()
+        except Exception:
+            pass
+
+    if response:
+        real_stdout.write(response)
+        if not response.endswith("\n"):
+            real_stdout.write("\n")
+        real_stdout.flush()
+    return 0
+
+
+def _run_agent(
+    prompt: str,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+) -> str:
+    """Build an AIAgent exactly like a normal CLI chat turn would, then
+    run a single conversation.  Returns the final response string."""
+    # Imports are local so they don't run when hermes is invoked for
+    # other commands (keeps top-level CLI startup cheap).
+    from hermes_cli.config import load_config
+    from hermes_cli.models import detect_provider_for_model
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+    from hermes_cli.tools_config import _get_platform_tools
+    from run_agent import AIAgent
+
+    cfg = load_config()
+
+    # Resolve effective model: explicit arg → env var → config.
+    model_cfg = cfg.get("model") or {}
+    if isinstance(model_cfg, str):
+        cfg_model = model_cfg
+    else:
+        cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""
+
+    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
+    effective_model = (model or "").strip() or env_model or cfg_model
+
+    # Resolve effective provider: explicit arg → (auto-detect from model if
+    # model was explicit) → env / config (handled inside resolve_runtime_provider).
+    #
+    # When --model is given without --provider, auto-detect the provider that
+    # serves that model — same semantic as `/model <name>` in an interactive
+    # session.  Without this, resolve_runtime_provider() would fall back to
+    # the user's configured default provider, which may not host the model
+    # the caller just asked for.
+    effective_provider = (provider or "").strip() or None
+    if effective_provider is None and (model or env_model):
+        # Only auto-detect when the model was explicitly requested via arg or
+        # env var (not when it came from config — that's the "use my defaults"
+        # path and the configured provider is already correct).
+        explicit_model = (model or "").strip() or env_model
+        if explicit_model:
+            cfg_provider = ""
+            if isinstance(model_cfg, dict):
+                cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+            current_provider = (
+                cfg_provider
+                or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+                or "auto"
+            )
+            detected = detect_provider_for_model(explicit_model, current_provider)
+            if detected:
+                effective_provider, effective_model = detected
+
+    runtime = resolve_runtime_provider(
+        requested=effective_provider,
+        target_model=effective_model or None,
+    )
+
+    # Pull in whatever toolsets the user has enabled for "cli".
+    # sorted() gives stable ordering; set→list for AIAgent's signature.
+    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
+
+    agent = AIAgent(
+        api_key=runtime.get("api_key"),
+        base_url=runtime.get("base_url"),
+        provider=runtime.get("provider"),
+        api_mode=runtime.get("api_mode"),
+        model=effective_model,
+        enabled_toolsets=toolsets_list,
+        quiet_mode=True,
+        platform="cli",
+        credential_pool=runtime.get("credential_pool"),
+        # Interactive callbacks are intentionally NOT wired beyond this
+        # one.  In oneshot mode there's no user sitting at a terminal:
+        #   - clarify  → returns a synthetic "pick a default" instruction
+        #                so the agent continues instead of stalling on
+        #                the tool's built-in "not available" error
+        #   - sudo password prompt → terminal_tool gates on
+        #                HERMES_INTERACTIVE which we never set
+        #   - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
+        #                (set above); also falls back to deny on non-tty
+        #   - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
+        #   - skill secret capture → returns gracefully when no callback set
+        clarify_callback=_oneshot_clarify_callback,
+    )
+
+    # Belt-and-braces: make sure AIAgent doesn't invoke any streaming
+    # display callbacks that would bypass our stdout capture.
+    agent.suppress_status_output = True
+    agent.stream_delta_callback = None
+    agent.tool_gen_callback = None
+
+    return agent.chat(prompt) or ""
+
+
+def _oneshot_clarify_callback(question: str, choices=None) -> str:
+    """Clarify is disabled in oneshot mode — tell the agent to pick a
+    default and proceed instead of stalling or erroring."""
+    if choices:
+        return (
+            f"[oneshot mode: no user available. Pick the best option from "
+            f"{choices} using your own judgment and continue.]"
+        )
+    return (
+        "[oneshot mode: no user available. Make the most reasonable "
+        "assumption you can and continue.]"
+    )
@@ -68,25 +68,58 @@ CONFIGURABLE_TOOLSETS = [
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
    ("spotify",          "🎵 Spotify",                  "playback, search, playlists, library"),
+    ("discord",         "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
+    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
 ]

 # Toolsets that are OFF by default for new installs.
 # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
 # but the setup checklist won't pre-select them for first-time users.
-_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
+
+# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
+# these platforms, and only resolve/save for these platforms.  A toolset
+# absent from this map is available on every platform (current behaviour).
+#
+# Use this for tools whose APIs only make sense on one platform (Discord
+# server admin, Slack workspace admin, etc.).  Keeps every other platform's
+# checklist from filling up with irrelevant toggles.
+_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
+    "discord": {"discord"},
+    "discord_admin": {"discord"},
+}
+
+
+def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
+    """Return True if ``ts_key`` is configurable on ``platform``.
+
+    Toolsets without a restriction entry are allowed everywhere (the default).
+    """
+    allowed = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
+    return allowed is None or platform in allowed


 def _get_effective_configurable_toolsets():
    """Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.

    Plugin toolsets are appended at the end so they appear after the
-    built-in toolsets in the TUI checklist.
+    built-in toolsets in the TUI checklist. A plugin whose toolset key
+    already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
+    plugins (e.g. ``plugins/spotify``) share their toolset key with the
+    built-in entry, and we want the built-in label/description to win.
+    Without the dedupe, ``hermes tools`` → "reconfigure existing" would
+    list the same toolset twice.
    """
    result = list(CONFIGURABLE_TOOLSETS)
+    seen = {ts_key for ts_key, _, _ in result}
    try:
        from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
        discover_plugins()  # idempotent — ensures plugins are loaded
-        result.extend(get_plugin_toolsets())
+        for entry in get_plugin_toolsets():
+            if entry[0] in seen:
+                continue
+            seen.add(entry[0])
+            result.append(entry)
    except Exception:
        pass
    return result
@@ -591,7 +624,7 @@ def _get_platform_tools(
    include_default_mcp_servers: bool = True,
 ) -> Set[str]:
    """Resolve which individual toolset names are enabled for a platform."""
-    from toolsets import resolve_toolset
+    from toolsets import resolve_toolset, TOOLSETS

    platform_toolsets = config.get("platform_toolsets") or {}
    toolset_names = platform_toolsets.get(platform)
@@ -605,6 +638,8 @@ def _get_platform_tools(
    toolset_names = [str(ts) for ts in toolset_names]

    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    plugin_ts_keys = _get_plugin_toolset_keys()
+    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}

    # If the saved list contains any configurable keys directly, the user
    # has explicitly configured this platform — use direct membership.
@@ -614,7 +649,10 @@ def _get_platform_tools(
    has_explicit_config = any(ts in configurable_keys for ts in toolset_names)

    if has_explicit_config:
-        enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
+        enabled_toolsets = {
+            ts for ts in toolset_names
+            if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
+        }
    else:
        # No explicit config — fall back to resolving composite toolset names
        # (e.g. "hermes-cli") to individual tool names and reverse-mapping.
@@ -624,14 +662,52 @@ def _get_platform_tools(

        enabled_toolsets = set()
        for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
+            if not _toolset_allowed_for_platform(ts_key, platform):
+                continue
            ts_tools = set(resolve_toolset(ts_key))
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)
+
        default_off = set(_DEFAULT_OFF_TOOLSETS)
-        if platform in default_off:
+        # Legacy safety: if the platform's own name matches a default-off
+        # toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
+        # keep that toolset enabled on first install.  Skip this exemption for
+        # platform-restricted toolsets — those are always opt-in even on
+        # their own platform (e.g. `discord` + `discord` should stay OFF).
+        if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
            default_off.remove(platform)
        enabled_toolsets -= default_off

+    # Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
+    # feishu_drive).  These are part of the platform's default composite but
+    # absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
+    # checklist or in a user-saved config.  Must run in BOTH branches —
+    # otherwise saving via `hermes tools` (which flips has_explicit_config
+    # to True) silently drops them.
+    platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
+    configurable_tool_universe = set()
+    for ck in configurable_keys:
+        configurable_tool_universe.update(resolve_toolset(ck))
+    claimed = set()
+    for ts_key in enabled_toolsets:
+        claimed.update(resolve_toolset(ts_key))
+    skip = configurable_keys | plugin_ts_keys | platform_default_keys
+    skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
+    skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
+    for ts_key, ts_def in TOOLSETS.items():
+        if ts_key in skip:
+            continue
+        if ts_def.get("includes"):
+            continue
+        ts_tools = set(resolve_toolset(ts_key))
+        if not ts_tools or not ts_tools.issubset(platform_tool_universe):
+            continue
+        if ts_tools.issubset(configurable_tool_universe):
+            continue
+        if not ts_tools.issubset(claimed):
+            enabled_toolsets.add(ts_key)
+            claimed.update(ts_tools)
+
    # Plugin toolsets: enabled by default unless explicitly disabled, or
    # unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
    # shipped as a bundled plugin but user must opt in via `hermes tools`
@@ -639,7 +715,6 @@ def _get_platform_tools(
    # A plugin toolset is "known" for a platform once `hermes tools`
    # has been saved for that platform (tracked via known_plugin_toolsets).
    # Unknown plugins default to enabled; known-but-absent = disabled.
-    plugin_ts_keys = _get_plugin_toolset_keys()
    if plugin_ts_keys:
        known_map = config.get("known_plugin_toolsets", {})
        known_for_platform = set(known_map.get(platform, []))
@@ -657,7 +732,6 @@ def _get_platform_tools(

    # Preserve any explicit non-configurable toolset entries (for example,
    # custom toolsets or MCP server names saved in platform_toolsets).
-    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
    explicit_passthrough = {
        ts
        for ts in toolset_names
@@ -703,6 +777,14 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    """
    config.setdefault("platform_toolsets", {})

+    # Drop platform-scoped toolsets that don't apply here.  Prevents the
+    # "Configure all platforms" checklist (or a hand-edited config.yaml)
+    # from turning on, say, the `discord` toolset for Telegram.
+    enabled_toolset_keys = {
+        ts for ts in enabled_toolset_keys
+        if _toolset_allowed_for_platform(ts, platform)
+    }
+
    # Get the set of all configurable toolset keys (built-in + plugin)
    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
    plugin_keys = _get_plugin_toolset_keys()
@@ -717,6 +799,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
    if not isinstance(existing_toolsets, list):
        existing_toolsets = []
+    existing_toolsets = [str(ts) for ts in existing_toolsets]

    # Preserve any entries that are NOT configurable toolsets and NOT platform
    # defaults (i.e. only MCP server names should be preserved)
@@ -724,6 +807,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
        entry for entry in existing_toolsets
        if entry not in configurable_keys and entry not in platform_default_keys
    }
+    # Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
+    # saving from the picker as consent to clear the "no_mcp" sentinel. The
+    # picker has no checkbox for no_mcp, so without this users who once set it
+    # by hand could never re-enable MCP servers through the UI.
+    preserved_entries.discard("no_mcp")

    # Merge preserved entries with new enabled toolsets
    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
@@ -831,7 +919,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    return _tool_token_cache


-def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
+def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
    """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
    from hermes_cli.curses_ui import curses_checklist
    from toolsets import resolve_toolset
@@ -839,7 +927,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
    # Pre-compute per-tool token counts (cached after first call).
    tool_tokens = _estimate_tool_tokens()

-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    # Drop platform-scoped toolsets that don't apply to this platform.
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]

    labels = []
    for ts_key, ts_label, ts_desc in effective:
@@ -1753,7 +1846,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS

            # Show checklist
-            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
+            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)

            added = new_enabled - current_enabled
            removed = current_enabled - new_enabled
@@ -2109,7 +2202,11 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]

 def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
    """Print a summary of enabled/disabled toolsets and MCP tool filters."""
-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]
    builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}

    print(f"Built-in toolsets ({platform}):")
@@ -2175,6 +2272,20 @@ def tools_disable_enable_command(args):
            _print_error(f"Unknown toolset '{name}'")
        toolset_targets = [t for t in toolset_targets if t in valid_toolsets]

+    # Reject platform-scoped toolsets on platforms that don't allow them.
+    restricted_targets = [
+        t for t in toolset_targets
+        if not _toolset_allowed_for_platform(t, platform)
+    ]
+    if restricted_targets:
+        for name in restricted_targets:
+            allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
+            _print_error(
+                f"Toolset '{name}' is not available on platform '{platform}' "
+                f"(only: {', '.join(allowed)})"
+            )
+        toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
+
    if toolset_targets:
        _apply_toolset_change(config, platform, toolset_targets, action)

@@ -53,7 +53,7 @@ try:
    from fastapi.middleware.cors import CORSMiddleware
    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
    from fastapi.staticfiles import StaticFiles
-    from pydantic import BaseModel, field_validator
+    from pydantic import BaseModel
 except ImportError:
    raise SystemExit(
        "Web UI requires fastapi and uvicorn.\n"
@@ -425,20 +425,6 @@ class EnvVarUpdate(BaseModel):
    key: str
    value: str

-    @field_validator("key")
-    @classmethod
-    def key_must_be_nonempty(cls, v: str) -> str:
-        if not v.strip():
-            raise ValueError("key must not be empty")
-        return v
-
-    @field_validator("value")
-    @classmethod
-    def value_must_be_nonempty(cls, v: str) -> str:
-        if not v.strip():
-            raise ValueError("value must not be empty; use DELETE /api/env to remove a key")
-        return v
-

 class EnvVarDelete(BaseModel):
    key: str
@@ -288,30 +288,34 @@ def get_tool_definitions(
                filtered_tools[i] = {"type": "function", "function": dynamic_schema}
                break

-    # Rebuild discord_server schema based on the bot's privileged intents
-    # (detected from GET /applications/@me) and the user's action allowlist
-    # in config.  Hides actions the bot's intents don't support so the
-    # model never attempts them, and annotates fetch_messages when the
+    # Rebuild discord / discord_admin schemas based on the bot's privileged
+    # intents (detected from GET /applications/@me) and the user's action
+    # allowlist in config.  Hides actions the bot's intents don't support so
+    # the model never attempts them, and annotates fetch_messages when the
    # MESSAGE_CONTENT intent is missing.
-    if "discord_server" in available_tool_names:
-        try:
-            from tools.discord_tool import get_dynamic_schema
-            dynamic = get_dynamic_schema()
-        except Exception:  # pragma: no cover — defensive, fall back to static
-            dynamic = None
-        if dynamic is None:
-            # Tool filtered out entirely (empty allowlist or detection disabled
-            # the only remaining actions).  Drop it from the schema list.
-            filtered_tools = [
-                t for t in filtered_tools
-                if t.get("function", {}).get("name") != "discord_server"
-            ]
-            available_tool_names.discard("discord_server")
-        else:
-            for i, td in enumerate(filtered_tools):
-                if td.get("function", {}).get("name") == "discord_server":
-                    filtered_tools[i] = {"type": "function", "function": dynamic}
-                    break
+    _discord_schema_fns = {
+        "discord": "get_dynamic_schema_core",
+        "discord_admin": "get_dynamic_schema_admin",
+    }
+    for discord_tool_name in _discord_schema_fns:
+        if discord_tool_name in available_tool_names:
+            try:
+                from tools import discord_tool as _dt
+                schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
+                dynamic = schema_fn()
+            except Exception:
+                dynamic = None
+            if dynamic is None:
+                filtered_tools = [
+                    t for t in filtered_tools
+                    if t.get("function", {}).get("name") != discord_tool_name
+                ]
+                available_tool_names.discard(discord_tool_name)
+            else:
+                for i, td in enumerate(filtered_tools):
+                    if td.get("function", {}).get("name") == discord_tool_name:
+                        filtered_tools[i] = {"type": "function", "function": dynamic}
+                        break

    # Strip web tool cross-references from browser_navigate description when
    # web_search / web_extract are not available.  The static schema says
@@ -91,4 +91,29 @@

  // Register this plugin — the dashboard picks it up automatically.
  window.__HERMES_PLUGINS__.register("example", ExamplePage);
+
+  // ─────────────────────────────────────────────────────────────────────
+  // Page-scoped slot demo: inject a small banner at the top of /sessions.
+  //
+  // Built-in pages expose named slots (<page>:top, <page>:bottom) that
+  // plugins can populate without overriding the whole route. The
+  // manifest lists the slots we use in its `slots` array so the shell
+  // knows to render <PluginSlot name="sessions:top" /> there.
+  // ─────────────────────────────────────────────────────────────────────
+  function SessionsTopBanner() {
+    return React.createElement(Card, {
+      className: "border-dashed",
+    },
+      React.createElement(CardContent, { className: "flex items-center gap-3 py-2" },
+        React.createElement(Badge, { variant: "outline" }, "Example"),
+        React.createElement("span", {
+          className: "text-xs text-muted-foreground",
+        }, "This banner was injected into the Sessions page by the example plugin via the ",
+          React.createElement("code", { className: "font-courier" }, "sessions:top"),
+          " slot."),
+      ),
+    );
+  }
+
+  window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
 })();
@@ -8,6 +8,7 @@
    "path": "/example",
    "position": "after:skills"
  },
+  "slots": ["sessions:top"],
  "entry": "dist/index.js",
  "api": "plugin_api.py"
 }
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
 # ---------------------------------------------------------------------------
 # Process-level atexit safety net — ensures pending sessions are committed
 # even if shutdown_memory_provider is never called (e.g. gateway crash,
-# SIGKILL, or exception in _async_flush_memories preventing shutdown).
+# SIGKILL, or exception in the session expiry watcher preventing shutdown).
 # ---------------------------------------------------------------------------
 _last_active_provider: Optional["OpenVikingMemoryProvider"] = None

@@ -1578,7 +1578,6 @@ class AIAgent:
        self._memory_enabled = False
        self._user_profile_enabled = False
        self._memory_nudge_interval = 10
-        self._memory_flush_min_turns = 6
        self._turns_since_memory = 0
        self._iters_since_skill = 0
        if not skip_memory:
@@ -1587,7 +1586,6 @@ class AIAgent:
                self._memory_enabled = mem_config.get("memory_enabled", False)
                self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
                self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
-                self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
                if self._memory_enabled or self._user_profile_enabled:
                    from tools.memory_tool import MemoryStore
                    self._memory_store = MemoryStore(
@@ -2399,6 +2397,7 @@ class AIAgent:
                base_url=aux_base_url,
                api_key=aux_api_key,
                config_context_length=getattr(self, "_aux_compression_context_length_config", None),
+                provider=getattr(self, "provider", ""),
            )

            # Hard floor: the auxiliary compression model must have at least
@@ -2425,6 +2424,11 @@ class AIAgent:
                # compression actually works this session.  The hard floor
                # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
                # so the new threshold is always >= 64K.
+                #
+                # The compression summariser sends a single user-role
+                # prompt (no system prompt, no tools) to the aux model, so
+                # new_threshold == aux_context is safe: the request is
+                # the raw messages plus a small summarisation instruction.
                old_threshold = threshold
                new_threshold = aux_context
                self.context_compressor.threshold_tokens = new_threshold
@@ -5137,6 +5141,8 @@ class AIAgent:
        # response.incomplete instead of response.completed).
        self._codex_streamed_text_parts: list = []
        for attempt in range(max_stream_retries + 1):
+            if self._interrupt_requested:
+                raise InterruptedError("Agent interrupted before Codex stream retry")
            collected_output_items: list = []
            try:
                with active_client.responses.stream(**api_kwargs) as stream:
@@ -6306,6 +6312,14 @@ class AIAgent:

            try:
                for _stream_attempt in range(_max_stream_retries + 1):
+                    # Check for interrupt before each retry attempt.  Without
+                    # this, /stop closes the HTTP connection (outer poll loop),
+                    # but the retry loop opens a FRESH connection — negating the
+                    # interrupt entirely.  On slow providers (ollama-cloud) each
+                    # retry can block for the full stream-read timeout (120s+),
+                    # causing multi-minute delays between /stop and response.
+                    if self._interrupt_requested:
+                        raise InterruptedError("Agent interrupted before stream retry")
                    try:
                        if self.api_mode == "anthropic_messages":
                            self._try_refresh_anthropic_client_credentials()
@@ -7740,25 +7754,50 @@ class AIAgent:
        if source_msg.get("role") != "assistant":
            return

-        explicit_reasoning = source_msg.get("reasoning_content")
-        if isinstance(explicit_reasoning, str):
-            api_msg["reasoning_content"] = explicit_reasoning
+        # 1. Explicit reasoning_content already set — preserve it verbatim
+        # (includes DeepSeek/Kimi's own empty-string placeholder written at
+        # creation time, and any valid reasoning content from the same provider).
+        existing = source_msg.get("reasoning_content")
+        if isinstance(existing, str):
+            api_msg["reasoning_content"] = existing
            return

+        # 2. DeepSeek / Kimi thinking mode: tool-call turns that lack
+        # reasoning_content are "poisoned history" — a prior provider (MiniMax,
+        # etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content
+        # is absent on replay; inject "" to satisfy the provider's requirement
+        # without forwarding any cross-provider reasoning content.
+        needs_empty_reasoning = (
+            source_msg.get("tool_calls")
+            and (
+                self._needs_kimi_tool_reasoning()
+                or self._needs_deepseek_tool_reasoning()
+            )
+        )
+        if needs_empty_reasoning:
+            api_msg["reasoning_content"] = ""
+            return
+
+        # 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
+        # for providers that use the internal 'reasoning' key.
        normalized_reasoning = source_msg.get("reasoning")
        if isinstance(normalized_reasoning, str) and normalized_reasoning:
            api_msg["reasoning_content"] = normalized_reasoning
            return

-        # Providers that require an echoed reasoning_content on every
-        # assistant tool-call turn. Detection logic lives in the per-provider
-        # helpers so both the creation path (_build_assistant_message) and
-        # this replay path stay in sync.
-        if source_msg.get("tool_calls") and (
+        # 4. DeepSeek / Kimi thinking mode: all assistant messages need
+        # reasoning_content. Inject "" to satisfy the provider's requirement
+        # when no explicit reasoning content is present.
+        if (
            self._needs_kimi_tool_reasoning()
            or self._needs_deepseek_tool_reasoning()
        ):
            api_msg["reasoning_content"] = ""
+            return
+
+        # 5. No usable reasoning: reasoning_content is absent or not a string
+        # (e.g. None after context compaction).  Don't pass null to the API.
+        api_msg.pop("reasoning_content", None)

    @staticmethod
    def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
@@ -7910,251 +7949,6 @@ class AIAgent:
        """
        return self.api_mode != "codex_responses"

-    def flush_memories(self, messages: list = None, min_turns: int = None):
-        """Give the model one turn to persist memories before context is lost.
-
-        Called before compression, session reset, or CLI exit. Injects a flush
-        message, makes one API call, executes any memory tool calls, then
-        strips all flush artifacts from the message list.
-
-        Args:
-            messages: The current conversation messages. If None, uses
-                      self._session_messages (last run_conversation state).
-            min_turns: Minimum user turns required to trigger the flush.
-                       None = use config value (flush_min_turns).
-                       0 = always flush (used for compression).
-        """
-        if self._memory_flush_min_turns == 0 and min_turns is None:
-            return
-        if "memory" not in self.valid_tool_names or not self._memory_store:
-            return
-        effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
-        if self._user_turn_count < effective_min:
-            return
-
-        if messages is None:
-            messages = getattr(self, '_session_messages', None)
-        if not messages or len(messages) < 3:
-            return
-
-        flush_content = (
-            "[System: The session is being compressed. "
-            "Save anything worth remembering — prioritize user preferences, "
-            "corrections, and recurring patterns over task-specific details.]"
-        )
-        _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
-        flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
-        messages.append(flush_msg)
-
-        try:
-            # Build API messages for the flush call
-            _needs_sanitize = self._should_sanitize_tool_calls()
-            api_messages = []
-            for msg in messages:
-                api_msg = msg.copy()
-                self._copy_reasoning_content_for_api(msg, api_msg)
-                api_msg.pop("reasoning", None)
-                api_msg.pop("finish_reason", None)
-                api_msg.pop("_flush_sentinel", None)
-                api_msg.pop("_thinking_prefill", None)
-                if _needs_sanitize:
-                    self._sanitize_tool_calls_for_strict_api(api_msg)
-                api_messages.append(api_msg)
-
-            if self._cached_system_prompt:
-                api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
-
-            # Make one API call with only the memory tool available
-            memory_tool_def = None
-            for t in (self.tools or []):
-                if t.get("function", {}).get("name") == "memory":
-                    memory_tool_def = t
-                    break
-
-            if not memory_tool_def:
-                messages.pop()  # remove flush msg
-                return
-
-            # Use auxiliary client for the flush call when available --
-            # it's cheaper and avoids Codex Responses API incompatibility.
-            from agent.auxiliary_client import (
-                call_llm as _call_llm,
-                _fixed_temperature_for_model,
-                OMIT_TEMPERATURE,
-            )
-            _aux_available = True
-            # Kimi models manage temperature server-side — omit it entirely.
-            # Other models with a fixed contract get that value; everyone else
-            # gets the historical 0.3 default.
-            _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
-            _omit_temperature = _fixed_temp is OMIT_TEMPERATURE
-            if _omit_temperature:
-                _flush_temperature = None
-            elif _fixed_temp is not None:
-                _flush_temperature = _fixed_temp
-            else:
-                _flush_temperature = 0.3
-            aux_error = None
-            try:
-                response = _call_llm(
-                    task="flush_memories",
-                    messages=api_messages,
-                    tools=[memory_tool_def],
-                    temperature=_flush_temperature,
-                    max_tokens=5120,
-                    # timeout resolved from auxiliary.flush_memories.timeout config
-                )
-            except Exception as e:
-                aux_error = e
-                _aux_available = False
-                response = None
-
-            if not _aux_available and self.api_mode == "codex_responses":
-                # No auxiliary client -- use the Codex Responses path directly
-                codex_kwargs = self._build_api_kwargs(api_messages)
-                _ct_flush = self._get_transport()
-                if _ct_flush is not None:
-                    codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
-                elif not codex_kwargs.get("tools"):
-                    codex_kwargs["tools"] = [memory_tool_def]
-                if _flush_temperature is not None:
-                    codex_kwargs["temperature"] = _flush_temperature
-                else:
-                    codex_kwargs.pop("temperature", None)
-                if "max_output_tokens" in codex_kwargs:
-                    codex_kwargs["max_output_tokens"] = 5120
-                response = self._run_codex_stream(codex_kwargs)
-            elif not _aux_available and self.api_mode == "anthropic_messages":
-                # Native Anthropic — use the transport for kwargs
-                _tflush = self._get_transport()
-                ant_kwargs = _tflush.build_kwargs(
-                    model=self.model, messages=api_messages,
-                    tools=[memory_tool_def], max_tokens=5120,
-                    reasoning_config=None,
-                    preserve_dots=self._anthropic_preserve_dots(),
-                )
-                response = self._anthropic_messages_create(ant_kwargs)
-            elif not _aux_available:
-                api_kwargs = {
-                    "model": self.model,
-                    "messages": api_messages,
-                    "tools": [memory_tool_def],
-                    **self._max_tokens_param(5120),
-                }
-                if _flush_temperature is not None:
-                    api_kwargs["temperature"] = _flush_temperature
-                from agent.auxiliary_client import _get_task_timeout
-                response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
-                    **api_kwargs, timeout=_get_task_timeout("flush_memories")
-                )
-
-            if aux_error is not None:
-                logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
-                self._emit_auxiliary_failure("memory flush", aux_error)
-
-            def _openai_tool_calls(resp):
-                if resp is not None and hasattr(resp, "choices") and resp.choices:
-                    msg = getattr(resp.choices[0], "message", None)
-                    calls = getattr(msg, "tool_calls", None)
-                    if calls:
-                        return calls
-                return []
-
-            def _codex_output_tool_calls(resp):
-                calls = []
-                for item in getattr(resp, "output", []) or []:
-                    if getattr(item, "type", None) == "function_call":
-                        calls.append(SimpleNamespace(
-                            id=getattr(item, "call_id", None),
-                            type="function",
-                            function=SimpleNamespace(
-                                name=getattr(item, "name", ""),
-                                arguments=getattr(item, "arguments", "{}"),
-                            ),
-                        ))
-                return calls
-
-            # Extract tool calls from the response, handling all API formats
-            tool_calls = []
-            if self.api_mode == "codex_responses" and not _aux_available:
-                _ct_flush = self._get_transport()
-                _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
-                if _cnr_flush and _cnr_flush.tool_calls:
-                    tool_calls = [
-                        SimpleNamespace(
-                            id=tc.id, type="function",
-                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _cnr_flush.tool_calls
-                    ]
-                else:
-                    tool_calls = _codex_output_tool_calls(response)
-            elif self.api_mode == "anthropic_messages" and not _aux_available:
-                _tfn = self._get_transport()
-                _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
-                if _flush_result and _flush_result.tool_calls:
-                    tool_calls = [
-                        SimpleNamespace(
-                            id=tc.id, type="function",
-                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _flush_result.tool_calls
-                    ]
-            elif self.api_mode in ("chat_completions", "bedrock_converse"):
-                # chat_completions / bedrock — normalize through transport
-                _tfn = self._get_transport()
-                _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
-                if _flush_result and _flush_result.tool_calls:
-                    tool_calls = _flush_result.tool_calls
-                else:
-                    tool_calls = _openai_tool_calls(response)
-            elif _aux_available and hasattr(response, "choices") and response.choices:
-                # Auxiliary client returned OpenAI-shaped response while main
-                # api_mode is codex/anthropic — extract tool_calls from .choices
-                tool_calls = _openai_tool_calls(response)
-
-            for tc in tool_calls:
-                if tc.function.name == "memory":
-                    try:
-                        args = json.loads(tc.function.arguments)
-                        flush_target = args.get("target", "memory")
-                        from tools.memory_tool import memory_tool as _memory_tool
-                        _memory_tool(
-                            action=args.get("action"),
-                            target=flush_target,
-                            content=args.get("content"),
-                            old_text=args.get("old_text"),
-                            store=self._memory_store,
-                        )
-                        if self._memory_manager and args.get("action") in ("add", "replace"):
-                            try:
-                                self._memory_manager.on_memory_write(
-                                    args.get("action", ""),
-                                    flush_target,
-                                    args.get("content", ""),
-                                    metadata=self._build_memory_write_metadata(
-                                        write_origin="memory_flush",
-                                        execution_context="flush_memories",
-                                    ),
-                                )
-                            except Exception:
-                                pass
-                        if not self.quiet_mode:
-                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
-                    except Exception as e:
-                        logger.warning("Memory flush tool call failed: %s", e)
-                        self._emit_auxiliary_failure("memory flush tool", e)
-        except Exception as e:
-            logger.warning("Memory flush API call failed: %s", e)
-            self._emit_auxiliary_failure("memory flush", e)
-        finally:
-            # Strip flush artifacts: remove everything from the flush message onward.
-            # Use sentinel marker instead of identity check for robustness.
-            while messages and messages[-1].get("_flush_sentinel") != _sentinel:
-                messages.pop()
-                if not messages:
-                    break
-            if messages and messages[-1].get("_flush_sentinel") == _sentinel:
-                messages.pop()
-
    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
        """Compress conversation context and split the session in SQLite.

@@ -8173,8 +7967,6 @@ class AIAgent:
            f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
            focus_topic,
        )
-        # Pre-compression memory flush: let the model save memories before they're lost
-        self.flush_memories(messages, min_turns=0)

        # Notify external memory provider before compression discards context
        if self._memory_manager:
@@ -29,10 +29,25 @@ BOLD='\033[1m'
 REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
 REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
 HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
-INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
+# INSTALL_DIR is resolved AFTER arg parsing and OS detection so we can pick an
+# FHS-style layout for root installs.  Track whether the user gave us an
+# explicit directory — if so we never override it.
+if [ -n "${HERMES_INSTALL_DIR:-}" ]; then
+    INSTALL_DIR="$HERMES_INSTALL_DIR"
+    INSTALL_DIR_EXPLICIT=true
+else
+    INSTALL_DIR=""
+    INSTALL_DIR_EXPLICIT=false
+fi
 PYTHON_VERSION="3.11"
 NODE_VERSION="22"

+# FHS-style root install layout (set by resolve_install_layout when applicable):
+#   code at /usr/local/lib/hermes-agent, command at /usr/local/bin/hermes,
+#   data still at /root/.hermes (HERMES_HOME).  Matches Claude Code / Codex CLI
+#   and keeps Docker bind-mounted /root/ volumes lean.
+ROOT_FHS_LAYOUT=false
+
 # Options
 USE_VENV=true
 RUN_SETUP=true
@@ -64,6 +79,7 @@ while [[ $# -gt 0 ]]; do
            ;;
        --dir)
            INSTALL_DIR="$2"
+            INSTALL_DIR_EXPLICIT=true
            shift 2
            ;;
        --hermes-home)
@@ -79,9 +95,20 @@ while [[ $# -gt 0 ]]; do
            echo "  --no-venv      Don't create virtual environment"
            echo "  --skip-setup   Skip interactive setup wizard"
            echo "  --branch NAME  Git branch to install (default: main)"
-            echo "  --dir PATH     Installation directory (default: ~/.hermes/hermes-agent)"
+            echo "  --dir PATH     Installation directory"
+            echo "                   default (non-root):  ~/.hermes/hermes-agent"
+            echo "                   default (root, Linux): /usr/local/lib/hermes-agent"
            echo "  --hermes-home PATH  Data directory (default: ~/.hermes, or \$HERMES_HOME)"
            echo "  -h, --help     Show this help"
+            echo ""
+            echo "Notes:"
+            echo "  When running as root on Linux, Hermes installs the code under"
+            echo "  /usr/local/lib/hermes-agent and links the command into"
+            echo "  /usr/local/bin/hermes (FHS layout — matches Claude Code / Codex CLI)."
+            echo "  Data, config, sessions, and logs still live in \$HERMES_HOME"
+            echo "  (default /root/.hermes).  This keeps Docker bind-mounted volumes"
+            echo "  small and ensures the command is on PATH for all shells."
+            echo "  Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place."
            exit 0
            ;;
        *)
@@ -163,9 +190,60 @@ is_termux() {
    [ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]]
 }

+# Decide where the repo checkout + venv live, and where the `hermes` command
+# symlink goes.  Called after detect_os so $OS/$DISTRO are known.
+#
+# Defaults:
+#   - Non-root / macOS root:  INSTALL_DIR = $HERMES_HOME/hermes-agent
+#                             command link in $HOME/.local/bin
+#   - Termux (any uid):       INSTALL_DIR = $HERMES_HOME/hermes-agent
+#                             command link in $PREFIX/bin (already on PATH)
+#   - Root on Linux (new):    INSTALL_DIR = /usr/local/lib/hermes-agent
+#                             command link in /usr/local/bin
+#                             (unless a legacy install already exists at
+#                              $HERMES_HOME/hermes-agent — then preserve it)
+#
+# Never overrides INSTALL_DIR when the user set --dir or $HERMES_INSTALL_DIR.
+resolve_install_layout() {
+    if [ "$INSTALL_DIR_EXPLICIT" = true ]; then
+        log_info "Install directory: $INSTALL_DIR (explicit)"
+        return 0
+    fi
+
+    # Termux: package manager manages /data/data/..., keep code in HERMES_HOME.
+    if is_termux; then
+        INSTALL_DIR="$HERMES_HOME/hermes-agent"
+        return 0
+    fi
+
+    # Root on Linux: prefer FHS layout unless a legacy install already exists.
+    # macOS root installs keep the legacy layout because /usr/local/ on macOS
+    # is Homebrew territory and we don't want to fight that.
+    if [ "$OS" = "linux" ] && [ "$(id -u)" -eq 0 ]; then
+        if [ -d "$HERMES_HOME/hermes-agent/.git" ]; then
+            INSTALL_DIR="$HERMES_HOME/hermes-agent"
+            log_info "Existing install detected at $INSTALL_DIR — keeping legacy layout"
+            log_info "  (new root installs use /usr/local/lib/hermes-agent)"
+            return 0
+        fi
+        INSTALL_DIR="/usr/local/lib/hermes-agent"
+        ROOT_FHS_LAYOUT=true
+        log_info "Root install on Linux — using FHS layout"
+        log_info "  Code:    $INSTALL_DIR"
+        log_info "  Command: /usr/local/bin/hermes"
+        log_info "  Data:    $HERMES_HOME (unchanged)"
+        return 0
+    fi
+
+    # Default: non-root, or root on a non-Linux OS (e.g. macOS) → legacy user-scoped layout.
+    INSTALL_DIR="$HERMES_HOME/hermes-agent"
+}
+
 get_command_link_dir() {
    if is_termux && [ -n "${PREFIX:-}" ]; then
        echo "$PREFIX/bin"
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo "/usr/local/bin"
    else
        echo "$HOME/.local/bin"
    fi
@@ -174,6 +252,8 @@ get_command_link_dir() {
 get_command_link_display_dir() {
    if is_termux && [ -n "${PREFIX:-}" ]; then
        echo '$PREFIX/bin'
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo '/usr/local/bin'
    else
        echo '~/.local/bin'
    fi
@@ -975,6 +1055,14 @@ setup_path() {
        return 0
    fi

+    # FHS layout: /usr/local/bin is on PATH for every standard shell, so no shell rc edit is needed (the export only covers this installer process).
+    if [ "$ROOT_FHS_LAYOUT" = true ]; then
+        export PATH="$command_link_dir:$PATH"
+        log_info "/usr/local/bin is already on PATH for all shells"
+        log_success "hermes command ready"
+        return 0
+    fi
+
    # Check if ~/.local/bin is on PATH; if not, add it to shell config.
    # Detect the user's actual login shell (not the shell running this script,
    # which is always bash when piped from curl).
@@ -1339,12 +1427,12 @@ print_success() {
    echo ""

    # Show file locations
-    echo -e "${CYAN}${BOLD}📁 Your files (all in ~/.hermes/):${NC}"
+    echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
    echo ""
-    echo -e "   ${YELLOW}Config:${NC}    ~/.hermes/config.yaml"
-    echo -e "   ${YELLOW}API Keys:${NC}  ~/.hermes/.env"
-    echo -e "   ${YELLOW}Data:${NC}      ~/.hermes/cron/, sessions/, logs/"
-    echo -e "   ${YELLOW}Code:${NC}      ~/.hermes/hermes-agent/"
+    echo -e "   ${YELLOW}Config:${NC}    $HERMES_HOME/config.yaml"
+    echo -e "   ${YELLOW}API Keys:${NC}  $HERMES_HOME/.env"
+    echo -e "   ${YELLOW}Data:${NC}      $HERMES_HOME/cron/, sessions/, logs/"
+    echo -e "   ${YELLOW}Code:${NC}      $INSTALL_DIR"
    echo ""

    echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
@@ -1364,6 +1452,9 @@ print_success() {
    if [ "$DISTRO" = "termux" ]; then
        echo -e "${YELLOW}⚡ 'hermes' was linked into $(get_command_link_display_dir), which is already on PATH in Termux.${NC}"
        echo ""
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo -e "${YELLOW}⚡ 'hermes' was linked into /usr/local/bin and is ready to use — no shell reload needed.${NC}"
+        echo ""
    else
        echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}"
        echo ""
@@ -1415,6 +1506,7 @@ main() {
    print_banner

    detect_os
+    resolve_install_layout
    install_uv
    check_python
    check_git
@@ -92,6 +92,7 @@ AUTHOR_MAP = {
    "104278804+Sertug17@users.noreply.github.com": "Sertug17",
    "112503481+caentzminger@users.noreply.github.com": "caentzminger",
    "258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
+    "xydarcher@uestc.edu.cn": "Readon",
    "sir_even@icloud.com": "sirEven",
    "36056348+sirEven@users.noreply.github.com": "sirEven",
    "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
@@ -504,6 +505,7 @@ AUTHOR_MAP = {
    "screenmachine@gmail.com": "teknium1",
    "chenzeshi@live.com": "chen1749144759",
    "mor.aleksandr@yahoo.com": "MorAlekss",
+    "ash@users.noreply.github.com": "ash",
 }


@@ -17,6 +17,13 @@ Remove refusal behaviors (guardrails) from open-weight LLMs without retraining o

 **License warning:** OBLITERATUS is AGPL-3.0. NEVER import it as a Python library. Always invoke via CLI (`obliteratus` command) or subprocess. This keeps Hermes Agent's MIT license clean.

+## Video Guide
+
+Walkthrough of OBLITERATUS used by a Hermes agent to abliterate Gemma:
+https://www.youtube.com/watch?v=8fG9BrNTeHs ("OBLITERATUS: An AI Agent Removed Gemma 4's Safety Guardrails")
+
+Useful when the user wants a visual overview of the end-to-end workflow before running it themselves.
+
 ## When to Use This Skill

 Trigger when the user:
@@ -386,7 +386,7 @@ class TestProvidersDictApiModeAnthropicMessages:
                },
            },
            "auxiliary": {
-                "flush_memories": {
+                "compression": {
                    "provider": "myrelay",
                    "model": "claude-sonnet-4.6",
                },
@@ -399,11 +399,11 @@ class TestProvidersDictApiModeAnthropicMessages:
            AnthropicAuxiliaryClient,
            AsyncAnthropicAuxiliaryClient,
        )
-        async_client, async_model = get_async_text_auxiliary_client("flush_memories")
+        async_client, async_model = get_async_text_auxiliary_client("compression")
        assert isinstance(async_client, AsyncAnthropicAuxiliaryClient)
        assert async_model == "claude-sonnet-4.6"

-        sync_client, sync_model = get_text_auxiliary_client("flush_memories")
+        sync_client, sync_model = get_text_auxiliary_client("compression")
        assert isinstance(sync_client, AnthropicAuxiliaryClient)
        assert sync_model == "claude-sonnet-4.6"

@@ -847,6 +847,32 @@ class TestTokenBudgetTailProtection:
        assert isinstance(pruned, int)


+class TestUpdateModelBudgets:
+    """Regression: update_model() must recalculate token budgets."""
+
+    def test_tail_budget_recalculated(self):
+        """tail_token_budget must change after switching to a different context length."""
+        from unittest.mock import patch
+        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
+            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
+        old_tail = comp.tail_token_budget
+        old_max_summary = comp.max_summary_tokens
+
+        comp.update_model("model-b", context_length=32_000)
+        assert comp.tail_token_budget != old_tail, "tail_token_budget should change"
+        assert comp.tail_token_budget < old_tail, "smaller context → smaller budget"
+        assert comp.max_summary_tokens != old_max_summary, "max_summary_tokens should change"
+
+    def test_budgets_proportional(self):
+        """Budgets should be proportional to context_length after update."""
+        from unittest.mock import patch
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
+        comp.update_model("model-b", context_length=10_000)
+        assert comp.tail_token_budget == int(comp.threshold_tokens * comp.summary_target_ratio)
+        assert comp.max_summary_tokens == min(int(10_000 * 0.05), 4000)
+
+
 class TestTruncateToolCallArgsJson:
    """Regression tests for #11762.

@@ -0,0 +1,201 @@
+"""Regression tests for the generic unsupported-parameter detector in
+``agent.auxiliary_client``.
+
+The original temperature-specific detector (PR #15621) was generalized so the
+same reactive-retry strategy covers any provider that rejects an arbitrary
+request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not
+just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization
+pattern.
+
+These tests lock in:
+  * ``_is_unsupported_parameter_error(exc, param)`` across common phrasings
+  * the back-compat wrapper ``_is_unsupported_temperature_error`` still works
+  * the max_tokens retry branch is skipped when the caller never set
+    ``max_tokens`` (``max_tokens is not None`` gate)
+  * the max_tokens retry branch matches via the generic helper on top of the
+    legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks
+"""
+
+from unittest.mock import patch, MagicMock, AsyncMock
+
+import pytest
+
+from agent.auxiliary_client import (
+    call_llm,
+    async_call_llm,
+    _is_unsupported_parameter_error,
+    _is_unsupported_temperature_error,
+)
+
+
+class TestIsUnsupportedParameterError:
+    """The generic detector must match real provider phrasings for any param."""
+
+    @pytest.mark.parametrize("param,message", [
+        # temperature phrasings (regression coverage via the generic API)
+        ("temperature", "HTTP 400: Unsupported parameter: temperature"),
+        ("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"),
+        ("temperature", "this model does not support temperature"),
+        # max_tokens phrasings
+        ("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"),
+        ("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"),
+        ("max_tokens", "Invalid parameter: max_tokens is not supported"),
+        # arbitrary future params
+        ("seed", "HTTP 400: unrecognized parameter: seed"),
+        ("top_p", "Error: top_p is not supported for this model"),
+    ])
+    def test_matches_real_provider_messages(self, param, message):
+        assert _is_unsupported_parameter_error(RuntimeError(message), param) is True
+
+    @pytest.mark.parametrize("param,message", [
+        # Param not mentioned at all
+        ("temperature", "HTTP 400: max_tokens is too large"),
+        # Param mentioned but not flagged as unsupported
+        ("temperature", "temperature must be between 0 and 2"),
+        # Totally unrelated 400
+        ("max_tokens", "Rate limit exceeded"),
+        # Connection-level errors
+        ("temperature", "Connection reset by peer"),
+    ])
+    def test_does_not_match_unrelated_errors(self, param, message):
+        assert _is_unsupported_parameter_error(RuntimeError(message), param) is False
+
+    def test_empty_param_returns_false(self):
+        assert _is_unsupported_parameter_error(
+            RuntimeError("HTTP 400: Unsupported parameter: temperature"), ""
+        ) is False
+
+    def test_temperature_wrapper_delegates_to_generic(self):
+        """Back-compat: ``_is_unsupported_temperature_error`` still routes through."""
+        msg = "HTTP 400: Unsupported parameter: temperature"
+        assert _is_unsupported_temperature_error(RuntimeError(msg)) is True
+        # And the unrelated-case still holds
+        assert _is_unsupported_temperature_error(
+            RuntimeError("max_tokens is too large")) is False
+
+
+def _dummy_response():
+    """Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
+    return {"ok": True}
+
+
+class TestMaxTokensRetryHardening:
+    """The max_tokens retry branch now (a) gates on ``max_tokens is not None``
+    and (b) also matches the generic phrasings via the helper.
+    """
+
+    def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
+        """No max_tokens kwarg → must not pop/retry even if the error mentions it.
+
+        Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe but
+        ``kwargs["max_completion_tokens"] = max_tokens`` would set a None
+        value and hit the provider again. The gate skips the whole branch.
+        """
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
+        client.chat.completions.create.side_effect = err
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            with pytest.raises(RuntimeError):
+                call_llm(
+                    task="session_search",
+                    messages=[{"role": "user", "content": "hi"}],
+                    temperature=0.3,
+                    # max_tokens omitted on purpose
+                )
+
+        # Only the initial attempt — no retry because the gate blocked it
+        assert client.chat.completions.create.call_count == 1
+
+    def test_sync_max_tokens_retry_matches_generic_phrasing(self):
+        """A 400 phrased "Unknown parameter: max_tokens" (bare parameter name,
+        with no ``unsupported_parameter`` token in the message) must trigger
+        the retry; the generic helper recognises this phrasing.
+        """
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("Unknown parameter: max_tokens")
+        response = _dummy_response()
+        client.chat.completions.create.side_effect = [err, response]
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hi"}],
+                temperature=0.3,
+                max_tokens=512,
+            )
+
+        assert result is response
+        assert client.chat.completions.create.call_count == 2
+        second_call = client.chat.completions.create.call_args_list[1]
+        assert "max_tokens" not in second_call.kwargs
+        assert second_call.kwargs["max_completion_tokens"] == 512
+
+    @pytest.mark.asyncio
+    async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
+        client.chat.completions.create = AsyncMock(side_effect=err)
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            with pytest.raises(RuntimeError):
+                await async_call_llm(
+                    task="session_search",
+                    messages=[{"role": "user", "content": "hi"}],
+                    temperature=0.3,
+                )
+
+        assert client.chat.completions.create.call_count == 1
+
+    @pytest.mark.asyncio
+    async def test_async_max_tokens_retry_matches_generic_phrasing(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("Unknown parameter: max_tokens")
+        response = _dummy_response()
+        client.chat.completions.create = AsyncMock(side_effect=[err, response])
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = await async_call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hi"}],
+                temperature=0.3,
+                max_tokens=512,
+            )
+
+        assert result is response
+        assert client.chat.completions.create.await_count == 2
+        second_call = client.chat.completions.create.call_args_list[1]
+        assert "max_tokens" not in second_call.kwargs
+        assert second_call.kwargs["max_completion_tokens"] == 512
@@ -0,0 +1,237 @@
+"""Regression tests for the universal "unsupported temperature" retry in
+``agent.auxiliary_client``.
+
+Auxiliary callers (context compression, session search,
+web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
+reasons. Several provider/model combinations reject ``temperature`` with a
+400:
+
+  * OpenAI Responses (gpt-5/o-series reasoning models)
+  * Copilot Responses (reasoning models)
+  * OpenRouter reasoning models (gpt-5.5, some anthropic via OAI-compat)
+  * Anthropic Opus 4.7+ via OpenAI-compat endpoints
+  * Kimi/Moonshot (server-managed)
+
+``_fixed_temperature_for_model`` catches Kimi up front, and
+``build_chat_completion_kwargs`` drops temperature for Anthropic Opus 4.7+,
+but the same backend can accept ``temperature`` for some models and reject
+it for others (for example gpt-5.4 accepts but gpt-5.5 rejects on the same
+endpoint). An allow/deny-list is not maintainable across providers.
+
+The universal fix is reactive: when a call returns an
+``Unsupported parameter: temperature`` 400, retry once without temperature.
+These tests lock in that behaviour for both sync and async paths.
+"""
+
+from unittest.mock import patch, MagicMock, AsyncMock
+
+import pytest
+
+from agent.auxiliary_client import (
+    call_llm,
+    async_call_llm,
+    _is_unsupported_temperature_error,
+)
+
+
+class TestIsUnsupportedTemperatureError:
+    """The detector must match the phrasings providers actually return."""
+
+    @pytest.mark.parametrize("message", [
+        # OpenAI / Codex Responses
+        "HTTP 400: Unsupported parameter: temperature",
+        "Error code: 400 - {'error': {'message': \"Unsupported parameter: 'temperature'\"}}",
+        # Copilot / OpenAI error-code form
+        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
+        # OpenRouter-style
+        "Provider returned error: temperature is not supported for this model",
+        "this model does not support temperature",
+        # Anthropic-style via OAI-compat
+        "temperature: unknown parameter",
+        # Some gateways
+        "unrecognized request argument supplied: temperature",
+    ])
+    def test_matches_real_provider_messages(self, message):
+        assert _is_unsupported_temperature_error(RuntimeError(message)) is True
+
+    @pytest.mark.parametrize("message", [
+        # Unrelated 400s must NOT trigger a silent-retry
+        "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'...",
+        "max_tokens is too large for this model",
+        "Rate limit exceeded",
+        "Connection reset by peer",
+        # Temperature value error is a different class of problem
+        "temperature must be between 0 and 2",
+    ])
+    def test_does_not_match_unrelated_errors(self, message):
+        assert _is_unsupported_temperature_error(RuntimeError(message)) is False
+
+
+def _dummy_response():
+    # The real code calls _validate_llm_response which inspects
+    # response.choices[0].message.  The tests here patch that out, so
+    # any sentinel object is fine.
+    return {"ok": True}
+
+
+class TestCallLlmUnsupportedTemperatureRetry:
+    """``call_llm`` retries once without temperature and returns on success."""
+
+    def _setup(self, first_exc):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        client.chat.completions.create.side_effect = [first_exc, _dummy_response()]
+        return client
+
+    @pytest.mark.parametrize("error_message", [
+        "HTTP 400: Unsupported parameter: temperature",
+        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
+        "Provider error: this model does not support temperature",
+    ])
+    def test_retries_once_without_temperature(self, error_message):
+        client = self._setup(RuntimeError(error_message))
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "remember this"}],
+                temperature=0.3,
+                max_tokens=500,
+            )
+
+        assert result == {"ok": True}
+        assert client.chat.completions.create.call_count == 2
+        first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
+        retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
+        assert first_kwargs["temperature"] == 0.3
+        assert "temperature" not in retry_kwargs
+        # other kwargs preserved
+        assert retry_kwargs["max_tokens"] == 500
+
+    def test_non_temperature_400_does_not_retry_as_temperature(self):
+        """Unrelated 400s (e.g. bad tool role) must not silently drop temp."""
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        non_temp_err = RuntimeError(
+            "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'..."
+        )
+        client.chat.completions.create.side_effect = non_temp_err
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._try_payment_fallback",
+                  return_value=None),
+        ):
+            with pytest.raises(RuntimeError, match="Invalid value"):
+                call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "x"}],
+                    temperature=0.3,
+                    max_tokens=500,
+                )
+        # Should NOT have retried (non-temperature 400 doesn't match)
+        assert client.chat.completions.create.call_count == 1
+
+    def test_no_retry_when_temperature_not_in_kwargs(self):
+        """If caller didn't send temperature, don't invent a temperature-retry."""
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        # Provider complains about temperature even though we didn't send it.
+        # (Pathological but possible with misleading error text.)  The guard
+        # ``"temperature" in kwargs`` must prevent an unnecessary retry.
+        err = RuntimeError("HTTP 400: Unsupported parameter: temperature")
+        client.chat.completions.create.side_effect = err
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._try_payment_fallback",
+                  return_value=None),
+        ):
+            with pytest.raises(RuntimeError):
+                call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "x"}],
+                    temperature=None,  # explicit: no temperature sent
+                    max_tokens=500,
+                )
+        assert client.chat.completions.create.call_count == 1
+
+
+class TestAsyncCallLlmUnsupportedTemperatureRetry:
+    """``async_call_llm`` mirror of the sync retry semantics."""
+
+    @pytest.mark.asyncio
+    async def test_async_retries_once_without_temperature(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        client.chat.completions.create = AsyncMock(side_effect=[
+            RuntimeError("HTTP 400: Unsupported parameter: temperature"),
+            _dummy_response(),
+        ])
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = await async_call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "query"}],
+                temperature=0.3,
+                max_tokens=500,
+            )
+
+        assert result == {"ok": True}
+        assert client.chat.completions.create.await_count == 2
+        first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
+        retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
+        assert first_kwargs["temperature"] == 0.3
+        assert "temperature" not in retry_kwargs
+        assert retry_kwargs["max_tokens"] == 500
+
+    @pytest.mark.asyncio
+    async def test_async_non_temperature_400_does_not_retry(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        client.chat.completions.create = AsyncMock(
+            side_effect=RuntimeError("HTTP 400: Invalid value: 'tool'"),
+        )
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._try_payment_fallback",
+                  return_value=None),
+        ):
+            with pytest.raises(RuntimeError, match="Invalid value"):
+                await async_call_llm(
+                    task="session_search",
+                    messages=[{"role": "user", "content": "x"}],
+                    temperature=0.3,
+                    max_tokens=500,
+                )
+        assert client.chat.completions.create.await_count == 1
@@ -33,7 +33,6 @@ class _FakeAgent:
        self._todo_store.write(
            [{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
        )
-        self.flush_memories = MagicMock()
        self.commit_memory_session = MagicMock()
        self._invalidate_system_prompt = MagicMock()

@@ -157,7 +156,6 @@ def test_new_command_creates_real_fresh_session_and_resets_agent_state(tmp_path)
    assert cli.agent._todo_store.read() == []
    assert cli.session_start > old_session_start
    assert cli.agent.session_start == cli.session_start
-    cli.agent.flush_memories.assert_called_once_with([{"role": "user", "content": "hello"}])
    cli.agent._invalidate_system_prompt.assert_called_once()


@@ -0,0 +1,390 @@
+"""Tests for cron job context_from feature (issue #5439 Option C)."""
+
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+
+@pytest.fixture
+def cron_env(tmp_path, monkeypatch):
+    """Isolated cron environment with temp HERMES_HOME."""
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    (hermes_home / "cron").mkdir()
+    (hermes_home / "cron" / "output").mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    import cron.jobs as jobs_mod
+    monkeypatch.setattr(jobs_mod, "HERMES_DIR", hermes_home)
+    monkeypatch.setattr(jobs_mod, "CRON_DIR", hermes_home / "cron")
+    monkeypatch.setattr(jobs_mod, "JOBS_FILE", hermes_home / "cron" / "jobs.json")
+    monkeypatch.setattr(jobs_mod, "OUTPUT_DIR", hermes_home / "cron" / "output")
+
+    return hermes_home
+
+
+class TestJobContextFromField:
+    """Test that context_from is stored and retrieved correctly."""
+
+    def test_create_job_with_context_from_string(self, cron_env):
+        from cron.jobs import create_job, get_job
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize findings",
+            schedule="every 2h",
+            context_from=job_a["id"],
+        )
+
+        assert job_b["context_from"] == [job_a["id"]]
+        loaded = get_job(job_b["id"])
+        assert loaded["context_from"] == [job_a["id"]]
+
+    def test_create_job_with_context_from_list(self, cron_env):
+        from cron.jobs import create_job, get_job
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(prompt="Find weather", schedule="every 1h")
+        job_c = create_job(
+            prompt="Summarize everything",
+            schedule="every 2h",
+            context_from=[job_a["id"], job_b["id"]],
+        )
+
+        assert job_c["context_from"] == [job_a["id"], job_b["id"]]
+
+    def test_create_job_without_context_from(self, cron_env):
+        from cron.jobs import create_job
+
+        job = create_job(prompt="Hello", schedule="every 1h")
+        assert job.get("context_from") is None
+
+    def test_context_from_empty_string_normalized_to_none(self, cron_env):
+        from cron.jobs import create_job
+
+        job = create_job(prompt="Hello", schedule="every 1h", context_from="")
+        assert job.get("context_from") is None
+
+    def test_context_from_empty_list_normalized_to_none(self, cron_env):
+        from cron.jobs import create_job
+
+        job = create_job(prompt="Hello", schedule="every 1h", context_from=[])
+        assert job.get("context_from") is None
+
+
+class TestBuildJobPromptContextFrom:
+    """Test that _build_job_prompt() injects context from referenced jobs."""
+
+    def test_injects_latest_output(self, cron_env):
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+
+        # Write an output file for job_a
+        output_dir = OUTPUT_DIR / job_a["id"]
+        output_dir.mkdir(parents=True, exist_ok=True)
+        (output_dir / "2026-04-22_10-00-00.md").write_text(
+            "Today's top story: AI is everywhere.", encoding="utf-8"
+        )
+
+        job_b = create_job(
+            prompt="Summarize the news",
+            schedule="every 2h",
+            context_from=job_a["id"],
+        )
+
+        prompt = _build_job_prompt(job_b)
+        assert "Today's top story: AI is everywhere." in prompt
+        assert f"Output from job '{job_a['id']}'" in prompt
+
+    def test_uses_most_recent_output(self, cron_env):
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+        import time
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        output_dir = OUTPUT_DIR / job_a["id"]
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        old_file = output_dir / "2026-04-22_08-00-00.md"
+        old_file.write_text("Old output", encoding="utf-8")
+        time.sleep(0.01)
+        new_file = output_dir / "2026-04-22_10-00-00.md"
+        new_file.write_text("New output", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
+        )
+        prompt = _build_job_prompt(job_b)
+        assert "New output" in prompt
+        assert "Old output" not in prompt
+
+    def test_graceful_when_no_output_yet(self, cron_env):
+        from cron.jobs import create_job
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
+        )
+
+        # job_a never ran — output dir does not exist
+        # expect silent skip: no placeholder injected, base prompt intact
+        prompt = _build_job_prompt(job_b)
+        assert "no output" not in prompt.lower()
+        assert "not found" not in prompt.lower()
+        assert "Summarize" in prompt
+
+    def test_injects_multiple_context_jobs(self, cron_env):
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(prompt="Find weather", schedule="every 1h")
+
+        for job, content in [(job_a, "News: AI boom"), (job_b, "Weather: Sunny")]:
+            out_dir = OUTPUT_DIR / job["id"]
+            out_dir.mkdir(parents=True, exist_ok=True)
+            (out_dir / "2026-04-22_10-00-00.md").write_text(content, encoding="utf-8")
+
+        job_c = create_job(
+            prompt="Daily briefing",
+            schedule="every 2h",
+            context_from=[job_a["id"], job_b["id"]],
+        )
+        prompt = _build_job_prompt(job_c)
+        assert "News: AI boom" in prompt
+        assert "Weather: Sunny" in prompt
+
+    def test_context_injected_before_prompt(self, cron_env):
+        """Context should appear before the job's own prompt."""
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "2026-04-22_10-00-00.md").write_text("Context data", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process the data above",
+            schedule="every 2h",
+            context_from=job_a["id"],
+        )
+        prompt = _build_job_prompt(job_b)
+        context_pos = prompt.find("Context data")
+        prompt_pos = prompt.find("Process the data above")
+        assert context_pos < prompt_pos
+
+    def test_output_truncated_at_8k_chars(self, cron_env):
+        """Output longer than 8000 chars should be truncated."""
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        big_output = "x" * 10000
+        (out_dir / "2026-04-22_10-00-00.md").write_text(big_output, encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process", schedule="every 2h", context_from=job_a["id"]
+        )
+        prompt = _build_job_prompt(job_b)
+        assert "truncated" in prompt
+        assert "x" * 10000 not in prompt
+
+    def test_graceful_when_file_deleted_between_listing_and_reading(self, cron_env):
+        """Job should not crash if output file is deleted mid-read."""
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+        from unittest.mock import patch
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "2026-04-22_10-00-00.md").write_text("Some output", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process", schedule="every 2h", context_from=job_a["id"]
+        )
+
+        # Simulate file deleted between glob() and read_text()
+        original_read = Path.read_text
+        def mock_read_text(self, *args, **kwargs):
+            if self.suffix == ".md":
+                raise FileNotFoundError("file deleted mid-read")
+            return original_read(self, *args, **kwargs)
+
+        with patch.object(Path, "read_text", mock_read_text):
+            prompt = _build_job_prompt(job_b)
+
+        # Job should not crash, prompt should still contain the base prompt
+        assert "Process" in prompt
+
+    def test_graceful_when_permission_error(self, cron_env):
+        """Job should not crash if the output file is not readable."""
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+        from unittest.mock import patch
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "2026-04-22_10-00-00.md").write_text("Some output", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process", schedule="every 2h", context_from=job_a["id"]
+        )
+
+        # Simulate permission error on read
+        original_read = Path.read_text
+        def mock_read_text(self, *args, **kwargs):
+            if self.suffix == ".md":
+                raise PermissionError("permission denied")
+            return original_read(self, *args, **kwargs)
+
+        with patch.object(Path, "read_text", mock_read_text):
+            prompt = _build_job_prompt(job_b)
+
+        # Job should not crash, prompt should still contain the base prompt
+        assert "Process" in prompt
+
+    def test_invalid_job_id_skipped(self, cron_env):
+        """context_from with path traversal job_id should be skipped."""
+        from cron.jobs import create_job
+        from cron.scheduler import _build_job_prompt
+
+        job = create_job(prompt="Process", schedule="every 2h")
+        # Manually inject invalid context_from (simulating tampered jobs.json)
+        job["context_from"] = ["../../../etc/passwd"]
+        prompt = _build_job_prompt(job)
+        # Should not crash and should not inject anything malicious
+        assert "Process" in prompt
+        assert "etc/passwd" not in prompt
+
+
+
+class TestUpdateContextFrom:
+    """Verify the cronjob tool's `update` action wires context_from through.
+
+    Without this, the create-path stores the field but users can never modify
+    or clear it via the tool (schema promises "pass an empty array to clear").
+    """
+
+    def test_update_adds_context_from_to_existing_job(self, cron_env):
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(prompt="Summarize", schedule="every 2h")
+        assert job_b.get("context_from") is None
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=job_a["id"],
+        ))
+        assert result["success"] is True
+
+        reloaded = get_job(job_b["id"])
+        assert reloaded["context_from"] == [job_a["id"]]
+
+    def test_update_changes_context_from_reference(self, cron_env):
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_a2 = create_job(prompt="Find weather", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+        assert job_b["context_from"] == [job_a["id"]]
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=[job_a2["id"]],
+        ))
+        assert result["success"] is True
+        assert get_job(job_b["id"])["context_from"] == [job_a2["id"]]
+
+    def test_update_clears_context_from_with_empty_list(self, cron_env):
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+        assert get_job(job_b["id"])["context_from"] == [job_a["id"]]
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=[],
+        ))
+        assert result["success"] is True
+        assert get_job(job_b["id"])["context_from"] is None
+
+    def test_update_clears_context_from_with_empty_string(self, cron_env):
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from="",
+        ))
+        assert result["success"] is True
+        assert get_job(job_b["id"])["context_from"] is None
+
+    def test_update_rejects_unknown_job_reference(self, cron_env):
+        from cron.jobs import create_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_b = create_job(prompt="Summarize", schedule="every 2h")
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=["deadbeef0000"],
+        ))
+        assert result["success"] is False
+        assert "not found" in result["error"]
+
+    def test_update_preserves_context_from_when_not_passed(self, cron_env):
+        """Updating other fields must not clobber context_from."""
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+
+        # Update an unrelated field
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            prompt="Summarize v2",
+        ))
+        assert result["success"] is True
+        reloaded = get_job(job_b["id"])
+        assert reloaded["prompt"] == "Summarize v2"
+        assert reloaded["context_from"] == [job_a["id"]]
@@ -1,249 +0,0 @@
-"""Tests for proactive memory flush on session expiry.
-
-Verifies that:
-1. _is_session_expired() works from a SessionEntry alone (no source needed)
-2. The sync callback is no longer called in get_or_create_session
-3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
-4. The background watcher can detect expired sessions
-"""
-
-import pytest
-from datetime import datetime, timedelta
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-from gateway.config import Platform, GatewayConfig, SessionResetPolicy
-from gateway.session import SessionSource, SessionStore, SessionEntry
-
-
-@pytest.fixture()
-def idle_store(tmp_path):
-    """SessionStore with a 60-minute idle reset policy."""
-    config = GatewayConfig(
-        default_reset_policy=SessionResetPolicy(mode="idle", idle_minutes=60),
-    )
-    with patch("gateway.session.SessionStore._ensure_loaded"):
-        s = SessionStore(sessions_dir=tmp_path, config=config)
-    s._db = None
-    s._loaded = True
-    return s
-
-
-@pytest.fixture()
-def no_reset_store(tmp_path):
-    """SessionStore with no reset policy (mode=none)."""
-    config = GatewayConfig(
-        default_reset_policy=SessionResetPolicy(mode="none"),
-    )
-    with patch("gateway.session.SessionStore._ensure_loaded"):
-        s = SessionStore(sessions_dir=tmp_path, config=config)
-    s._db = None
-    s._loaded = True
-    return s
-
-
-class TestIsSessionExpired:
-    """_is_session_expired should detect expiry from entry alone."""
-
-    def test_idle_session_expired(self, idle_store):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_1",
-            created_at=datetime.now() - timedelta(hours=3),
-            updated_at=datetime.now() - timedelta(minutes=120),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert idle_store._is_session_expired(entry) is True
-
-    def test_active_session_not_expired(self, idle_store):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_2",
-            created_at=datetime.now() - timedelta(hours=1),
-            updated_at=datetime.now() - timedelta(minutes=10),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert idle_store._is_session_expired(entry) is False
-
-    def test_none_mode_never_expires(self, no_reset_store):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_3",
-            created_at=datetime.now() - timedelta(days=30),
-            updated_at=datetime.now() - timedelta(days=30),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert no_reset_store._is_session_expired(entry) is False
-
-    def test_active_processes_prevent_expiry(self, idle_store):
-        """Sessions with active background processes should never expire."""
-        idle_store._has_active_processes_fn = lambda key: True
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_4",
-            created_at=datetime.now() - timedelta(hours=5),
-            updated_at=datetime.now() - timedelta(hours=5),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert idle_store._is_session_expired(entry) is False
-
-    def test_daily_mode_expired(self, tmp_path):
-        """Daily mode should expire sessions from before today's reset hour."""
-        config = GatewayConfig(
-            default_reset_policy=SessionResetPolicy(mode="daily", at_hour=4),
-        )
-        with patch("gateway.session.SessionStore._ensure_loaded"):
-            store = SessionStore(sessions_dir=tmp_path, config=config)
-        store._db = None
-        store._loaded = True
-
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_5",
-            created_at=datetime.now() - timedelta(days=2),
-            updated_at=datetime.now() - timedelta(days=2),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert store._is_session_expired(entry) is True
-
-
-class TestGetOrCreateSessionNoCallback:
-    """get_or_create_session should NOT call a sync flush callback."""
-
-    def test_auto_reset_creates_new_session_after_flush(self, idle_store):
-        """When a flushed session auto-resets, a new session_id is created."""
-        source = SessionSource(
-            platform=Platform.TELEGRAM,
-            chat_id="123",
-            chat_type="dm",
-        )
-        # Create initial session
-        entry1 = idle_store.get_or_create_session(source)
-        old_sid = entry1.session_id
-
-        # Simulate the watcher having flushed it
-        entry1.memory_flushed = True
-
-        # Simulate the session going idle
-        entry1.updated_at = datetime.now() - timedelta(minutes=120)
-        idle_store._save()
-
-        # Next call should auto-reset
-        entry2 = idle_store.get_or_create_session(source)
-        assert entry2.session_id != old_sid
-        assert entry2.was_auto_reset is True
-        # New session starts with memory_flushed=False
-        assert entry2.memory_flushed is False
-
-    def test_no_sync_callback_invoked(self, idle_store):
-        """No synchronous callback should block during auto-reset."""
-        source = SessionSource(
-            platform=Platform.TELEGRAM,
-            chat_id="123",
-            chat_type="dm",
-        )
-        entry1 = idle_store.get_or_create_session(source)
-        entry1.updated_at = datetime.now() - timedelta(minutes=120)
-        idle_store._save()
-
-        # Verify no _on_auto_reset attribute
-        assert not hasattr(idle_store, '_on_auto_reset')
-
-        # This should NOT block (no sync LLM call)
-        entry2 = idle_store.get_or_create_session(source)
-        assert entry2.was_auto_reset is True
-
-
-class TestMemoryFlushedFlag:
-    """The memory_flushed flag on SessionEntry prevents double-flushing."""
-
-    def test_defaults_to_false(self):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm:123",
-            session_id="sid_new",
-            created_at=datetime.now(),
-            updated_at=datetime.now(),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert entry.memory_flushed is False
-
-    def test_persists_through_save_load(self, idle_store):
-        """memory_flushed=True must survive a save/load cycle (simulates restart)."""
-        key = "agent:main:discord:thread:789"
-        entry = SessionEntry(
-            session_key=key,
-            session_id="sid_flushed",
-            created_at=datetime.now() - timedelta(hours=5),
-            updated_at=datetime.now() - timedelta(hours=5),
-            platform=Platform.DISCORD,
-            chat_type="thread",
-            memory_flushed=True,
-        )
-        idle_store._entries[key] = entry
-        idle_store._save()
-
-        # Simulate restart: clear in-memory state, reload from disk
-        idle_store._entries.clear()
-        idle_store._loaded = False
-        idle_store._ensure_loaded()
-
-        reloaded = idle_store._entries[key]
-        assert reloaded.memory_flushed is True
-
-    def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
-        """An entry without memory_flushed stays False after reload."""
-        key = "agent:main:telegram:dm:456"
-        entry = SessionEntry(
-            session_key=key,
-            session_id="sid_not_flushed",
-            created_at=datetime.now() - timedelta(hours=2),
-            updated_at=datetime.now() - timedelta(hours=2),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        idle_store._entries[key] = entry
-        idle_store._save()
-
-        idle_store._entries.clear()
-        idle_store._loaded = False
-        idle_store._ensure_loaded()
-
-        reloaded = idle_store._entries[key]
-        assert reloaded.memory_flushed is False
-
-    def test_roundtrip_to_dict_from_dict(self):
-        """to_dict/from_dict must preserve memory_flushed."""
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm:999",
-            session_id="sid_rt",
-            created_at=datetime.now(),
-            updated_at=datetime.now(),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-            memory_flushed=True,
-        )
-        d = entry.to_dict()
-        assert d["memory_flushed"] is True
-
-        restored = SessionEntry.from_dict(d)
-        assert restored.memory_flushed is True
-
-    def test_legacy_entry_without_field_defaults_false(self):
-        """Old sessions.json entries missing memory_flushed should default to False."""
-        data = {
-            "session_key": "agent:main:telegram:dm:legacy",
-            "session_id": "sid_legacy",
-            "created_at": datetime.now().isoformat(),
-            "updated_at": datetime.now().isoformat(),
-            "platform": "telegram",
-            "chat_type": "dm",
-            # no memory_flushed key
-        }
-        entry = SessionEntry.from_dict(data)
-        assert entry.memory_flushed is False
@@ -1,240 +0,0 @@
-"""Tests for memory flush stale-overwrite prevention (#2670).
-
-Verifies that:
-1. Cron sessions are skipped (no flush for headless cron runs)
-2. Current memory state is injected into the flush prompt so the
-   flush agent can see what's already saved and avoid overwrites
-3. The flush still works normally when memory files don't exist
-"""
-
-import sys
-import types
-import pytest
-from pathlib import Path
-from unittest.mock import MagicMock, patch, call
-
-
-@pytest.fixture(autouse=True)
-def _mock_dotenv(monkeypatch):
-    """gateway.run imports dotenv at module level; stub it so tests run without the package."""
-    fake = types.ModuleType("dotenv")
-    fake.load_dotenv = lambda *a, **kw: None
-    monkeypatch.setitem(sys.modules, "dotenv", fake)
-
-
-def _make_runner():
-    from gateway.run import GatewayRunner
-
-    runner = object.__new__(GatewayRunner)
-    runner._honcho_managers = {}
-    runner._honcho_configs = {}
-    runner._running_agents = {}
-    runner._pending_messages = {}
-    runner._pending_approvals = {}
-    runner.adapters = {}
-    runner.hooks = MagicMock()
-    runner.session_store = MagicMock()
-    return runner
-
-
-_TRANSCRIPT_4_MSGS = [
-    {"role": "user", "content": "hello"},
-    {"role": "assistant", "content": "hi there"},
-    {"role": "user", "content": "remember my name is Alice"},
-    {"role": "assistant", "content": "Got it, Alice!"},
-]
-
-
-class TestCronSessionBypass:
-    """Cron sessions should never trigger a memory flush."""
-
-    def test_cron_session_skipped(self):
-        runner = _make_runner()
-        runner._flush_memories_for_session("cron_job123_20260323_120000")
-        # session_store.load_transcript should never be called
-        runner.session_store.load_transcript.assert_not_called()
-
-    def test_cron_session_with_prefix_skipped(self):
-        """Cron sessions with different prefixes are still skipped."""
-        runner = _make_runner()
-        runner._flush_memories_for_session("cron_daily_20260323")
-        runner.session_store.load_transcript.assert_not_called()
-
-    def test_non_cron_session_proceeds(self):
-        """Non-cron sessions should still attempt the flush."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = []
-        runner._flush_memories_for_session("session_abc123")
-        runner.session_store.load_transcript.assert_called_once_with("session_abc123")
-
-
-def _make_flush_context(monkeypatch, memory_dir=None):
-    """Return (runner, tmp_agent, fake_run_agent) with run_agent mocked in sys.modules."""
-    tmp_agent = MagicMock()
-    fake_run_agent = types.ModuleType("run_agent")
-    fake_run_agent.AIAgent = MagicMock(return_value=tmp_agent)
-    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
-
-    runner = _make_runner()
-    runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-    return runner, tmp_agent, memory_dir
-
-
-class TestMemoryInjection:
-    """The flush prompt should include current memory state from disk."""
-
-    def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
-        """When memory files exist, their content appears in the flush prompt."""
-        memory_dir = tmp_path / "memories"
-        memory_dir.mkdir()
-        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
-        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")
-
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: memory_dir)}),
-        ):
-            runner._flush_memories_for_session("session_123")
-
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-
-        assert "Agent knows Python" in flush_prompt
-        assert "User prefers dark mode" in flush_prompt
-        assert "Name: Alice" in flush_prompt
-        assert "Timezone: PST" in flush_prompt
-        assert "Do NOT overwrite or remove entries" in flush_prompt
-        assert "current live state of memory" in flush_prompt
-
-    def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
-        """When no memory files exist, flush still runs without the guard."""
-        empty_dir = tmp_path / "no_memories"
-        empty_dir.mkdir()
-
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: empty_dir)}),
-        ):
-            runner._flush_memories_for_session("session_456")
-
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "Do NOT overwrite or remove entries" not in flush_prompt
-        assert "Review the conversation above" in flush_prompt
-
-    def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
-        """Empty memory files should not trigger the guard section."""
-        memory_dir = tmp_path / "memories"
-        memory_dir.mkdir()
-        (memory_dir / "MEMORY.md").write_text("")
-        (memory_dir / "USER.md").write_text("  \n  ")  # whitespace only
-
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: memory_dir)}),
-        ):
-            runner._flush_memories_for_session("session_789")
-
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "current live state of memory" not in flush_prompt
-
-
-class TestFlushAgentSilenced:
-    """The flush agent must not produce any terminal output."""
-
-    def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
-        """_print_fn on the flush agent must be a no-op so tool output never leaks."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        captured_agent = {}
-
-        def _fake_ai_agent(*args, **kwargs):
-            agent = MagicMock()
-            captured_agent["instance"] = agent
-            return agent
-
-        fake_run_agent = types.ModuleType("run_agent")
-        fake_run_agent.AIAgent = _fake_ai_agent
-        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: tmp_path)}),
-        ):
-            runner._flush_memories_for_session("session_silent")
-
-        agent = captured_agent["instance"]
-        assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
-        # Confirm it is callable and produces no output (no exception)
-        agent._print_fn("should be silenced")
-
-    def test_kawaii_spinner_respects_print_fn(self):
-        """KawaiiSpinner must route all output through print_fn when supplied."""
-        from agent.display import KawaiiSpinner
-
-        written = []
-        spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
-        spinner._write("hello")
-        assert written == [("hello",)], "spinner should route through print_fn"
-
-        # A no-op print_fn must produce no output to stdout
-        import io, sys
-        buf = io.StringIO()
-        old_stdout = sys.stdout
-        sys.stdout = buf
-        try:
-            silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
-            silent_spinner._write("should not appear")
-            silent_spinner.stop("done")
-        finally:
-            sys.stdout = old_stdout
-        assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"
-
-    def test_flush_agent_closes_resources_after_run(self, monkeypatch):
-        """Memory flush should close temporary agent resources after the turn."""
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-        tmp_agent.shutdown_memory_provider = MagicMock()
-        tmp_agent.close = MagicMock()
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
-        ):
-            runner._flush_memories_for_session("session_cleanup")
-
-        tmp_agent.shutdown_memory_provider.assert_called_once()
-        tmp_agent.close.assert_called_once()
-
-
-class TestFlushPromptStructure:
-    """Verify the flush prompt retains its core instructions."""
-
-    def test_core_instructions_present(self, monkeypatch):
-        """The flush prompt should still contain the original guidance."""
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
-        ):
-            runner._flush_memories_for_session("session_struct")
-
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "automatically reset" in flush_prompt
-        assert "Save any important facts" in flush_prompt
-        assert "consider saving it as a skill" in flush_prompt
-        assert "Do NOT respond to the user" in flush_prompt
@@ -4,7 +4,7 @@ Tests the _handle_resume_command handler (switch to a previously-named session)
 across gateway messenger platforms.
 """

-from unittest.mock import MagicMock, AsyncMock
+from unittest.mock import MagicMock

 import pytest

@@ -53,9 +53,6 @@ def _make_runner(session_db=None, current_session_id="current_session_001",
    mock_store.switch_session.return_value = mock_session_entry
    runner.session_store = mock_store

-    # Stub out memory flushing
-    runner._async_flush_memories = AsyncMock()
-
    return runner


@@ -233,28 +230,3 @@ class TestHandleResumeCommand:

        assert real_key not in runner._running_agents
        db.close()
-
-    @pytest.mark.asyncio
-    async def test_resume_flushes_memories(self, tmp_path):
-        """Resume should flush memories from the current session before switching."""
-        from hermes_state import SessionDB
-
-        db = SessionDB(db_path=tmp_path / "state.db")
-        db.create_session("old_session", "telegram")
-        db.set_session_title("old_session", "Old Work")
-        db.create_session("current_session_001", "telegram")
-
-        event = _make_event(text="/resume Old Work")
-        runner = _make_runner(
-            session_db=db,
-            current_session_id="current_session_001",
-            event=event,
-        )
-
-        await runner._handle_resume_command(event)
-
-        runner._async_flush_memories.assert_called_once_with(
-            "current_session_001",
-            "agent:main:telegram:dm:67890",
-        )
-        db.close()
@@ -177,8 +177,8 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
    its reset policy (idle timeout, scheduled reset), it must fire
    ``on_session_finalize`` so plugin providers get the same final-pass
    extraction opportunity they'd get from /new or CLI shutdown.  Before
-    the fix, the expiry path flushed memories and evicted the agent but
-    silently skipped the hook.
+    the fix, the expiry path evicted the agent but silently skipped the
+    hook.
    """
    from datetime import datetime, timedelta

@@ -200,7 +200,7 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )
-    expired_entry.memory_flushed = False
+    expired_entry.expiry_finalized = False

    runner.session_store = MagicMock()
    runner.session_store._ensure_loaded = MagicMock()
@@ -211,24 +211,24 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
    runner.session_store._lock.__exit__ = MagicMock(return_value=None)
    runner.session_store._save = MagicMock()

-    runner._async_flush_memories = AsyncMock()
    runner._evict_cached_agent = MagicMock()
    runner._cleanup_agent_resources = MagicMock()
    runner._sweep_idle_cached_agents = MagicMock(return_value=0)

    # The watcher starts with `await asyncio.sleep(60)` and loops while
-    # `self._running`. Patch sleep so the 60s initial delay is instant, then
-    # flip `_running` false inside the flush call so the loop exits cleanly
-    # after one pass.
+    # `self._running`.  Patch sleep so the 60s initial delay is instant, and
+    # make the expiry hook invocation flip `_running` false so the loop
+    # exits cleanly after one pass.
    _orig_sleep = __import__("asyncio").sleep

    async def _fast_sleep(_):
        await _orig_sleep(0)

-    async def _flush_and_stop(session_id, key):
-        runner._running = False  # terminate the loop after this iteration
+    def _hook_and_stop(*a, **kw):
+        runner._running = False
+        return None

-    runner._async_flush_memories = AsyncMock(side_effect=_flush_and_stop)
+    mock_invoke_hook.side_effect = _hook_and_stop

    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
        await runner._session_expiry_watcher(interval=0)
@@ -1,7 +1,7 @@
 """Regression tests for approval-state cleanup on session boundaries."""

 from datetime import datetime
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import MagicMock

 import pytest

@@ -72,7 +72,6 @@ def _make_resume_runner():
    runner = object.__new__(GatewayRunner)
    runner.adapters = {}
    runner._background_tasks = set()
-    runner._async_flush_memories = AsyncMock()
    runner._running_agents = {}
    runner._running_agents_ts = {}
    runner._busy_ack_ts = {}
@@ -256,6 +256,17 @@ class TestDetectProviderForModel:
        """Models belonging to the current provider should not trigger a switch."""
        assert detect_provider_for_model("gpt-5.3-codex", "openai-codex") is None

+    def test_short_alias_resolves_to_static_model(self):
+        """Short aliases (e.g. sonnet) should resolve without network lookups."""
+        with patch(
+            "hermes_cli.models.fetch_openrouter_models",
+            side_effect=AssertionError("network lookup should not run"),
+        ):
+            result = detect_provider_for_model("sonnet", "auto")
+        assert result is not None
+        assert result[0] == "anthropic"
+        assert result[1].startswith("claude-sonnet")
+
    def test_openrouter_slug_match(self):
        """Models in the OpenRouter catalog should be found."""
        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
@@ -601,3 +601,189 @@ class TestImagegenModelPicker:
            _configure_imagegen_model("fal", config)
        assert isinstance(config["image_gen"], dict)
        assert config["image_gen"]["model"] == "fal-ai/flux-2/klein/9b"
+
+
+def test_save_platform_tools_normalizes_numeric_entries():
+    """YAML may parse bare numeric toolset names as int. They should be
+    normalized to str so they survive the save round-trip.
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", 12306, "custom-mcp"]
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web", "browser"})
+
+    saved = config["platform_toolsets"]["cli"]
+    assert "12306" in saved
+    assert 12306 not in saved
+
+
+def test_save_platform_tools_clears_no_mcp_sentinel():
+    """`hermes tools` has no UI for no_mcp, so saving from the picker clears
+    the sentinel unconditionally — otherwise a user who once set no_mcp by
+    hand could never re-enable MCP servers through the UI.
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", "no_mcp"]
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web", "browser"})
+
+    saved = config["platform_toolsets"]["cli"]
+    assert "no_mcp" not in saved
+
+
+def test_save_platform_tools_preserves_mcp_server_names():
+    """Non-sentinel passthrough entries (MCP server names) must still survive
+    the save — we only clear `no_mcp`, not every non-configurable entry.
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", "custom-mcp", "another-mcp"]
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web", "browser"})
+
+    saved = config["platform_toolsets"]["cli"]
+    assert "custom-mcp" in saved
+    assert "another-mcp" in saved
+
+
+def test_get_platform_tools_recovers_non_configurable_toolsets_from_composite():
+    """Toolsets absent from CONFIGURABLE_TOOLSETS whose tools are part of the
+    platform's composite toolset should still appear in the result.
+    """
+    from toolsets import TOOLSETS
+    from hermes_cli.tools_config import PLATFORMS
+    from unittest.mock import patch as mock_patch
+
+    fake_toolsets = dict(TOOLSETS)
+    fake_toolsets["_test_platform_tool"] = {
+        "description": "test",
+        "tools": ["_test_special_tool"],
+        "includes": [],
+    }
+    fake_toolsets["hermes-_test_platform"] = {
+        "description": "test composite",
+        "tools": ["web_search", "web_extract", "terminal", "process", "_test_special_tool"],
+        "includes": [],
+    }
+
+    test_platforms = {
+        "_test_platform": {"label": "Test", "default_toolset": "hermes-_test_platform"},
+    }
+
+    with mock_patch("hermes_cli.tools_config.PLATFORMS", {**PLATFORMS, **test_platforms}):
+        with mock_patch("toolsets.TOOLSETS", fake_toolsets):
+            enabled = _get_platform_tools({}, "_test_platform")
+
+    assert "_test_platform_tool" in enabled
+    assert "web" in enabled
+    assert "terminal" in enabled
+
+
+def test_get_platform_tools_second_pass_skips_fully_claimed_toolsets():
+    """Toolsets whose tools are fully covered by configurable keys should NOT
+    be added by the second pass (prevents 'search', 'hermes-acp' noise).
+    """
+    enabled = _get_platform_tools({}, "cli")
+
+    assert "search" not in enabled
+
+
+def test_get_platform_tools_discord_both_off_by_default():
+    """Both `discord` and `discord_admin` are opt-in via `hermes tools`,
+    even on the Discord platform itself.  Users shouldn't auto-inherit 19
+    extra tools just because DISCORD_BOT_TOKEN is set."""
+    enabled = _get_platform_tools({}, "discord")
+    assert "discord" not in enabled
+    assert "discord_admin" not in enabled
+
+
+def test_discord_toolsets_in_configurable_toolsets():
+    keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    assert "discord" in keys
+    assert "discord_admin" in keys
+
+
+def test_discord_toolsets_in_default_off():
+    assert "discord" in _DEFAULT_OFF_TOOLSETS
+    assert "discord_admin" in _DEFAULT_OFF_TOOLSETS
+
+
+def test_discord_toolsets_not_available_on_other_platforms():
+    """Platform-scoping: discord / discord_admin should not appear on CLI,
+    Telegram, etc. — not even as an opt-in."""
+    from hermes_cli.tools_config import _toolset_allowed_for_platform
+    for plat in ["cli", "telegram", "slack", "whatsapp", "signal"]:
+        assert not _toolset_allowed_for_platform("discord", plat), (
+            f"`discord` toolset leaked onto {plat}"
+        )
+        assert not _toolset_allowed_for_platform("discord_admin", plat), (
+            f"`discord_admin` toolset leaked onto {plat}"
+        )
+    assert _toolset_allowed_for_platform("discord", "discord")
+    assert _toolset_allowed_for_platform("discord_admin", "discord")
+
+
+def test_discord_toolsets_user_enabled_are_honored():
+    """When the user opts in via `hermes tools`, the toolset appears."""
+    config = {"platform_toolsets": {"discord": ["web", "terminal", "discord"]}}
+    enabled = _get_platform_tools(config, "discord")
+    assert "discord" in enabled
+    assert "discord_admin" not in enabled
+
+
+def test_save_platform_tools_strips_restricted_toolsets():
+    """Restricted toolsets (`discord*`) selected for Telegram, by hand edit or
+    the all-platforms checklist, are stripped on save (save_config is mocked)."""
+    config = {}
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "telegram", {"web", "terminal", "discord", "discord_admin"})
+    saved = config["platform_toolsets"]["telegram"]
+    assert "discord" not in saved
+    assert "discord_admin" not in saved
+    assert "web" in saved
+    assert "terminal" in saved
+
+
+def test_get_platform_tools_feishu_includes_doc_and_drive():
+    enabled = _get_platform_tools({}, "feishu")
+    assert "feishu_doc" in enabled
+    assert "feishu_drive" in enabled
+
+
+def test_get_platform_tools_feishu_tools_not_on_other_platforms():
+    for plat in ["cli", "telegram", "discord"]:
+        enabled = _get_platform_tools({}, plat)
+        assert "feishu_doc" not in enabled, f"feishu_doc leaked onto {plat}"
+        assert "feishu_drive" not in enabled, f"feishu_drive leaked onto {plat}"
+
+
+def test_get_effective_configurable_toolsets_dedupes_bundled_plugins():
+    """Bundled plugins (plugins/spotify) share their toolset key with the
+    built-in CONFIGURABLE_TOOLSETS entry. The effective list must not list
+    them twice — otherwise `hermes tools` → "reconfigure existing" shows
+    the same toolset in two consecutive rows.
+    """
+    from hermes_cli.tools_config import _get_effective_configurable_toolsets
+
+    all_ts = _get_effective_configurable_toolsets()
+    keys = [ts_key for ts_key, _, _ in all_ts]
+    assert len(keys) == len(set(keys)), (
+        f"duplicate toolset keys in effective list: "
+        f"{[k for k in keys if keys.count(k) > 1]}"
+    )
+    # Spotify specifically — the bug that motivated the dedupe.
+    spotify_rows = [t for t in all_ts if t[0] == "spotify"]
+    assert len(spotify_rows) == 1, spotify_rows
+    # Built-in label wins over the plugin label.
+    assert spotify_rows[0][1] == "🎵 Spotify"
@@ -19,6 +19,18 @@ def _touch_ink(root: Path) -> None:
    ink.write_text("{}")


+def _touch_tui_entry(root: Path) -> None:
+    entry = root / "dist" / "entry.js"
+    entry.parent.mkdir(parents=True, exist_ok=True)
+    entry.write_text("console.log('tui')")
+
+
+def _touch_ink_bundle(root: Path) -> None:
+    bundle = root / "packages" / "hermes-ink" / "dist" / "ink-bundle.js"
+    bundle.parent.mkdir(parents=True, exist_ok=True)
+    bundle.write_text("export {}")
+
+
 def test_need_install_when_ink_missing(tmp_path: Path, main_mod) -> None:
    (tmp_path / "package-lock.json").write_text("{}")
    assert main_mod._tui_need_npm_install(tmp_path) is True
@@ -51,3 +63,19 @@ def test_need_install_when_marker_missing(tmp_path: Path, main_mod) -> None:
 def test_no_install_without_lockfile_when_ink_present(tmp_path: Path, main_mod) -> None:
    _touch_ink(tmp_path)
    assert main_mod._tui_need_npm_install(tmp_path) is False
+
+
+def test_build_needed_when_local_ink_bundle_missing(tmp_path: Path, main_mod) -> None:
+    _touch_tui_entry(tmp_path)
+    _touch_ink(tmp_path)
+
+    assert main_mod._tui_need_npm_install(tmp_path) is False
+    assert main_mod._tui_build_needed(tmp_path) is True
+
+
+def test_build_not_needed_when_entry_and_ink_bundle_present(tmp_path: Path, main_mod) -> None:
+    _touch_tui_entry(tmp_path)
+    _touch_ink(tmp_path)
+    _touch_ink_bundle(tmp_path)
+
+    assert main_mod._tui_build_needed(tmp_path) is False
@@ -1,4 +1,5 @@
 from argparse import Namespace
+from pathlib import Path
 import sys
 import types

@@ -8,8 +9,11 @@ import pytest
 def _args(**overrides):
    base = {
        "continue_last": None,
+        "model": None,
+        "provider": None,
        "resume": None,
        "tui": True,
+        "tui_dev": False,
    }
    base.update(overrides)
    return Namespace(**base)
@@ -31,7 +35,7 @@ def test_cmd_chat_tui_continue_uses_latest_tui_session(monkeypatch, main_mod):
        calls.append(source)
        return "20260408_235959_a1b2c3" if source == "tui" else None

-    def fake_launch(resume_session_id=None, tui_dev=False):
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@@ -58,7 +62,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
            return "20260408_235959_d4e5f6"
        return None

-    def fake_launch(resume_session_id=None, tui_dev=False):
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@@ -76,7 +80,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
 def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod):
    captured = {}

-    def fake_launch(resume_session_id=None, tui_dev=False):
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@@ -89,6 +93,60 @@ def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod)
    assert captured["resume"] == "20260409_000000_aa11bb"


+def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
+    captured = {}
+
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
+        captured.update(
+            {
+                "model": model,
+                "provider": provider,
+                "resume": resume_session_id,
+                "tui_dev": tui_dev,
+            }
+        )
+        raise SystemExit(0)
+
+    monkeypatch.setattr(main_mod, "_launch_tui", fake_launch)
+
+    with pytest.raises(SystemExit):
+        main_mod.cmd_chat(
+            _args(model="anthropic/claude-sonnet-4.6", provider="anthropic")
+        )
+
+    assert captured == {
+        "model": "anthropic/claude-sonnet-4.6",
+        "provider": "anthropic",
+        "resume": None,
+        "tui_dev": False,
+    }
+
+
+def test_launch_tui_exports_model_and_provider(monkeypatch, main_mod):
+    captured = {}
+
+    monkeypatch.setattr(
+        main_mod,
+        "_make_tui_argv",
+        lambda tui_dir, tui_dev: (["node", "dist/entry.js"], Path(".")),
+    )
+
+    def fake_call(argv, cwd=None, env=None):
+        captured.update({"argv": argv, "cwd": cwd, "env": env})
+        return 1
+
+    monkeypatch.setattr(main_mod.subprocess, "call", fake_call)
+
+    with pytest.raises(SystemExit):
+        main_mod._launch_tui(model="nous/hermes-test", provider="nous")
+
+    env = captured["env"]
+    assert env["HERMES_MODEL"] == "nous/hermes-test"
+    assert env["HERMES_INFERENCE_MODEL"] == "nous/hermes-test"
+    assert env["HERMES_TUI_PROVIDER"] == "nous"
+    assert env["HERMES_INFERENCE_PROVIDER"] == "nous"
+
+
 def test_print_tui_exit_summary_includes_resume_and_token_totals(monkeypatch, capsys):
    import hermes_cli.main as main_mod

@@ -1678,6 +1678,45 @@ class TestDashboardPluginManifestExtensions:
        entry = next(p for p in plugins if p["name"] == "mixed-slots")
        assert entry["slots"] == ["sidebar", "header-right"]

+    def test_page_scoped_slots_preserved(self, tmp_path, monkeypatch):
+        """Page-scoped slot names (e.g. ``sessions:top``) round-trip through
+        the manifest loader untouched.  The backend has no allowlist — the
+        frontend ``<PluginSlot name="...">`` placements decide what actually
+        renders — but the loader must not mangle colons in slot names."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        self._write_plugin(tmp_path, "page-slots", {
+            "name": "page-slots",
+            "label": "Page Slots",
+            "tab": {"path": "/page-slots", "hidden": True},
+            "slots": [
+                "sessions:top",
+                "analytics:bottom",
+                "logs:top",
+                "skills:bottom",
+                "config:top",
+                "env:bottom",
+                "docs:top",
+                "cron:bottom",
+                "chat:top",
+            ],
+            "entry": "dist/index.js",
+        })
+        from hermes_cli import web_server
+        web_server._dashboard_plugins_cache = None
+        plugins = web_server._get_dashboard_plugins(force_rescan=True)
+        entry = next(p for p in plugins if p["name"] == "page-slots")
+        assert entry["slots"] == [
+            "sessions:top",
+            "analytics:bottom",
+            "logs:top",
+            "skills:bottom",
+            "config:top",
+            "env:bottom",
+            "docs:top",
+            "cron:bottom",
+            "chat:top",
+        ]
+

 # ---------------------------------------------------------------------------
 # /api/pty WebSocket — terminal bridge for the dashboard "Chat" tab.
@@ -1925,34 +1964,3 @@ class TestPtyWebSocket:
            ):
                pass
        assert exc.value.code == 4400
-
-
-class TestEnvVarUpdateValidation:
-    """PUT /api/env must reject empty values to prevent .env key destruction."""
-
-    def test_rejects_empty_value(self):
-        from hermes_cli.web_server import EnvVarUpdate
-        import pydantic
-
-        with pytest.raises(pydantic.ValidationError):
-            EnvVarUpdate(key="SOME_KEY", value="")
-
-    def test_rejects_whitespace_only_value(self):
-        from hermes_cli.web_server import EnvVarUpdate
-        import pydantic
-
-        with pytest.raises(pydantic.ValidationError):
-            EnvVarUpdate(key="SOME_KEY", value="   ")
-
-    def test_accepts_nonempty_value(self):
-        from hermes_cli.web_server import EnvVarUpdate
-
-        update = EnvVarUpdate(key="SOME_KEY", value="sk-abc123")
-        assert update.value == "sk-abc123"
-
-    def test_rejects_empty_key(self):
-        from hermes_cli.web_server import EnvVarUpdate
-        import pydantic
-
-        with pytest.raises(pydantic.ValidationError):
-            EnvVarUpdate(key="", value="some-value")
@@ -31,7 +31,6 @@ def _make_agent_with_engine(engine):
    agent._vprint = lambda *a, **kw: None
    agent._last_flushed_db_idx = 0
    # Stub the few AIAgent methods _compress_context uses.
-    agent.flush_memories = lambda *a, **kw: None
    agent._invalidate_system_prompt = lambda *a, **kw: None
    agent._build_system_prompt = lambda *a, **kw: "new-system-prompt"
    agent.commit_memory_session = lambda *a, **kw: None
@@ -41,6 +41,7 @@ def _make_agent(
    agent.tool_progress_callback = None
    agent._compression_warning = None
    agent._aux_compression_context_length_config = None
+    agent.tools = []

    compressor = MagicMock(spec=ContextCompressor)
    compressor.context_length = main_context
@@ -82,7 +83,7 @@ def test_auto_corrects_threshold_when_aux_context_below_threshold(mock_get_clien
    assert "threshold:" in messages[0]
    # Warning stored for gateway replay
    assert agent._compression_warning is not None
-    # Threshold on the live compressor was actually lowered
+    # Threshold on the live compressor was actually lowered to aux_context.
    assert agent.context_compressor.threshold_tokens == 80_000


@@ -180,6 +181,7 @@ def test_feasibility_check_passes_config_context_length(mock_get_client, mock_ct
        base_url="http://custom-endpoint:8080/v1",
        api_key="sk-custom",
        config_context_length=1_000_000,
+        provider="openrouter",
    )


@@ -202,6 +204,7 @@ def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_
        base_url="http://custom:8080/v1",
        api_key="sk-test",
        config_context_length=None,
+        provider="openrouter",
    )


@@ -254,6 +257,7 @@ def test_init_feasibility_check_uses_aux_context_override_from_config():
        base_url="http://custom-endpoint:8080/v1",
        api_key="sk-custom",
        config_context_length=1_000_000,
+        provider="",
    )


@@ -88,13 +88,13 @@ class TestCopyReasoningContentForApi:
        agent._copy_reasoning_content_for_api(source, api_msg)
        assert api_msg.get("reasoning_content") == ""

-    def test_deepseek_assistant_no_tool_call_left_alone(self) -> None:
-        """Plain assistant turns without tool_calls don't get padded."""
+    def test_deepseek_assistant_no_tool_call_gets_padded(self) -> None:
+        """DeepSeek thinking mode pads ALL assistant turns, even without tool_calls."""
        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
        source = {"role": "assistant", "content": "hello"}
        api_msg: dict = {}
        agent._copy_reasoning_content_for_api(source, api_msg)
-        assert "reasoning_content" not in api_msg
+        assert api_msg.get("reasoning_content") == ""

    def test_deepseek_explicit_reasoning_content_preserved(self) -> None:
        """When reasoning_content is already set, it's copied verbatim."""
@@ -1,329 +0,0 @@
-"""Tests for flush_memories() working correctly across all provider modes.
-
-Catches the bug where Codex mode called chat.completions.create on a
-Responses-only client, which would fail silently or with a 404.
-"""
-
-import json
-import os
-import sys
-import types
-from types import SimpleNamespace
-from unittest.mock import patch, MagicMock, call
-
-import pytest
-
-sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
-sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
-sys.modules.setdefault("fal_client", types.SimpleNamespace())
-
-import run_agent
-
-
-class _FakeOpenAI:
-    def __init__(self, **kwargs):
-        self.kwargs = kwargs
-        self.api_key = kwargs.get("api_key", "test")
-        self.base_url = kwargs.get("base_url", "http://test")
-
-    def close(self):
-        pass
-
-
-def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
-    """Build an AIAgent with mocked internals, ready for flush_memories testing."""
-    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [
-        {
-            "type": "function",
-            "function": {
-                "name": "memory",
-                "description": "Manage memories.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "action": {"type": "string"},
-                        "target": {"type": "string"},
-                        "content": {"type": "string"},
-                    },
-                },
-            },
-        },
-    ])
-    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
-    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
-
-    agent = run_agent.AIAgent(
-        api_key="test-key",
-        base_url="https://test.example.com/v1",
-        provider=provider,
-        api_mode=api_mode,
-        max_iterations=4,
-        quiet_mode=True,
-        skip_context_files=True,
-        skip_memory=True,
-    )
-    # Give it a valid memory store
-    agent._memory_store = MagicMock()
-    agent._memory_flush_min_turns = 1
-    agent._user_turn_count = 5
-    return agent
-
-
-def _chat_response_with_memory_call():
-    """Simulated chat completions response with a memory tool call."""
-    return SimpleNamespace(
-        choices=[SimpleNamespace(
-            finish_reason="tool_calls",
-            message=SimpleNamespace(
-                content=None,
-                tool_calls=[SimpleNamespace(
-                    id="call_mem_0",
-                    type="function",
-                    function=SimpleNamespace(
-                        name="memory",
-                        arguments=json.dumps({
-                            "action": "add",
-                            "target": "notes",
-                            "content": "User prefers dark mode.",
-                        }),
-                    ),
-                )],
-            ),
-        )],
-        usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
-    )
-
-
-class TestFlushMemoriesRespectsConfigTimeout:
-    """flush_memories() must NOT hardcode timeout=30.0 — it should defer
-    to the config value via auxiliary.flush_memories.timeout."""
-
-    def test_auxiliary_path_omits_explicit_timeout(self, monkeypatch):
-        """When calling _call_llm, timeout should NOT be passed so that
-        _get_task_timeout('flush_memories') reads from config."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Note this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        mock_call.assert_called_once()
-        call_kwargs = mock_call.call_args
-        # timeout must NOT be explicitly passed (so _get_task_timeout resolves it)
-        assert "timeout" not in call_kwargs.kwargs, (
-            "flush_memories should not pass explicit timeout to _call_llm; "
-            "let _get_task_timeout('flush_memories') resolve from config"
-        )
-
-    def test_fallback_path_uses_config_timeout(self, monkeypatch):
-        """When auxiliary client is unavailable and we fall back to direct
-        OpenAI client, timeout should come from _get_task_timeout, not hardcoded."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent.client = MagicMock()
-        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
-
-        custom_timeout = 180.0
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
-             patch("agent.auxiliary_client._get_task_timeout", return_value=custom_timeout) as mock_gtt, \
-             patch("tools.memory_tool.memory_tool", return_value="Saved."):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Save this"},
-            ]
-            agent.flush_memories(messages)
-
-        mock_gtt.assert_called_once_with("flush_memories")
-        agent.client.chat.completions.create.assert_called_once()
-        call_kwargs = agent.client.chat.completions.create.call_args
-        assert call_kwargs.kwargs.get("timeout") == custom_timeout, (
-            f"Expected timeout={custom_timeout} from config, got {call_kwargs.kwargs.get('timeout')}"
-        )
-
-
-class TestFlushMemoriesUsesAuxiliaryClient:
-    """When an auxiliary client is available, flush_memories should use it
-    instead of self.client -- especially critical in Codex mode."""
-
-    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
-        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi there"},
-                {"role": "user", "content": "Remember this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
-                agent.flush_memories(messages)
-
-        mock_call.assert_called_once()
-        call_kwargs = mock_call.call_args
-        assert call_kwargs.kwargs.get("task") == "flush_memories"
-
-    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
-        """Non-Codex mode with no auxiliary falls back to self.client."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent.client = MagicMock()
-        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi there"},
-                {"role": "user", "content": "Save this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        agent.client.chat.completions.create.assert_called_once()
-
-    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
-        """Provider/API failures from auxiliary flush must be visible.
-
-        Exhausted keys and rate limits are not always RuntimeError. They used
-        to fall into the broad outer handler and disappear into debug logs.
-        """
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent.client = MagicMock()
-        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
-        events = []
-        agent.status_callback = lambda kind, text=None: events.append((kind, text))
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
-             patch("tools.memory_tool.memory_tool", return_value="Saved."):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi there"},
-                {"role": "user", "content": "Save this"},
-            ]
-            agent.flush_memories(messages)
-
-        agent.client.chat.completions.create.assert_called_once()
-        assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events)
-
-    def test_flush_executes_memory_tool_calls(self, monkeypatch):
-        """Verify that memory tool calls from the flush response actually get executed."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Note this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
-                agent.flush_memories(messages)
-
-        mock_memory.assert_called_once()
-        call_kwargs = mock_memory.call_args
-        assert call_kwargs.kwargs["action"] == "add"
-        assert call_kwargs.kwargs["target"] == "notes"
-        assert "dark mode" in call_kwargs.kwargs["content"]
-
-    def test_flush_bridges_memory_write_metadata(self, monkeypatch):
-        """Flush memory writes notify external providers with flush provenance."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent._memory_manager = MagicMock()
-        agent.session_id = "sess-flush"
-        agent.platform = "cli"
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Note this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        agent._memory_manager.on_memory_write.assert_called_once()
-        call_kwargs = agent._memory_manager.on_memory_write.call_args
-        assert call_kwargs.args[:3] == ("add", "notes", "User prefers dark mode.")
-        assert call_kwargs.kwargs["metadata"]["write_origin"] == "memory_flush"
-        assert call_kwargs.kwargs["metadata"]["execution_context"] == "flush_memories"
-        assert call_kwargs.kwargs["metadata"]["session_id"] == "sess-flush"
-
-    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
-        """After flush, the flush prompt and any response should be removed from messages."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Remember X"},
-            ]
-            original_len = len(messages)
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        # Messages should not grow from the flush
-        assert len(messages) <= original_len
-        # No flush sentinel should remain
-        for msg in messages:
-            assert "_flush_sentinel" not in msg
-
-
-class TestFlushMemoriesCodexFallback:
-    """When no auxiliary client exists and we're in Codex mode, flush should
-    use the Codex Responses API path instead of chat.completions."""
-
-    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
-        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
-
-        codex_response = SimpleNamespace(
-            output=[
-                SimpleNamespace(
-                    type="function_call",
-                    call_id="call_1",
-                    name="memory",
-                    arguments=json.dumps({
-                        "action": "add",
-                        "target": "notes",
-                        "content": "Codex flush test",
-                    }),
-                ),
-            ],
-            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
-            status="completed",
-            model="gpt-5-codex",
-        )
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
-             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
-             patch.object(agent, "_build_api_kwargs") as mock_build, \
-             patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
-            mock_build.return_value = {
-                "model": "gpt-5-codex",
-                "instructions": "test",
-                "input": [],
-                "tools": [],
-                "max_output_tokens": 4096,
-            }
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Save this"},
-            ]
-            agent.flush_memories(messages)
-
-        mock_stream.assert_called_once()
-        mock_memory.assert_called_once()
-        assert mock_memory.call_args.kwargs["content"] == "Codex flush test"
@@ -12,7 +12,7 @@ from types import SimpleNamespace
 from unittest.mock import patch, MagicMock

 import pytest
-from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
+from agent.codex_responses_adapter import _chat_content_to_responses_parts, _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items

 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@@ -520,6 +520,111 @@ class TestChatMessagesToResponsesInput:
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 0

+    def test_user_multimodal_content_uses_input_text(self, monkeypatch):
+        """User messages with list content must use input_text type."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        messages = [{"role": "user", "content": [
+            {"type": "text", "text": "find files"},
+        ]}]
+        items = _chat_messages_to_responses_input(messages)
+        assert len(items) == 1
+        assert items[0]["role"] == "user"
+        content = items[0]["content"]
+        assert isinstance(content, list)
+        assert content[0]["type"] == "input_text"
+        assert content[0]["text"] == "find files"
+
+    def test_assistant_multimodal_content_uses_output_text(self, monkeypatch):
+        """Assistant messages with list content must use output_text type.
+
+        Regression test for #15687 — the Responses API rejects input_text
+        inside assistant messages.
+        """
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        messages = [{"role": "assistant", "content": [
+            {"type": "text", "text": "I found the files."},
+        ]}]
+        items = _chat_messages_to_responses_input(messages)
+        assert len(items) == 1
+        assert items[0]["role"] == "assistant"
+        content = items[0]["content"]
+        assert isinstance(content, list)
+        assert content[0]["type"] == "output_text"
+        assert content[0]["text"] == "I found the files."
+
+    def test_preflight_preserves_assistant_output_text(self, monkeypatch):
+        """_preflight_codex_input_items must preserve output_text for assistant."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        raw_input = [
+            {"role": "user", "content": [{"type": "input_text", "text": "hi"}]},
+            {"role": "assistant", "content": [{"type": "output_text", "text": "hello"}]},
+        ]
+        normalized = _preflight_codex_input_items(raw_input)
+        user_content = normalized[0]["content"]
+        asst_content = normalized[1]["content"]
+        assert user_content[0]["type"] == "input_text"
+        assert asst_content[0]["type"] == "output_text"
+
+    def test_full_round_trip_with_list_content(self, monkeypatch):
+        """End-to-end: user + assistant with list content through both stages."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        messages = [
+            {"role": "user", "content": [{"type": "text", "text": "hello"}]},
+            {"role": "assistant", "content": [{"type": "text", "text": "hi there"}]},
+            {"role": "user", "content": [{"type": "text", "text": "continue"}]},
+        ]
+        items = _chat_messages_to_responses_input(messages)
+        normalized = _preflight_codex_input_items(items)
+
+        # User items use input_text
+        assert normalized[0]["content"][0]["type"] == "input_text"
+        assert normalized[2]["content"][0]["type"] == "input_text"
+        # Assistant item uses output_text
+        assert normalized[1]["content"][0]["type"] == "output_text"
+
+
+class TestChatContentToResponsesParts:
+    """Unit tests for _chat_content_to_responses_parts role parameter (#15687)."""
+
+    def test_default_role_emits_input_text(self):
+        """Default (user) role emits input_text."""
+        result = _chat_content_to_responses_parts([{"type": "text", "text": "hello"}])
+        assert result[0]["type"] == "input_text"
+
+    def test_explicit_user_role_emits_input_text(self):
+        result = _chat_content_to_responses_parts(
+            [{"type": "text", "text": "hello"}], role="user"
+        )
+        assert result[0]["type"] == "input_text"
+
+    def test_assistant_role_emits_output_text(self):
+        result = _chat_content_to_responses_parts(
+            [{"type": "text", "text": "hello"}], role="assistant"
+        )
+        assert result[0]["type"] == "output_text"
+
+    def test_assistant_role_with_string_parts(self):
+        """String parts in assistant content also get output_text."""
+        result = _chat_content_to_responses_parts(["hello"], role="assistant")
+        assert result[0]["type"] == "output_text"
+        assert result[0]["text"] == "hello"
+
+    def test_assistant_role_with_mixed_input_output_text_types(self):
+        """Parts already marked input_text or output_text are normalized to the role's type."""
+        parts = [
+            {"type": "input_text", "text": "a"},
+            {"type": "output_text", "text": "b"},
+            {"type": "text", "text": "c"},
+        ]
+        result = _chat_content_to_responses_parts(parts, role="assistant")
+        # All text parts should become output_text regardless of original type
+        assert all(p["type"] == "output_text" for p in result)
+        assert [p["text"] for p in result] == ["a", "b", "c"]
+

 # ── Response normalization tests ─────────────────────────────────────────────

@@ -3078,48 +3078,6 @@ class TestRetryExhaustion:
        assert "bad messages" in result["error"]


-# ---------------------------------------------------------------------------
-# Flush sentinel leak
-# ---------------------------------------------------------------------------
-
-
-class TestFlushSentinelNotLeaked:
-    """_flush_sentinel must be stripped before sending messages to the API."""
-
-    def test_flush_sentinel_stripped_from_api_messages(self, agent_with_memory_tool):
-        """Verify _flush_sentinel is not sent to the API provider."""
-        agent = agent_with_memory_tool
-        agent._memory_store = MagicMock()
-        agent._memory_flush_min_turns = 1
-        agent._user_turn_count = 10
-        agent._cached_system_prompt = "system"
-
-        messages = [
-            {"role": "user", "content": "hello"},
-            {"role": "assistant", "content": "hi"},
-            {"role": "user", "content": "remember this"},
-        ]
-
-        # Mock the API to return a simple response (no tool calls)
-        mock_msg = SimpleNamespace(content="OK", tool_calls=None)
-        mock_choice = SimpleNamespace(message=mock_msg)
-        mock_response = SimpleNamespace(choices=[mock_choice])
-        agent.client.chat.completions.create.return_value = mock_response
-
-        # Bypass auxiliary client so flush uses agent.client directly
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
-            agent.flush_memories(messages, min_turns=0)
-
-        # Check what was actually sent to the API
-        call_args = agent.client.chat.completions.create.call_args
-        assert call_args is not None, "flush_memories never called the API"
-        api_messages = call_args.kwargs.get("messages") or call_args[1].get("messages")
-        for msg in api_messages:
-            assert "_flush_sentinel" not in msg, (
-                f"_flush_sentinel leaked to API in message: {msg}"
-            )
-
-
 # ---------------------------------------------------------------------------
 # Conversation history mutation
 # ---------------------------------------------------------------------------
@@ -0,0 +1,162 @@
+"""Tests that /stop interrupts streaming retry loops immediately.
+
+When the agent is interrupted during a streaming API call, the outer poll
+loop closes the HTTP connection.  The inner `_call()` thread sees a
+connection error and enters its retry loop.  Before this fix, the retry
+loop would open a FRESH connection without checking `_interrupt_requested`,
+making /stop take multiple retry cycles × read-timeout to actually stop
+(510+ seconds observed on slow ollama-cloud providers).
+
+The fix adds an `_interrupt_requested` check at the top of the retry loop
+so the agent exits immediately instead of retrying.
+"""
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+def _make_agent(**kwargs):
+    """Create a minimal AIAgent for streaming tests."""
+    from run_agent import AIAgent
+
+    defaults = dict(
+        api_key="test-key",
+        base_url="https://example.com/v1",
+        model="test/model",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    defaults.update(kwargs)
+    agent = AIAgent(**defaults)
+    agent.api_mode = "chat_completions"
+    return agent
+
+
+class TestStreamInterruptBeforeRetry:
+    """Verify _interrupt_requested is checked before each streaming retry."""
+
+    @pytest.mark.filterwarnings(
+        "ignore::pytest.PytestUnhandledThreadExceptionWarning"
+    )
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_interrupt_prevents_stream_retry(self, mock_close, mock_create):
+        """When _interrupt_requested is set during a transient stream error,
+        the retry loop must NOT retry — it should raise InterruptedError
+        immediately instead of opening a fresh connection."""
+        import httpx
+
+        attempt_count = [0]
+
+        def fail_once_then_interrupt(*args, **kwargs):
+            attempt_count[0] += 1
+            if attempt_count[0] == 1:
+                # First attempt: set the interrupt flag, then fail the connection
+                # (as if /stop arrived and the poll loop closed the HTTP connection)
+                agent._interrupt_requested = True
+                raise httpx.ConnectError("connection reset by /stop")
+            # Should never reach here — the interrupt check should fire first
+            raise httpx.ConnectError("unexpected retry — interrupt not checked!")
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = fail_once_then_interrupt
+        mock_create.return_value = mock_client
+
+        agent = _make_agent()
+        agent._interrupt_requested = False
+
+        with pytest.raises(InterruptedError, match="interrupted"):
+            agent._interruptible_streaming_api_call({})
+
+        # Only 1 attempt should have been made — the interrupt should prevent retry
+        assert attempt_count[0] == 1, (
+            f"Expected 1 attempt but got {attempt_count[0]}. "
+            "The retry loop retried despite _interrupt_requested being set."
+        )
+
+    @pytest.mark.filterwarnings(
+        "ignore::pytest.PytestUnhandledThreadExceptionWarning"
+    )
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_interrupt_before_first_attempt(self, mock_close, mock_create):
+        """If _interrupt_requested is already set when the streaming call
+        starts, it should exit immediately without making any API call."""
+        mock_client = MagicMock()
+        mock_create.return_value = mock_client
+
+        agent = _make_agent()
+        agent._interrupt_requested = True  # Pre-set before call
+
+        with pytest.raises(InterruptedError, match="interrupted"):
+            agent._interruptible_streaming_api_call({})
+
+        # No API call should have been made at all
+        assert mock_client.chat.completions.create.call_count == 0
+
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_normal_retry_still_works_without_interrupt(self, mock_close, mock_create):
+        """Without an interrupt, transient errors should still retry normally."""
+        import httpx
+
+        attempts = [0]
+
+        def fail_twice_then_succeed(*args, **kwargs):
+            attempts[0] += 1
+            if attempts[0] <= 2:
+                raise httpx.ConnectError("transient failure")
+            # Third attempt succeeds
+            chunks = [
+                SimpleNamespace(
+                    choices=[
+                        SimpleNamespace(
+                            index=0,
+                            delta=SimpleNamespace(
+                                content="ok",
+                                tool_calls=None,
+                                reasoning_content=None,
+                                reasoning=None,
+                            ),
+                            finish_reason=None,
+                        )
+                    ],
+                    model="test/model",
+                    usage=None,
+                ),
+                SimpleNamespace(
+                    choices=[
+                        SimpleNamespace(
+                            index=0,
+                            delta=SimpleNamespace(
+                                content=None,
+                                tool_calls=None,
+                                reasoning_content=None,
+                                reasoning=None,
+                            ),
+                            finish_reason="stop",
+                        )
+                    ],
+                    model="test/model",
+                    usage=None,
+                ),
+            ]
+            stream = MagicMock()
+            stream.__iter__ = MagicMock(return_value=iter(chunks))
+            stream.response = MagicMock()
+            stream.response.headers = {}
+            return stream
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = fail_twice_then_succeed
+        mock_create.return_value = mock_client
+
+        agent = _make_agent()
+        agent._interrupt_requested = False
+
+        # Should succeed on the third attempt
+        result = agent._interruptible_streaming_api_call({})
+        assert result is not None
+        assert attempts[0] == 3
@@ -200,8 +200,8 @@ class TestToolsetConsistency:
    def test_hermes_platforms_share_core_tools(self):
        """All hermes-* platform toolsets share the same core tools.

-        Platform-specific additions (e.g. ``discord_server`` on
-        hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
+        Platform-specific additions (e.g. ``discord`` / ``discord_admin``
+        on hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
        the invariant is that the core set is identical across platforms.
        """
        platforms = ["hermes-cli", "hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant"]
@@ -83,6 +83,100 @@ def test_status_callback_accepts_single_message_argument():
    )


+def test_resolve_model_uses_inference_model_env(monkeypatch):
+    monkeypatch.delenv("HERMES_MODEL", raising=False)
+    monkeypatch.setenv("HERMES_INFERENCE_MODEL", " anthropic/claude-sonnet-4.6\n")
+
+    assert server._resolve_model() == "anthropic/claude-sonnet-4.6"
+
+
+def test_resolve_model_strips_config_model(monkeypatch):
+    monkeypatch.delenv("HERMES_MODEL", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_MODEL", raising=False)
+    monkeypatch.setattr(
+        server, "_load_cfg", lambda: {"model": {"default": " nous/hermes-test "}}
+    )
+
+    assert server._resolve_model() == "nous/hermes-test"
+
+
+def test_startup_runtime_uses_tui_provider_env(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "nous/hermes-test")
+    monkeypatch.setenv("HERMES_TUI_PROVIDER", "nous")
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+
+    assert server._resolve_startup_runtime() == ("nous/hermes-test", "nous")
+
+
+def test_startup_runtime_does_not_treat_inference_provider_as_explicit(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "nous/hermes-test")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
+    monkeypatch.setattr(
+        "hermes_cli.models.detect_static_provider_for_model",
+        lambda model, provider: None,
+    )
+
+    assert server._resolve_startup_runtime() == ("nous/hermes-test", None)
+
+
+def test_startup_runtime_detects_provider_for_model_env(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "sonnet")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})
+
+    def fake_detect(model, current_provider):
+        assert model == "sonnet"
+        assert current_provider == "auto"
+        return "anthropic", "anthropic/claude-sonnet-4.6"
+
+    monkeypatch.setattr(
+        "hermes_cli.models.detect_static_provider_for_model", fake_detect
+    )
+
+    assert server._resolve_startup_runtime() == (
+        "anthropic/claude-sonnet-4.6",
+        "anthropic",
+    )
+
+
+def test_startup_runtime_resolves_short_alias_without_network(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "sonnet")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})
+    monkeypatch.setattr(
+        "hermes_cli.models.fetch_openrouter_models",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("network lookup should not run")
+        ),
+    )
+
+    model, provider = server._resolve_startup_runtime()
+
+    assert provider == "anthropic"
+    assert model.startswith("claude-sonnet")
+
+
+def test_startup_runtime_does_not_call_network_detector(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "sonnet")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})
+    monkeypatch.setattr(
+        "hermes_cli.models.detect_provider_for_model",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("network detector called")
+        ),
+    )
+
+    model, provider = server._resolve_startup_runtime()
+
+    assert model
+    assert provider in {None, "anthropic"}
+
+
 def _session(agent=None, **extra):
    return {
        "agent": agent if agent is not None else types.SimpleNamespace(),
@@ -245,6 +339,14 @@ def test_setup_status_reports_provider_config(monkeypatch):
    assert resp["result"]["provider_configured"] is False


+def test_complete_slash_includes_provider_alias():
+    resp = server.handle_request(
+        {"id": "1", "method": "complete.slash", "params": {"text": "/pro"}}
+    )
+
+    assert any(item["text"] == "provider" for item in resp["result"]["items"])
+
+
 def test_config_set_reasoning_updates_live_session_and_agent(tmp_path, monkeypatch):
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    agent = types.SimpleNamespace(reasoning_config=None)
@@ -415,6 +517,57 @@ def test_config_set_model_syncs_inference_provider_env(monkeypatch):
    assert os.environ["HERMES_INFERENCE_PROVIDER"] == "anthropic"


+def test_config_set_model_syncs_tui_provider_env(monkeypatch):
+    class Agent:
+        model = "gpt-5.3-codex"
+        provider = "openai-codex"
+        base_url = ""
+        api_key = ""
+
+        def switch_model(self, **kwargs):
+            self.model = kwargs["new_model"]
+            self.provider = kwargs["new_provider"]
+
+    agent = Agent()
+    server._sessions["sid"] = _session(agent=agent)
+    monkeypatch.setenv("HERMES_TUI_PROVIDER", "openai-codex")
+    monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None)
+    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
+
+    def fake_switch_model(**kwargs):
+        return types.SimpleNamespace(
+            success=True,
+            new_model="anthropic/claude-sonnet-4.6",
+            target_provider="anthropic",
+            api_key="key",
+            base_url="https://api.anthropic.com",
+            api_mode="anthropic_messages",
+            warning_message="",
+        )
+
+    monkeypatch.setattr("hermes_cli.model_switch.switch_model", fake_switch_model)
+
+    try:
+        resp = server.handle_request(
+            {
+                "id": "1",
+                "method": "config.set",
+                "params": {
+                    "session_id": "sid",
+                    "key": "model",
+                    "value": "anthropic/claude-sonnet-4.6 --provider anthropic",
+                },
+            }
+        )
+
+        assert resp["result"]["value"] == "anthropic/claude-sonnet-4.6"
+        assert os.environ["HERMES_TUI_PROVIDER"] == "anthropic"
+        assert os.environ["HERMES_MODEL"] == "anthropic/claude-sonnet-4.6"
+        assert os.environ["HERMES_INFERENCE_MODEL"] == "anthropic/claude-sonnet-4.6"
+    finally:
+        server._sessions.clear()
+
+
 def test_config_set_personality_rejects_unknown_name(monkeypatch):
    monkeypatch.setattr(
        server,
@@ -2128,5 +2128,103 @@ class TestOrchestratorEndToEnd(unittest.TestCase):
        self.assertFalse(built_agents[2]["is_orchestrator_prompt"])


+class TestSubagentApprovalCallback(unittest.TestCase):
+    """Subagent worker threads must have a non-interactive approval callback
+    installed so dangerous-command prompts don't fall back to input() and
+    deadlock the parent's prompt_toolkit TUI.
+
+    Governed by delegation.subagent_auto_approve:
+      false (default) → _subagent_auto_deny
+      true            → _subagent_auto_approve
+    """
+
+    def test_auto_deny_returns_deny(self):
+        from tools.delegate_tool import _subagent_auto_deny
+        self.assertEqual(
+            _subagent_auto_deny("rm -rf /tmp/x", "dangerous"),
+            "deny",
+        )
+
+    def test_auto_approve_returns_once(self):
+        from tools.delegate_tool import _subagent_auto_approve
+        self.assertEqual(
+            _subagent_auto_approve("rm -rf /tmp/x", "dangerous"),
+            "once",
+        )
+
+    @patch("tools.delegate_tool._load_config", return_value={})
+    def test_getter_defaults_to_deny(self, _mock_cfg):
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_deny,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)
+
+    @patch(
+        "tools.delegate_tool._load_config",
+        return_value={"subagent_auto_approve": False},
+    )
+    def test_getter_explicit_false_is_deny(self, _mock_cfg):
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_deny,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)
+
+    @patch(
+        "tools.delegate_tool._load_config",
+        return_value={"subagent_auto_approve": True},
+    )
+    def test_getter_true_is_approve(self, _mock_cfg):
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_approve,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)
+
+    @patch(
+        "tools.delegate_tool._load_config",
+        return_value={"subagent_auto_approve": "yes"},
+    )
+    def test_getter_truthy_string_is_approve(self, _mock_cfg):
+        """is_truthy_value accepts 'yes'/'1'/'true' as truthy."""
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_approve,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)
+
+    def test_executor_initializer_installs_callback_in_worker(self):
+        """The initializer sets the callback on the worker thread's TLS,
+        not the parent's — verifies the fix actually scopes to workers.
+        """
+        from concurrent.futures import ThreadPoolExecutor
+        from tools.terminal_tool import (
+            set_approval_callback as _set_cb,
+            _get_approval_callback,
+        )
+        from tools.delegate_tool import _subagent_auto_deny
+
+        # Parent thread has no callback.
+        _set_cb(None)
+        self.assertIsNone(_get_approval_callback())
+
+        seen = []
+
+        def worker():
+            seen.append(_get_approval_callback())
+
+        with ThreadPoolExecutor(
+            max_workers=1,
+            initializer=_set_cb,
+            initargs=(_subagent_auto_deny,),
+        ) as executor:
+            executor.submit(worker).result()
+
+        self.assertEqual(seen, [_subagent_auto_deny])
+        # Parent's callback slot is still empty (TLS isolates threads).
+        self.assertIsNone(_get_approval_callback())
+
+
 if __name__ == "__main__":
    unittest.main()
@@ -11,6 +11,8 @@ import pytest
 from tools.discord_tool import (
    DiscordAPIError,
    _ACTIONS,
+    _ADMIN_ACTIONS,
+    _CORE_ACTIONS,
    _available_actions,
    _build_schema,
    _channel_type_name,
@@ -21,8 +23,11 @@ from tools.discord_tool import (
    _load_allowed_actions_config,
    _reset_capability_cache,
    check_discord_tool_requirements,
-    discord_server,
+    discord_admin_handler,
+    discord_core,
    get_dynamic_schema,
+    get_dynamic_schema_admin,
+    get_dynamic_schema_core,
 )


@@ -147,32 +152,32 @@ class TestDiscordRequest:
 class TestDiscordServerValidation:
    def test_no_token(self, monkeypatch):
        monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "error" in result
        assert "DISCORD_BOT_TOKEN" in result["error"]

    def test_unknown_action(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="bad_action"))
+        result = json.loads(discord_core(action="bad_action"))
        assert "error" in result
        assert "Unknown action" in result["error"]
        assert "available_actions" in result

    def test_missing_required_guild_id(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="list_channels"))
+        result = json.loads(discord_admin_handler(action="list_channels"))
        assert "error" in result
        assert "guild_id" in result["error"]

    def test_missing_required_channel_id(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="fetch_messages"))
+        result = json.loads(discord_core(action="fetch_messages"))
        assert "error" in result
        assert "channel_id" in result["error"]

    def test_missing_multiple_params(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="add_role"))
+        result = json.loads(discord_admin_handler(action="add_role"))
        assert "error" in result
        assert "guild_id" in result["error"]
        assert "user_id" in result["error"]
@@ -191,7 +196,7 @@ class TestListGuilds:
            {"id": "111", "name": "Test Server", "icon": "abc", "owner": True, "permissions": "123"},
            {"id": "222", "name": "Other Server", "icon": None, "owner": False, "permissions": "456"},
        ]
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert result["count"] == 2
        assert result["guilds"][0]["name"] == "Test Server"
        assert result["guilds"][1]["id"] == "222"
@@ -219,7 +224,7 @@ class TestServerInfo:
            "premium_subscription_count": 5,
            "verification_level": 1,
        }
-        result = json.loads(discord_server(action="server_info", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="server_info", guild_id="111"))
        assert result["name"] == "My Server"
        assert result["member_count"] == 42
        assert result["online_count"] == 10
@@ -242,7 +247,7 @@ class TestListChannels:
            {"id": "12", "name": "voice", "type": 2, "position": 1, "parent_id": "10", "topic": None, "nsfw": False},
            {"id": "13", "name": "no-category", "type": 0, "position": 0, "parent_id": None, "topic": None, "nsfw": False},
        ]
-        result = json.loads(discord_server(action="list_channels", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
        assert result["total_channels"] == 3  # excludes the category itself
        groups = result["channel_groups"]
        # Uncategorized first
@@ -257,7 +262,7 @@ class TestListChannels:
    def test_empty_guild(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = []
-        result = json.loads(discord_server(action="list_channels", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
        assert result["total_channels"] == 0


@@ -274,7 +279,7 @@ class TestChannelInfo:
            "topic": "Welcome!", "nsfw": False, "position": 0,
            "parent_id": "10", "rate_limit_per_user": 0, "last_message_id": "999",
        }
-        result = json.loads(discord_server(action="channel_info", channel_id="11"))
+        result = json.loads(discord_admin_handler(action="channel_info", channel_id="11"))
        assert result["name"] == "general"
        assert result["type"] == "text"
        assert result["guild_id"] == "111"
@@ -293,7 +298,7 @@ class TestListRoles:
            {"id": "2", "name": "Admin", "position": 2, "color": 16711680, "mentionable": True, "managed": False, "hoist": True},
            {"id": "3", "name": "Mod", "position": 1, "color": 255, "mentionable": True, "managed": False, "hoist": True},
        ]
-        result = json.loads(discord_server(action="list_roles", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="list_roles", guild_id="111"))
        assert result["count"] == 3
        # Should be sorted by position descending
        assert result["roles"][0]["name"] == "Admin"
@@ -317,7 +322,7 @@ class TestMemberInfo:
            "joined_at": "2024-01-01T00:00:00Z",
            "premium_since": None,
        }
-        result = json.loads(discord_server(action="member_info", guild_id="111", user_id="42"))
+        result = json.loads(discord_admin_handler(action="member_info", guild_id="111", user_id="42"))
        assert result["username"] == "testuser"
        assert result["nickname"] == "Testy"
        assert result["roles"] == ["2", "3"]
@@ -334,7 +339,7 @@ class TestSearchMembers:
        mock_req.return_value = [
            {"user": {"id": "42", "username": "testuser", "global_name": "Test", "bot": False}, "nick": None, "roles": []},
        ]
-        result = json.loads(discord_server(action="search_members", guild_id="111", query="test"))
+        result = json.loads(discord_core(action="search_members", guild_id="111", query="test"))
        assert result["count"] == 1
        assert result["members"][0]["username"] == "testuser"
        mock_req.assert_called_once_with(
@@ -346,7 +351,7 @@ class TestSearchMembers:
    def test_search_members_limit_capped(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = []
-        discord_server(action="search_members", guild_id="111", query="x", limit=200)
+        discord_core(action="search_members", guild_id="111", query="x", limit=200)
        call_params = mock_req.call_args[1]["params"]
        assert call_params["limit"] == "100"  # Capped at 100

@@ -370,7 +375,7 @@ class TestFetchMessages:
                "pinned": False,
            },
        ]
-        result = json.loads(discord_server(action="fetch_messages", channel_id="11"))
+        result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
        assert result["count"] == 1
        assert result["messages"][0]["content"] == "Hello world"
        assert result["messages"][0]["author"]["username"] == "user1"
@@ -379,7 +384,7 @@ class TestFetchMessages:
    def test_fetch_messages_with_pagination(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = []
-        discord_server(action="fetch_messages", channel_id="11", before="999", limit=10)
+        discord_core(action="fetch_messages", channel_id="11", before="999", limit=10)
        call_params = mock_req.call_args[1]["params"]
        assert call_params["before"] == "999"
        assert call_params["limit"] == "10"
@@ -396,7 +401,7 @@ class TestListPins:
        mock_req.return_value = [
            {"id": "500", "content": "Important announcement", "author": {"username": "admin"}, "timestamp": "2024-01-01T00:00:00Z"},
        ]
-        result = json.loads(discord_server(action="list_pins", channel_id="11"))
+        result = json.loads(discord_admin_handler(action="list_pins", channel_id="11"))
        assert result["count"] == 1
        assert result["pinned_messages"][0]["content"] == "Important announcement"

@@ -410,7 +415,7 @@ class TestPinUnpin:
    def test_pin_message(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None  # 204
-        result = json.loads(discord_server(action="pin_message", channel_id="11", message_id="500"))
+        result = json.loads(discord_admin_handler(action="pin_message", channel_id="11", message_id="500"))
        assert result["success"] is True
        mock_req.assert_called_once_with("PUT", "/channels/11/pins/500", "test-token")

@@ -418,7 +423,7 @@ class TestPinUnpin:
    def test_unpin_message(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None
-        result = json.loads(discord_server(action="unpin_message", channel_id="11", message_id="500"))
+        result = json.loads(discord_admin_handler(action="unpin_message", channel_id="11", message_id="500"))
        assert result["success"] is True


@@ -431,7 +436,7 @@ class TestCreateThread:
    def test_create_standalone_thread(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = {"id": "800", "name": "New Thread"}
-        result = json.loads(discord_server(action="create_thread", channel_id="11", name="New Thread"))
+        result = json.loads(discord_core(action="create_thread", channel_id="11", name="New Thread"))
        assert result["success"] is True
        assert result["thread_id"] == "800"
        # Verify the API call
@@ -444,7 +449,7 @@ class TestCreateThread:
    def test_create_thread_from_message(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = {"id": "801", "name": "Discussion"}
-        result = json.loads(discord_server(
+        result = json.loads(discord_core(
            action="create_thread", channel_id="11", name="Discussion", message_id="1001",
        ))
        assert result["success"] is True
@@ -463,7 +468,7 @@ class TestRoleManagement:
    def test_add_role(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None
-        result = json.loads(discord_server(
+        result = json.loads(discord_admin_handler(
            action="add_role", guild_id="111", user_id="42", role_id="2",
        ))
        assert result["success"] is True
@@ -475,7 +480,7 @@ class TestRoleManagement:
    def test_remove_role(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None
-        result = json.loads(discord_server(
+        result = json.loads(discord_admin_handler(
            action="remove_role", guild_id="111", user_id="42", role_id="2",
        ))
        assert result["success"] is True
@@ -490,15 +495,23 @@ class TestErrorHandling:
    def test_api_error_handled(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.side_effect = DiscordAPIError(403, '{"message": "Missing Access"}')
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "error" in result
        assert "403" in result["error"]

    @patch("tools.discord_tool._discord_request")
-    def test_unexpected_error_handled(self, mock_req, monkeypatch):
+    def test_unexpected_error_handled_admin(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.side_effect = RuntimeError("something broke")
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
+        assert "error" in result
+        assert "something broke" in result["error"]
+
+    @patch("tools.discord_tool._discord_request")
+    def test_unexpected_error_handled_core(self, mock_req, monkeypatch):
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
+        mock_req.side_effect = RuntimeError("something broke")
+        result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
        assert "error" in result
        assert "something broke" in result["error"]

@@ -508,79 +521,109 @@ class TestErrorHandling:
 # ---------------------------------------------------------------------------

 class TestRegistration:
-    def test_tool_registered(self):
+    def test_core_tool_registered(self):
        from tools.registry import registry
-        entry = registry._tools.get("discord_server")
+        entry = registry._tools.get("discord")
        assert entry is not None
-        assert entry.schema["name"] == "discord_server"
+        assert entry.schema["name"] == "discord"
        assert entry.toolset == "discord"
        assert entry.check_fn is not None
        assert entry.requires_env == ["DISCORD_BOT_TOKEN"]

-    def test_schema_actions(self):
-        """Static schema should list all actions (the model_tools post-processing
-        narrows this per-session; static registration is the superset)."""
+    def test_admin_tool_registered(self):
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
-        actions = entry.schema["parameters"]["properties"]["action"]["enum"]
-        expected = [
-            "list_guilds", "server_info", "list_channels", "channel_info",
-            "list_roles", "member_info", "search_members", "fetch_messages",
-            "list_pins", "pin_message", "unpin_message", "create_thread",
-            "add_role", "remove_role",
-        ]
-        assert set(actions) == set(expected)
-        assert set(_ACTIONS.keys()) == set(expected)
+        entry = registry._tools.get("discord_admin")
+        assert entry is not None
+        assert entry.schema["name"] == "discord_admin"
+        assert entry.toolset == "discord_admin"
+        assert entry.check_fn is not None
+        assert entry.requires_env == ["DISCORD_BOT_TOKEN"]
+
+    def test_core_schema_actions(self):
+        """Core static schema should list only core actions."""
+        from tools.registry import registry
+        entry = registry._tools["discord"]
+        actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == {"fetch_messages", "search_members", "create_thread"}
+
+    def test_admin_schema_actions(self):
+        """Admin static schema should list only admin actions."""
+        from tools.registry import registry
+        entry = registry._tools["discord_admin"]
+        actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
+        expected_admin = set(_ACTIONS.keys()) - {"fetch_messages", "search_members", "create_thread"}
+        assert actions == expected_admin
+
+    def test_all_actions_covered(self):
+        """Core + admin actions should cover all known actions."""
+        assert set(_CORE_ACTIONS.keys()) | set(_ADMIN_ACTIONS.keys()) == set(_ACTIONS.keys())
+        assert set(_CORE_ACTIONS.keys()) & set(_ADMIN_ACTIONS.keys()) == set()

    def test_schema_parameter_bounds(self):
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
+        entry = registry._tools["discord"]
        props = entry.schema["parameters"]["properties"]
        assert props["limit"]["minimum"] == 1
        assert props["limit"]["maximum"] == 100
        assert props["auto_archive_duration"]["enum"] == [60, 1440, 4320, 10080]

-    def test_schema_description_is_action_manifest(self):
-        """The top-level description should include the action manifest
-        (one-line signatures per action) so the model can find required
-        params without re-reading every parameter description."""
+    def test_core_schema_description(self):
+        """Core schema description should mention core actions."""
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
+        entry = registry._tools["discord"]
        desc = entry.schema["description"]
-        # Spot-check a few entries
-        assert "list_guilds()" in desc
        assert "fetch_messages(channel_id)" in desc
+        assert "search_members(guild_id, query)" in desc
+        assert "create_thread(channel_id, name)" in desc
+        # Admin actions should NOT be in core description
+        assert "list_guilds()" not in desc
+        assert "add_role(" not in desc
+
+    def test_admin_schema_description(self):
+        """Admin schema description should mention admin actions."""
+        from tools.registry import registry
+        entry = registry._tools["discord_admin"]
+        desc = entry.schema["description"]
+        assert "list_guilds()" in desc
        assert "add_role(guild_id, user_id, role_id)" in desc
+        # Core actions should NOT be in admin description
+        assert "fetch_messages(" not in desc
+        assert "create_thread(" not in desc

    def test_handler_callable(self):
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
+        entry = registry._tools["discord"]
        assert callable(entry.handler)
+        entry_admin = registry._tools["discord_admin"]
+        assert callable(entry_admin.handler)


 # ---------------------------------------------------------------------------
-# Toolset: discord_server only in hermes-discord
+# Toolset: discord / discord_admin only in hermes-discord
 # ---------------------------------------------------------------------------

 class TestToolsetInclusion:
-    def test_discord_server_in_hermes_discord_toolset(self):
+    def test_discord_tools_in_hermes_discord_toolset(self):
        from toolsets import TOOLSETS
-        assert "discord_server" in TOOLSETS["hermes-discord"]["tools"]
+        assert "discord" in TOOLSETS["hermes-discord"]["tools"]
+        assert "discord_admin" in TOOLSETS["hermes-discord"]["tools"]

-    def test_discord_server_not_in_core_tools(self):
+    def test_discord_tools_not_in_core_tools(self):
        from toolsets import _HERMES_CORE_TOOLS
-        assert "discord_server" not in _HERMES_CORE_TOOLS
+        assert "discord" not in _HERMES_CORE_TOOLS
+        assert "discord_admin" not in _HERMES_CORE_TOOLS

-    def test_discord_server_not_in_other_toolsets(self):
+    def test_discord_tools_not_in_other_toolsets(self):
         from toolsets import TOOLSETS
         for name, ts in TOOLSETS.items():
-            if name == "hermes-discord":
+            if name in ("hermes-discord", "hermes-gateway", "discord", "discord_admin"):
                 continue
-            # The gateway toolset might include it if it unions all platform tools
-            if name == "hermes-gateway":
-                continue
-            assert "discord_server" not in ts.get("tools", []), (
-                f"discord_server should not be in toolset '{name}'"
+            tools = ts.get("tools", [])  # a toolset without a "tools" key counts as empty
+            assert "discord" not in tools, (
+                f"discord tool should not be in toolset '{name}'"
+            )
+            assert "discord_admin" not in tools, (
+                f"discord_admin tool should not be in toolset '{name}'"
             )


@@ -798,40 +841,69 @@ class TestDynamicSchema:
    @patch("tools.discord_tool._discord_request")
    def test_no_token_returns_none(self, mock_req, monkeypatch):
        monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
-        assert get_dynamic_schema() is None
+        assert get_dynamic_schema_core() is None
+        assert get_dynamic_schema_admin() is None
        mock_req.assert_not_called()

    @patch("tools.discord_tool._discord_request")
-    def test_full_intents_full_schema(self, mock_req, monkeypatch):
+    def test_full_intents_core_schema(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
-        schema = get_dynamic_schema()
-        actions = schema["parameters"]["properties"]["action"]["enum"]
-        assert set(actions) == set(_ACTIONS.keys())
-        # No content warning
+        schema = get_dynamic_schema_core()
+        actions = set(schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == set(_CORE_ACTIONS.keys())
+        assert schema["name"] == "discord"
+
+    @patch("tools.discord_tool._discord_request")
+    def test_full_intents_admin_schema(self, mock_req, monkeypatch):
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"discord": {"server_actions": ""}},
+        )
+        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
+        schema = get_dynamic_schema_admin()
+        actions = set(schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == set(_ADMIN_ACTIONS.keys())
+        assert schema["name"] == "discord_admin"
+        # No content warning when MESSAGE_CONTENT is enabled
        assert "MESSAGE_CONTENT" not in schema["description"]

    @patch("tools.discord_tool._discord_request")
-    def test_no_members_intent_removes_member_actions_from_schema(
+    def test_no_members_intent_removes_member_actions_from_admin_schema(
        self, mock_req, monkeypatch,
    ):
+        """member_info is an admin action; it should be hidden when
+        GUILD_MEMBERS intent is missing."""
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.return_value = {"flags": 1 << 18}  # only MESSAGE_CONTENT
-        schema = get_dynamic_schema()
+        schema = get_dynamic_schema_admin()
+        actions = schema["parameters"]["properties"]["action"]["enum"]
+        assert "member_info" not in actions
+        assert "member_info" not in schema["description"]
+
+    @patch("tools.discord_tool._discord_request")
+    def test_no_members_intent_hides_search_members_from_core(
+        self, mock_req, monkeypatch,
+    ):
+        """search_members is a core action gated by GUILD_MEMBERS intent."""
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"discord": {"server_actions": ""}},
+        )
+        mock_req.return_value = {"flags": 1 << 18}  # only MESSAGE_CONTENT
+        schema = get_dynamic_schema_core()
        actions = schema["parameters"]["properties"]["action"]["enum"]
        assert "search_members" not in actions
-        assert "member_info" not in actions
-        # Manifest description should also not advertise them
-        assert "search_members" not in schema["description"]
-        assert "member_info" not in schema["description"]

    @patch("tools.discord_tool._discord_request")
    def test_no_message_content_adds_warning_note(self, mock_req, monkeypatch):
@@ -841,41 +913,53 @@ class TestDynamicSchema:
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.return_value = {"flags": 1 << 14}  # only GUILD_MEMBERS
-        schema = get_dynamic_schema()
+        schema = get_dynamic_schema_core()
        assert "MESSAGE_CONTENT" in schema["description"]
        # But fetch_messages is still available
        actions = schema["parameters"]["properties"]["action"]["enum"]
        assert "fetch_messages" in actions

    @patch("tools.discord_tool._discord_request")
-    def test_config_allowlist_narrows_schema(self, mock_req, monkeypatch):
+    def test_config_allowlist_narrows_admin_schema(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": "list_guilds,list_channels"}},
        )
        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
-        schema = get_dynamic_schema()
+        schema = get_dynamic_schema_admin()
        actions = schema["parameters"]["properties"]["action"]["enum"]
        assert actions == ["list_guilds", "list_channels"]
-        # Manifest description should only show allowed ones (check for
-        # the signature marker, which is specific to manifest lines)
        assert "list_guilds()" in schema["description"]
        assert "add_role(" not in schema["description"]
-        assert "create_thread(" not in schema["description"]

    @patch("tools.discord_tool._discord_request")
-    def test_empty_allowlist_with_valid_values_hides_tool(self, mock_req, monkeypatch):
+    def test_empty_allowlist_with_valid_values_hides_tools(self, mock_req, monkeypatch):
        """If the allowlist resolves to zero valid actions (e.g. all names
-        were typos), get_dynamic_schema returns None so the tool is dropped
-        entirely rather than showing an empty enum."""
+        were typos), the core and admin schema getters return None so both tools are dropped."""
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": "typo_one,typo_two"}},
        )
        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
-        assert get_dynamic_schema() is None
+        assert get_dynamic_schema_core() is None
+        assert get_dynamic_schema_admin() is None
+
+    @patch("tools.discord_tool._discord_request")
+    def test_backward_compat_wrapper(self, mock_req, monkeypatch):
+        """get_dynamic_schema() should delegate to get_dynamic_schema_core()."""
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"discord": {"server_actions": ""}},
+        )
+        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
+        schema = get_dynamic_schema()
+        assert schema is not None
+        assert schema["name"] == "discord"
+        actions = set(schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == set(_CORE_ACTIONS.keys())


 # ---------------------------------------------------------------------------
@@ -890,7 +974,7 @@ class TestRuntimeAllowlistEnforcement:
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": "list_guilds"}},
        )
-        result = json.loads(discord_server(action="add_role", guild_id="1", user_id="2", role_id="3"))
+        result = json.loads(discord_admin_handler(action="add_role", guild_id="1", user_id="2", role_id="3"))
        assert "error" in result
        assert "disabled by config" in result["error"]
        mock_req.assert_not_called()
@@ -903,7 +987,7 @@ class TestRuntimeAllowlistEnforcement:
            lambda: {"discord": {"server_actions": "list_guilds"}},
        )
        mock_req.return_value = []
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "guilds" in result


@@ -930,7 +1014,7 @@ class Test403Enrichment:
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.side_effect = DiscordAPIError(403, '{"message":"Missing Permissions"}')
-        result = json.loads(discord_server(
+        result = json.loads(discord_admin_handler(
            action="add_role", guild_id="1", user_id="2", role_id="3",
        ))
        assert "error" in result
@@ -944,7 +1028,7 @@ class Test403Enrichment:
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.side_effect = DiscordAPIError(500, "server error")
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "500" in result["error"]
        assert "MANAGE_ROLES" not in result["error"]

@@ -961,10 +1045,10 @@ class TestModelToolsIntegration:
        _reset_capability_cache()

    @patch("tools.discord_tool._discord_request")
-    def test_discord_server_schema_rebuilt_by_get_tool_definitions(
+    def test_discord_admin_schema_rebuilt_by_get_tool_definitions(
        self, mock_req, monkeypatch,
    ):
-        """When model_tools.get_tool_definitions runs with discord_server
+        """When model_tools.get_tool_definitions runs with discord_admin
        available, it should replace the static schema with the dynamic one."""
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
@@ -976,16 +1060,16 @@ class TestModelToolsIntegration:

        from model_tools import get_tool_definitions
        tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
-        discord_tool = next(
-            (t for t in tools if t.get("function", {}).get("name") == "discord_server"),
+        discord_admin_tool = next(
+            (t for t in tools if t.get("function", {}).get("name") == "discord_admin"),
            None,
        )
-        assert discord_tool is not None, "discord_server should be in the schema"
-        actions = discord_tool["function"]["parameters"]["properties"]["action"]["enum"]
+        assert discord_admin_tool is not None, "discord_admin should be in the schema"
+        actions = discord_admin_tool["function"]["parameters"]["properties"]["action"]["enum"]
        assert actions == ["list_guilds", "server_info"]

    @patch("tools.discord_tool._discord_request")
-    def test_discord_server_dropped_when_allowlist_empties_it(
+    def test_discord_tools_dropped_when_allowlist_empties_them(
        self, mock_req, monkeypatch,
    ):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
@@ -998,4 +1082,6 @@ class TestModelToolsIntegration:
        from model_tools import get_tool_definitions
        tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
        names = [t.get("function", {}).get("name") for t in tools]
+        assert "discord" not in names
+        assert "discord_admin" not in names
        assert "discord_server" not in names
@@ -19,9 +19,11 @@ from unittest.mock import patch
 from tools.process_registry import (
    ProcessRegistry,
    ProcessSession,
-    WATCH_MAX_PER_WINDOW,
-    WATCH_WINDOW_SECONDS,
-    WATCH_OVERLOAD_KILL_SECONDS,
+    WATCH_MIN_INTERVAL_SECONDS,
+    WATCH_STRIKE_LIMIT,
+    WATCH_GLOBAL_MAX_PER_WINDOW,
+    WATCH_GLOBAL_WINDOW_SECONDS,
+    WATCH_GLOBAL_COOLDOWN_SECONDS,
 )


@@ -129,10 +131,15 @@ class TestCheckWatchPatterns:
        assert registry.completion_queue.empty()

    def test_hit_counter_increments(self, registry):
-        """Each delivered notification increments _watch_hits."""
+        """Each delivered notification increments _watch_hits.
+
+        The per-session limit is one notification per 15s, so reset the cooldown between calls.
+        """
        session = _make_session(watch_patterns=["X"])
        registry._check_watch_patterns(session, "X\n")
        assert session._watch_hits == 1
+        # Reset cooldown so the second match gets delivered.
+        session._watch_cooldown_until = 0.0
        registry._check_watch_patterns(session, "X\n")
        assert session._watch_hits == 2

@@ -148,100 +155,114 @@ class TestCheckWatchPatterns:


 # =========================================================================
-# Rate limiting
+# Per-session rate limiting: 1 notification per 15s, 3 strikes → disable
 # =========================================================================

-class TestRateLimiting:
-    def test_within_window_limit(self, registry):
-        """Notifications within the rate limit all get delivered."""
+class TestPerSessionRateLimit:
+    def test_first_match_delivers(self, registry):
+        """A fresh session with no prior cooldown delivers the first match."""
        session = _make_session(watch_patterns=["E"])
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        assert registry.completion_queue.qsize() == WATCH_MAX_PER_WINDOW
+        registry._check_watch_patterns(session, "E first\n")
+        assert registry.completion_queue.qsize() == 1
+        evt = registry.completion_queue.get_nowait()
+        assert evt["type"] == "watch_match"
+        assert session._watch_hits == 1
+        # Cooldown is now armed.
+        assert session._watch_cooldown_until > 0

-    def test_exceeds_window_limit(self, registry):
-        """Notifications beyond the rate limit are suppressed."""
+    def test_second_match_within_cooldown_is_suppressed(self, registry):
+        """A second match inside the 15s cooldown is dropped and counted."""
        session = _make_session(watch_patterns=["E"])
-        for i in range(WATCH_MAX_PER_WINDOW + 5):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        # Only WATCH_MAX_PER_WINDOW should be in the queue
-        assert registry.completion_queue.qsize() == WATCH_MAX_PER_WINDOW
-        assert session._watch_suppressed == 5
-
-    def test_window_resets(self, registry):
-        """After the window expires, notifications can flow again."""
-        session = _make_session(watch_patterns=["E"])
-        # Fill the window
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        # One more should be suppressed
-        registry._check_watch_patterns(session, "E extra\n")
+        registry._check_watch_patterns(session, "E first\n")
+        assert registry.completion_queue.qsize() == 1
+        # Immediately trigger another match — well inside cooldown.
+        registry._check_watch_patterns(session, "E second\n")
+        # Still only one notification.
+        assert registry.completion_queue.qsize() == 1
        assert session._watch_suppressed == 1
+        assert session._watch_consecutive_strikes == 1

-        # Fast-forward past window
-        session._watch_window_start = time.time() - WATCH_WINDOW_SECONDS - 1
-        registry._check_watch_patterns(session, "E after reset\n")
-        # Should deliver now (window reset)
-        assert registry.completion_queue.qsize() == WATCH_MAX_PER_WINDOW + 1
-
-    def test_suppressed_count_in_next_delivery(self, registry):
-        """Suppressed count is reported in the next successful delivery."""
+    def test_many_drops_inside_window_count_as_ONE_strike(self, registry):
+        """Multiple suppressions inside the same cooldown window = 1 strike."""
        session = _make_session(watch_patterns=["E"])
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        # Suppress 3 more
-        for i in range(3):
-            registry._check_watch_patterns(session, f"E suppressed {i}\n")
-        assert session._watch_suppressed == 3
+        registry._check_watch_patterns(session, "E\n")
+        for _ in range(10):
+            registry._check_watch_patterns(session, "E\n")
+        assert session._watch_consecutive_strikes == 1
+        assert session._watch_suppressed == 10

-        # Fast-forward past window to allow delivery
-        session._watch_window_start = time.time() - WATCH_WINDOW_SECONDS - 1
-        registry._check_watch_patterns(session, "E back\n")
-        # Drain to the last event
-        last_evt = None
-        while not registry.completion_queue.empty():
-            last_evt = registry.completion_queue.get_nowait()
-        assert last_evt["suppressed"] == 3
-        assert session._watch_suppressed == 0  # reset after delivery
-
-
-# =========================================================================
-# Overload kill switch
-# =========================================================================
-
-class TestOverloadKillSwitch:
-    def test_sustained_overload_disables(self, registry):
-        """Sustained overload beyond threshold permanently disables watching."""
+    def test_three_strikes_disables_watch_and_promotes_to_notify(self, registry):
+        """Three consecutive strike windows → watch_disabled + notify_on_complete."""
        session = _make_session(watch_patterns=["E"])
-        # Fill the window to trigger rate limit
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
+        session.notify_on_complete = False

-        # Simulate sustained overload: set overload_since to past threshold
-        session._watch_overload_since = time.time() - WATCH_OVERLOAD_KILL_SECONDS - 1
-        # Force another suppressed hit
-        registry._check_watch_patterns(session, "E overload\n")
-        registry._check_watch_patterns(session, "E overload2\n")
+        for strike in range(WATCH_STRIKE_LIMIT):
+            # Emit → arms cooldown.
+            registry._check_watch_patterns(session, f"E emit {strike}\n")
+            # Attempt while inside cooldown → one strike, dropped.
+            registry._check_watch_patterns(session, f"E drop {strike}\n")
+            # Fast-forward past the cooldown for the NEXT iteration, BUT leave
+            # the strike candidate set so the cooldown-expiry branch sees
+            # "this was a strike window" and doesn't reset the counter.
+            session._watch_cooldown_until = time.time() - 0.01

+        # After WATCH_STRIKE_LIMIT consecutive strike windows the session
+        # should already be disabled; no further attempt is needed.
        assert session._watch_disabled is True
-        # Should have a watch_disabled event in the queue
+        assert session.notify_on_complete is True
+        # One watch_disabled summary event should be in the queue.
        disabled_evts = []
+        matches = 0
        while not registry.completion_queue.empty():
            evt = registry.completion_queue.get_nowait()
            if evt.get("type") == "watch_disabled":
                disabled_evts.append(evt)
+            elif evt.get("type") == "watch_match":
+                matches += 1
        assert len(disabled_evts) == 1
-        assert "too many matches" in disabled_evts[0]["message"]
+        assert "notify_on_complete" in disabled_evts[0]["message"]
+        # We should have had exactly WATCH_STRIKE_LIMIT emissions before disable.
+        assert matches == WATCH_STRIKE_LIMIT

-    def test_overload_resets_on_delivery(self, registry):
-        """Overload timer resets when a notification gets through."""
+    def test_clean_window_resets_strike_counter(self, registry):
+        """A cooldown that expires with zero drops resets the consecutive counter."""
        session = _make_session(watch_patterns=["E"])
-        # Start overload tracking
-        session._watch_overload_since = time.time() - 10
-        # But window allows delivery → overload should reset
-        registry._check_watch_patterns(session, "E ok\n")
-        assert session._watch_overload_since == 0.0
-        assert session._watch_disabled is False
+        # Emit + drop inside window → 1 strike.
+        registry._check_watch_patterns(session, "E emit\n")
+        registry._check_watch_patterns(session, "E drop\n")
+        assert session._watch_consecutive_strikes == 1
+
+        # Fast-forward past the cooldown. The drop above happened inside that
+        # window, so the strike-candidate flag is presumably still set; the
+        # test clears it by hand (below) to model a cooldown window that saw
+        # no drops. On the NEXT emission, the cooldown-expiry branch checks
+        # strike_candidate and, finding it False, should take the reset
+        # branch and zero the consecutive-strike counter.
+        session._watch_cooldown_until = time.time() - 0.01
+        # Clear strike candidate to simulate "this cooldown had no drops".
+        session._watch_strike_candidate = False
+        registry._check_watch_patterns(session, "E clean\n")
+        assert session._watch_consecutive_strikes == 0
+
+    def test_suppressed_count_in_next_delivery(self, registry):
+        """Suppressed count from a strike window is reported in the next emit."""
+        session = _make_session(watch_patterns=["E"])
+        registry._check_watch_patterns(session, "E emit\n")
+        for _ in range(4):
+            registry._check_watch_patterns(session, "E drop\n")
+        assert session._watch_suppressed == 4
+
+        # Fast-forward past cooldown.
+        session._watch_cooldown_until = time.time() - 0.01
+        # Drain the queue so we can inspect the next emission.
+        while not registry.completion_queue.empty():
+            registry.completion_queue.get_nowait()
+
+        registry._check_watch_patterns(session, "E back\n")
+        evt = registry.completion_queue.get_nowait()
+        assert evt["type"] == "watch_match"
+        assert evt["suppressed"] == 4
+        assert session._watch_suppressed == 0  # reset after delivery


 # =========================================================================
@@ -321,3 +342,150 @@ class TestCodeExecutionBlocked:
    def test_watch_patterns_blocked(self):
        from tools.code_execution_tool import _TERMINAL_BLOCKED_PARAMS
        assert "watch_patterns" in _TERMINAL_BLOCKED_PARAMS
+
+
+# =========================================================================
+# Suppress-after-exit (anti-spam fix)
+# =========================================================================
+
+class TestSuppressAfterExit:
+    def test_match_dropped_once_session_exited(self, registry):
+        """watch_patterns notifications stop the moment session.exited is set."""
+        session = _make_session(watch_patterns=["ERROR"])
+        # Mark the process as exited BEFORE the late chunk arrives.
+        session.exited = True
+        registry._check_watch_patterns(session, "ERROR: late buffer\n")
+        assert registry.completion_queue.empty()
+        assert session._watch_hits == 0
+
+    def test_match_still_delivered_while_session_running(self, registry):
+        """Sanity: while the process is still running, matches still deliver."""
+        session = _make_session(watch_patterns=["ERROR"])
+        session.exited = False
+        registry._check_watch_patterns(session, "ERROR: oh no\n")
+        assert not registry.completion_queue.empty()
+        evt = registry.completion_queue.get_nowait()
+        assert evt["type"] == "watch_match"
+
+
+# =========================================================================
+# Mutual exclusion: notify_on_complete wins over watch_patterns
+# =========================================================================
+
+class TestMutualExclusion:
+    def test_resolver_drops_watch_when_notify_set(self):
+        """Both flags set → watch_patterns dropped with a note."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=True,
+            watch_patterns=["ERROR", "DONE"],
+            background=True,
+        )
+        assert resolved is None
+        assert "notify_on_complete" in note
+        assert "duplicate notifications" in note
+
+    def test_resolver_keeps_watch_when_notify_off(self):
+        """notify_on_complete=False → watch_patterns kept intact."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=False,
+            watch_patterns=["ERROR"],
+            background=True,
+        )
+        assert resolved == ["ERROR"]
+        assert note == ""
+
+    def test_resolver_keeps_notify_when_no_watch(self):
+        """Only notify_on_complete set → no conflict."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=True,
+            watch_patterns=None,
+            background=True,
+        )
+        assert resolved is None
+        assert note == ""
+
+    def test_resolver_inert_when_not_background(self):
+        """Without background=True, the whole thing is a no-op."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=True,
+            watch_patterns=["ERROR"],
+            background=False,
+        )
+        assert resolved == ["ERROR"]
+        assert note == ""
+
+
+# =========================================================================
+# Global circuit breaker (cross-session overflow blocker)
+# =========================================================================
+
+class TestGlobalCircuitBreaker:
+    def test_trips_after_global_threshold(self, registry):
+        """When >N matches fire across sessions in the window, breaker trips."""
+        sessions = [
+            _make_session(sid=f"proc_s{i}", watch_patterns=["E"])
+            for i in range(WATCH_GLOBAL_MAX_PER_WINDOW + 3)
+        ]
+        # Each session fires exactly one match — individually well under the
+        # per-session cap. But collectively they should trip the global cap.
+        for s in sessions:
+            registry._check_watch_patterns(s, "E hit\n")
+
+        # Drain the queue and count event types.
+        watch_matches = 0
+        overflow_tripped = 0
+        while not registry.completion_queue.empty():
+            evt = registry.completion_queue.get_nowait()
+            if evt.get("type") == "watch_match":
+                watch_matches += 1
+            elif evt.get("type") == "watch_overflow_tripped":
+                overflow_tripped += 1
+        assert watch_matches == WATCH_GLOBAL_MAX_PER_WINDOW
+        assert overflow_tripped == 1
+        assert registry._global_watch_tripped_until > 0
+
+    def test_cooldown_suppresses_and_then_releases(self, registry):
+        """After trip, further events are suppressed; cooldown expiry emits release."""
+        # Spawn enough fresh sessions to trip the global breaker.
+        sessions = [
+            _make_session(sid=f"proc_t{i}", watch_patterns=["E"])
+            for i in range(WATCH_GLOBAL_MAX_PER_WINDOW + 1)
+        ]
+        for s in sessions:
+            registry._check_watch_patterns(s, "E hit\n")
+        assert registry._global_watch_tripped_until > 0
+
+        # Further matches from BRAND-NEW sessions during cooldown are dropped.
+        q_size_before = registry.completion_queue.qsize()
+        extra1 = _make_session(sid="proc_extra1", watch_patterns=["E"])
+        extra2 = _make_session(sid="proc_extra2", watch_patterns=["E"])
+        registry._check_watch_patterns(extra1, "E hit\n")
+        registry._check_watch_patterns(extra2, "E hit\n")
+        assert registry.completion_queue.qsize() == q_size_before  # no new events
+        assert registry._global_watch_suppressed_during_trip >= 2
+
+        # Simulate cooldown expiry.
+        registry._global_watch_tripped_until = time.time() - 1
+
+        # Next call admits AND emits the release summary.
+        released_session = _make_session(sid="proc_after", watch_patterns=["E"])
+        registry._check_watch_patterns(released_session, "E hit\n")
+        released = False
+        admitted = False
+        while not registry.completion_queue.empty():
+            evt = registry.completion_queue.get_nowait()
+            if evt.get("type") == "watch_overflow_released":
+                released = True
+                assert evt["suppressed"] >= 2
+            elif evt.get("type") == "watch_match":
+                admitted = True
+        assert released
+        assert admitted
@@ -11,7 +11,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 from hermes_constants import display_hermes_home

@@ -238,6 +238,7 @@ def cronjob(
    base_url: Optional[str] = None,
    reason: Optional[str] = None,
    script: Optional[str] = None,
+    context_from: Optional[Union[str, List[str]]] = None,
    enabled_toolsets: Optional[List[str]] = None,
    workdir: Optional[str] = None,
    task_id: str = None,
@@ -265,6 +266,18 @@ def cronjob(
                if script_error:
                    return tool_error(script_error, success=False)

+            # Validate context_from references existing jobs
+            if context_from:
+                from cron.jobs import get_job as _get_job
+                refs = [context_from] if isinstance(context_from, str) else context_from
+                for ref_id in refs:
+                    if not _get_job(ref_id):
+                        return tool_error(
+                            f"context_from job '{ref_id}' not found. "
+                            "Use cronjob(action='list') to see available jobs.",
+                            success=False,
+                        )
+
            job = create_job(
                prompt=prompt or "",
                schedule=schedule,
@@ -277,6 +290,7 @@ def cronjob(
                provider=_normalize_optional_job_value(provider),
                base_url=_normalize_optional_job_value(base_url, strip_trailing_slash=True),
                script=_normalize_optional_job_value(script),
+                context_from=context_from,
                enabled_toolsets=enabled_toolsets or None,
                workdir=_normalize_optional_job_value(workdir),
            )
@@ -368,6 +382,24 @@ def cronjob(
                    if script_error:
                        return tool_error(script_error, success=False)
                updates["script"] = _normalize_optional_job_value(script) if script else None
+            if context_from is not None:
+                # Empty string / empty list clears the field; otherwise validate
+                # each referenced job exists before storing. Normalized to a list
+                # (or None) to match the shape stored by create_job().
+                if isinstance(context_from, str):
+                    refs = [context_from.strip()] if context_from.strip() else []
+                else:
+                    refs = [str(j).strip() for j in context_from if str(j).strip()]
+                if refs:
+                    from cron.jobs import get_job as _get_job
+                    for ref_id in refs:
+                        if not _get_job(ref_id):
+                            return tool_error(
+                                f"context_from job '{ref_id}' not found. "
+                                "Use cronjob(action='list') to see available jobs.",
+                                success=False,
+                            )
+                updates["context_from"] = refs or None
            if enabled_toolsets is not None:
                updates["enabled_toolsets"] = enabled_toolsets or None
            if workdir is not None:
@@ -473,6 +505,19 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
                "type": "string",
                "description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear."
            },
+            "context_from": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": (
+                    "Optional job ID or list of job IDs whose most recent completed output is "
+                    "injected into the prompt as context before each run. "
+                    "Use this to chain cron jobs: job A collects data, job B processes it. "
+                    "Each entry must be a valid job ID (from cronjob action='list'). "
+                    "Note: injects the most recent completed output — does not wait for "
+                    "upstream jobs running in the same tick. "
+                    "On update, pass an empty array to clear."
+                ),
+            },
            "enabled_toolsets": {
                "type": "array",
                "items": {"type": "string"},
@@ -526,6 +571,7 @@ registry.register(
        base_url=args.get("base_url"),
        reason=args.get("reason"),
        script=args.get("script"),
+        context_from=args.get("context_from"),
        enabled_toolsets=args.get("enabled_toolsets"),
        workdir=args.get("workdir"),
        task_id=kw.get("task_id"),
@@ -33,6 +33,7 @@ from typing import Any, Dict, List, Optional

 from toolsets import TOOLSETS
 from tools import file_state
+from tools.terminal_tool import set_approval_callback as _set_subagent_approval_cb
 from utils import base_url_hostname, is_truthy_value


@@ -47,6 +48,64 @@ DELEGATE_BLOCKED_TOOLS = frozenset(
    ]
 )

+
+# ---------------------------------------------------------------------------
+# Subagent approval callbacks
+# ---------------------------------------------------------------------------
+# Subagents run inside a ThreadPoolExecutor worker. The CLI's interactive
+# approval callback is stored in tools/terminal_tool.py's threading.local(),
+# so worker threads do NOT inherit it. Without a callback,
+# prompt_dangerous_approval() falls back to input() from the worker thread,
+# which deadlocks against the parent's prompt_toolkit TUI that owns stdin.
+#
+# Fix: install a non-interactive callback into every subagent worker thread
+# via ThreadPoolExecutor(initializer=_set_subagent_approval_cb, initargs=(cb,)).
+# The callback is chosen by the `delegation.subagent_auto_approve` config:
+#   false (default) → _subagent_auto_deny (safe; matches leaf tool blocklist)
+#   true            → _subagent_auto_approve (opt-in YOLO for cron/batch)
+# Both emit a logger.warning for audit; gateway sessions are unaffected
+# because they resolve approvals via tools/approval.py's per-session queue,
+# not through these TLS callbacks.
+def _subagent_auto_deny(command: str, description: str, **kwargs) -> str:
+    """Auto-deny dangerous commands in subagent threads (safe default).
+
+    Returns 'deny' so the subagent sees a refusal it can recover from, and
+    never calls input() (which would deadlock the parent TUI).
+    """
+    logger.warning(
+        "Subagent auto-denied dangerous command: %s (%s). "
+        "Set delegation.subagent_auto_approve: true to allow.",
+        command, description,
+    )
+    return "deny"
+
+
+def _subagent_auto_approve(command: str, description: str, **kwargs) -> str:
+    """Auto-approve dangerous commands in subagent threads (opt-in YOLO).
+
+    Only installed when delegation.subagent_auto_approve=true. Returns 'once'
+    so the subagent proceeds without blocking the parent UI.
+    """
+    logger.warning(
+        "Subagent auto-approved dangerous command: %s (%s)",
+        command, description,
+    )
+    return "once"
+
+
+def _get_subagent_approval_callback():
+    """Return the callback to install into subagent worker threads.
+
+    Config key: delegation.subagent_auto_approve (bool, default False).
+    Reads via the same _load_config() path as the rest of delegate_task, so
+    the value comes from config.yaml or the default (no env override).
+    """
+    cfg = _load_config()
+    val = cfg.get("subagent_auto_approve", False)
+    if is_truthy_value(val):
+        return _subagent_auto_approve
+    return _subagent_auto_deny
+
 # Build a description fragment listing toolsets available for subagents.
 # Excludes toolsets where ALL tools are blocked, composite/platform toolsets
 # (hermes-* prefixed), and scenario toolsets.
@@ -1344,7 +1403,15 @@ def _run_single_child(
        # Run child with a hard timeout to prevent indefinite blocking
        # when the child's API call or tool-level HTTP request hangs.
        child_timeout = _get_child_timeout()
-        _timeout_executor = ThreadPoolExecutor(max_workers=1)
+        _timeout_executor = ThreadPoolExecutor(
+            max_workers=1,
+            # Install a non-interactive approval callback in the worker thread
+            # so dangerous-command prompts from the subagent don't fall back to
+            # input() and deadlock the parent's prompt_toolkit TUI.
+            # Callback (deny vs approve) is governed by delegation.subagent_auto_approve.
+            initializer=_set_subagent_approval_cb,
+            initargs=(_get_subagent_approval_callback(),),
+        )
        # Capture the worker thread so the timeout diagnostic can dump its
        # Python stack (see #14726 — 0-API-call hangs are opaque without it).
        _worker_thread_holder: Dict[str, Optional[threading.Thread]] = {"t": None}
@@ -473,6 +473,12 @@ _ACTIONS = {
    "remove_role": _remove_role,
 }

+_CORE_ACTION_NAMES = frozenset({"fetch_messages", "search_members", "create_thread"})
+_ADMIN_ACTION_NAMES = frozenset(_ACTIONS.keys()) - _CORE_ACTION_NAMES
+
+_CORE_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _CORE_ACTION_NAMES}
+_ADMIN_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _ADMIN_ACTION_NAMES}
+
 # Single-source-of-truth manifest: action → (signature, one-line description).
 # Consumed by :func:`_build_schema` so the schema's top-level description
 # always matches the registered action set.
@@ -531,7 +537,7 @@ def _load_allowed_actions_config() -> Optional[List[str]]:
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception as exc:
-        logger.debug("discord_server: could not load config (%s); allowing all actions.", exc)
+        logger.debug("discord: could not load config (%s); allowing all actions.", exc)
        return None

    raw = (cfg.get("discord") or {}).get("server_actions")
@@ -586,12 +592,16 @@ def _available_actions(
 def _build_schema(
    actions: List[str],
    caps: Optional[Dict[str, Any]] = None,
-) -> Dict[str, Any]:
-    """Build the tool schema for the given filtered action list."""
+    tool_name: str = "discord",
+) -> Optional[Dict[str, Any]]:
+    """Build the tool schema for the given filtered action list.
+
+    Returns ``None`` when *actions* is empty — callers should drop the
+    tool from registration in that case.
+    """
    caps = caps or {}
    if not actions:
-        # Tool shouldn't be registered when empty, but guard anyway.
-        actions = list(_ACTIONS.keys())
+        return None

    # Action manifest lines (action-first, parameter-scoped).
    manifest_lines = [
@@ -602,24 +612,36 @@ def _build_schema(
    manifest_block = "\n".join(manifest_lines)

    content_note = ""
-    if caps.get("detected") and caps.get("has_message_content") is False:
+    affected_actions = {"fetch_messages", "list_pins"} & set(actions)
+    if affected_actions and caps.get("detected") and caps.get("has_message_content") is False:
+        names = " and ".join(sorted(affected_actions))
        content_note = (
-            "\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
-            "fetch_messages and list_pins will return message metadata (author, "
+            f"\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
+            f"{names} will return message metadata (author, "
            "timestamps, attachments, reactions, pin state) but `content` will be "
            "empty for messages not sent as a direct mention to the bot or in DMs. "
            "Enable the intent in the Discord Developer Portal to see all content."
        )

-    description = (
-        "Query and manage a Discord server via the REST API.\n\n"
-        "Available actions:\n"
-        f"{manifest_block}\n\n"
-        "Call list_guilds first to discover guild_ids, then list_channels for "
-        "channel_ids. Runtime errors will tell you if the bot lacks a specific "
-        "per-guild permission (e.g. MANAGE_ROLES for add_role)."
-        f"{content_note}"
-    )
+    if tool_name == "discord_admin":
+        description = (
+            "Manage a Discord server via the REST API.\n\n"
+            "Available actions:\n"
+            f"{manifest_block}\n\n"
+            "Call list_guilds first to discover guild_ids, then list_channels for "
+            "channel_ids. Runtime errors will tell you if the bot lacks a specific "
+            "per-guild permission (e.g. MANAGE_ROLES for add_role)."
+            f"{content_note}"
+        )
+    else:
+        description = (
+            "Read and participate in a Discord server.\n\n"
+            "Available actions:\n"
+            f"{manifest_block}\n\n"
+            "Use the channel_id from the current conversation context. "
+            "Use search_members to look up user IDs by name prefix."
+            f"{content_note}"
+        )

    properties: Dict[str, Any] = {
        "action": {
@@ -676,7 +698,7 @@ def _build_schema(
    }

    return {
-        "name": "discord_server",
+        "name": tool_name,
        "description": description,
        "parameters": {
            "type": "object",
@@ -686,28 +708,33 @@ def _build_schema(
    }


-def get_dynamic_schema() -> Optional[Dict[str, Any]]:
-    """Return a schema filtered by current intents + config allowlist.
-
-    Called by ``model_tools.get_tool_definitions`` as a post-processing
-    step so the schema the model sees always reflects reality. Returns
-    ``None`` when no actions are available (tool should be removed from
-    the schema list entirely).
-    """
+def _get_dynamic_schema(
+    action_subset: Dict[str, Any],
+    tool_name: str,
+) -> Optional[Dict[str, Any]]:
+    """Build a dynamic schema for *action_subset* filtered by intents + config."""
    token = _get_bot_token()
    if not token:
        return None
-
    caps = _detect_capabilities(token)
    allowlist = _load_allowed_actions_config()
-    actions = _available_actions(caps, allowlist)
+    actions = [a for a in _available_actions(caps, allowlist) if a in action_subset]
    if not actions:
-        logger.warning(
-            "discord_server: config allowlist/intents left zero available actions; "
-            "hiding tool from this session."
-        )
        return None
-    return _build_schema(actions, caps)
+    return _build_schema(actions, caps, tool_name=tool_name)
+
+
+def get_dynamic_schema_core() -> Optional[Dict[str, Any]]:
+    return _get_dynamic_schema(_CORE_ACTIONS, "discord")
+
+
+def get_dynamic_schema_admin() -> Optional[Dict[str, Any]]:
+    return _get_dynamic_schema(_ADMIN_ACTIONS, "discord_admin")
+
+
+def get_dynamic_schema() -> Optional[Dict[str, Any]]:
+    """Backward-compat wrapper — returns core schema."""
+    return get_dynamic_schema_core()


 # ---------------------------------------------------------------------------
@@ -774,11 +801,13 @@ def check_discord_tool_requirements() -> bool:


 # ---------------------------------------------------------------------------
-# Main handler
+# Handlers
 # ---------------------------------------------------------------------------

-def discord_server(
+def _run_discord_action(
    action: str,
+    valid_actions: Dict[str, Any],
+    tool_label: str,
    guild_id: str = "",
    channel_id: str = "",
    user_id: str = "",
@@ -790,18 +819,17 @@ def discord_server(
    before: str = "",
    after: str = "",
    auto_archive_duration: int = 1440,
-    task_id: str = None,
 ) -> str:
-    """Execute a Discord server action."""
+    """Shared handler logic for both discord tools."""
    token = _get_bot_token()
    if not token:
        return json.dumps({"error": "DISCORD_BOT_TOKEN not configured."})

-    action_fn = _ACTIONS.get(action)
+    action_fn = valid_actions.get(action)
    if not action_fn:
        return json.dumps({
            "error": f"Unknown action: {action}",
-            "available_actions": list(_ACTIONS.keys()),
+            "available_actions": list(valid_actions.keys()),
        })

    # Config-level allowlist gate (defense in depth — schema already filtered,
@@ -848,44 +876,64 @@ def discord_server(
            auto_archive_duration=auto_archive_duration,
        )
    except DiscordAPIError as e:
-        logger.warning("Discord API error in action '%s': %s", action, e)
+        logger.warning("Discord API error in %s action '%s': %s", tool_label, action, e)
        if e.status == 403:
            return json.dumps({"error": _enrich_403(action, e.body)})
        return json.dumps({"error": str(e)})
    except Exception as e:
-        logger.exception("Unexpected error in discord_server action '%s'", action)
+        logger.exception("Unexpected error in %s action '%s'", tool_label, action)
        return json.dumps({"error": f"Unexpected error: {e}"})


+def discord_core(action: str, **kwargs) -> str:
+    """Execute a core Discord action (fetch_messages, search_members, create_thread)."""
+    return _run_discord_action(action, _CORE_ACTIONS, "discord", **kwargs)
+
+
+def discord_admin_handler(action: str, **kwargs) -> str:
+    """Execute a Discord admin action (server management)."""
+    return _run_discord_action(action, _ADMIN_ACTIONS, "discord_admin", **kwargs)
+
+
 # ---------------------------------------------------------------------------
 # Tool registration
 # ---------------------------------------------------------------------------

-# Register with the full unfiltered schema. ``model_tools.get_tool_definitions``
-# rebuilds this per-session via ``get_dynamic_schema`` so the model only ever
-# sees intent-available, config-allowed actions. The static registration is a
-# safe baseline for tools that inspect the registry directly.
-_STATIC_SCHEMA = _build_schema(list(_ACTIONS.keys()), caps={"detected": False})
+_HANDLER_DEFAULTS = {
+    "action": "", "guild_id": "", "channel_id": "", "user_id": "",
+    "role_id": "", "message_id": "", "query": "", "name": "",
+    "limit": 50, "before": "", "after": "", "auto_archive_duration": 1440,
+}
+
+
+def _make_handler(handler_fn):
+    """Create a registry-compatible handler lambda for a discord handler."""
+    return lambda args, **kw: handler_fn(
+        **{k: args.get(k, v) for k, v in _HANDLER_DEFAULTS.items()},
+    )
+
+
+_STATIC_CORE_SCHEMA = _build_schema(
+    list(_CORE_ACTIONS.keys()), caps={"detected": False}, tool_name="discord",
+)
+_STATIC_ADMIN_SCHEMA = _build_schema(
+    list(_ADMIN_ACTIONS.keys()), caps={"detected": False}, tool_name="discord_admin",
+)

 registry.register(
-    name="discord_server",
+    name="discord",
    toolset="discord",
-    schema=_STATIC_SCHEMA,
-    handler=lambda args, **kw: discord_server(
-        action=args.get("action", ""),
-        guild_id=args.get("guild_id", ""),
-        channel_id=args.get("channel_id", ""),
-        user_id=args.get("user_id", ""),
-        role_id=args.get("role_id", ""),
-        message_id=args.get("message_id", ""),
-        query=args.get("query", ""),
-        name=args.get("name", ""),
-        limit=args.get("limit", 50),
-        before=args.get("before", ""),
-        after=args.get("after", ""),
-        auto_archive_duration=args.get("auto_archive_duration", 1440),
-        task_id=kw.get("task_id"),
-    ),
+    schema=_STATIC_CORE_SCHEMA,
+    handler=_make_handler(discord_core),
+    check_fn=check_discord_tool_requirements,
+    requires_env=["DISCORD_BOT_TOKEN"],
+)
+
+registry.register(
+    name="discord_admin",
+    toolset="discord_admin",
+    schema=_STATIC_ADMIN_SCHEMA,
+    handler=_make_handler(discord_admin_handler),
    check_fn=check_discord_tool_requirements,
    requires_env=["DISCORD_BOT_TOKEN"],
 )
@@ -58,10 +58,20 @@ MAX_OUTPUT_CHARS = 200_000      # 200KB rolling output buffer
 FINISHED_TTL_SECONDS = 1800     # Keep finished processes for 30 minutes
 MAX_PROCESSES = 64              # Max concurrent tracked processes (LRU pruning)

-# Watch pattern rate limiting
-WATCH_MAX_PER_WINDOW = 8        # Max notifications delivered per window
-WATCH_WINDOW_SECONDS = 10       # Rolling window length
-WATCH_OVERLOAD_KILL_SECONDS = 45  # Sustained overload duration before disabling watch
+# Watch pattern rate limiting — PER SESSION.
+# Hard rule: at most ONE watch-match notification every WATCH_MIN_INTERVAL_SECONDS.
+# Any match arriving inside that cooldown window is dropped and counted as a strike.
+# After WATCH_STRIKE_LIMIT consecutive strike windows, watch_patterns for that
+# session is permanently disabled and the session falls back to notify_on_complete
+# semantics (one notification when the process actually exits).
+WATCH_MIN_INTERVAL_SECONDS = 15   # Minimum spacing between consecutive watch matches
+WATCH_STRIKE_LIMIT = 3            # Strikes in a row → disable watch + promote to notify_on_complete
+
+# Global circuit breaker — across all sessions. Secondary safety net so concurrent
+# siblings can't collectively flood the user even when each is under its own cap.
+WATCH_GLOBAL_MAX_PER_WINDOW = 15
+WATCH_GLOBAL_WINDOW_SECONDS = 10
+WATCH_GLOBAL_COOLDOWN_SECONDS = 30


 def format_uptime_short(seconds: int) -> str:
@@ -105,10 +115,18 @@ class ProcessSession:
    watch_patterns: List[str] = field(default_factory=list)
    _watch_hits: int = field(default=0, repr=False)          # total matches delivered
    _watch_suppressed: int = field(default=0, repr=False)    # matches dropped by rate limit
-    _watch_overload_since: float = field(default=0.0, repr=False)  # when sustained overload began
-    _watch_disabled: bool = field(default=False, repr=False) # permanently killed by overload
-    _watch_window_hits: int = field(default=0, repr=False)   # hits in current rate window
-    _watch_window_start: float = field(default=0.0, repr=False)
+    _watch_disabled: bool = field(default=False, repr=False) # permanently killed after strike limit
+    # Per-session rate limit state: at most one match every WATCH_MIN_INTERVAL_SECONDS.
+    # When an emission happens, _watch_cooldown_until is set to now + interval and
+    # _watch_strike_candidate is cleared. The first match to arrive before that
+    # deadline sets _watch_strike_candidate and counts as one strike (regardless of
+    # how many further matches are dropped in that window — a strike is a window,
+    # not a match). After WATCH_STRIKE_LIMIT strikes in a row, watch_patterns is
+    # disabled and the session promotes to notify_on_complete.
+    _watch_last_emit_at: float = field(default=0.0, repr=False)
+    _watch_cooldown_until: float = field(default=0.0, repr=False)
+    _watch_strike_candidate: bool = field(default=False, repr=False)
+    _watch_consecutive_strikes: int = field(default=0, repr=False)
    _lock: threading.Lock = field(default_factory=threading.Lock)
    _reader_thread: Optional[threading.Thread] = field(default=None, repr=False)
    _pty: Any = field(default=None, repr=False)  # ptyprocess handle (when use_pty=True)
@@ -151,6 +169,15 @@ class ProcessRegistry:
        # via wait/poll/log.  Drain loops skip notifications for these.
        self._completion_consumed: set = set()

+        # Global watch-match circuit breaker — across all sessions.
+        # Prevents sibling processes from collectively flooding the user even
+        # when each stays under its own per-session cap.
+        self._global_watch_lock = threading.Lock()
+        self._global_watch_window_start: float = 0.0
+        self._global_watch_window_hits: int = 0
+        self._global_watch_tripped_until: float = 0.0
+        self._global_watch_suppressed_during_trip: int = 0
+
    @staticmethod
    def _clean_shell_noise(text: str) -> str:
        """Strip shell startup warnings from the beginning of output."""
@@ -163,12 +190,23 @@ class ProcessRegistry:
        """Scan new output for watch patterns and queue notifications.

        Called from reader threads with new_text being the freshly-read chunk.
-        Rate-limited: max WATCH_MAX_PER_WINDOW notifications per WATCH_WINDOW_SECONDS.
-        If sustained overload exceeds WATCH_OVERLOAD_KILL_SECONDS, watching is
-        disabled permanently for this process.
+
+        Per-session rate limit: at most ONE watch-match notification per
+        WATCH_MIN_INTERVAL_SECONDS. Any match arriving inside the cooldown
+        window is dropped and counts as ONE strike for that window. After
+        WATCH_STRIKE_LIMIT consecutive strike windows, watch_patterns is
+        disabled for this session and the session is promoted to
+        notify_on_complete semantics — one notification when the process
+        actually exits, no more mid-process spam.
        """
        if not session.watch_patterns or session._watch_disabled:
            return
+        # Suppress-after-exit: once the reader loop has declared the process
+        # exited, any late chunk we still see is post-exit noise. Dropping these
+        # prevents the "stale notifications delivered minutes after the process
+        # ended" spam when completion_queue consumers run async.
+        if session.exited:
+            return

        # Scan new text line-by-line for pattern matches
        matched_lines = []
@@ -185,55 +223,80 @@ class ProcessRegistry:
            return

        now = time.time()
+        should_disable = False
        with session._lock:
-            # Reset window if it's expired
-            if now - session._watch_window_start >= WATCH_WINDOW_SECONDS:
-                session._watch_window_hits = 0
-                session._watch_window_start = now
-
-            # Check rate limit
-            if session._watch_window_hits >= WATCH_MAX_PER_WINDOW:
+            # Case 1: still inside the cooldown from the last emission.
+            # Count this as a strike for the current window (only once per window)
+            # and drop the event. If we've hit the strike limit, disable watch
+            # and promote to notify_on_complete.
+            if session._watch_cooldown_until and now < session._watch_cooldown_until:
                session._watch_suppressed += len(matched_lines)
+                if not session._watch_strike_candidate:
+                    # First drop in this window — count one strike.
+                    session._watch_strike_candidate = True
+                    session._watch_consecutive_strikes += 1
+                    if session._watch_consecutive_strikes >= WATCH_STRIKE_LIMIT:
+                        session._watch_disabled = True
+                        # Promote to notify_on_complete so the agent still gets
+                        # exactly one notification when the process actually ends.
+                        session.notify_on_complete = True
+                        should_disable = True
+                return_early = True
+            else:
+                # Case 2: cooldown has expired.
+                # Decide whether this window was a "clean" one (no drops) or a
+                # strike window. If no strike candidate was set during the prior
+                # cooldown, reset the consecutive-strike counter — we're back to
+                # healthy emission cadence.
+                if (
+                    session._watch_cooldown_until
+                    and not session._watch_strike_candidate
+                ):
+                    session._watch_consecutive_strikes = 0
+                session._watch_strike_candidate = False

-                # Track sustained overload for kill switch
-                if session._watch_overload_since == 0.0:
-                    session._watch_overload_since = now
-                elif now - session._watch_overload_since > WATCH_OVERLOAD_KILL_SECONDS:
-                    session._watch_disabled = True
-                    self.completion_queue.put({
-                        "session_id": session.id,
-                        "session_key": session.session_key,
-                        "command": session.command,
-                        "type": "watch_disabled",
-                        "suppressed": session._watch_suppressed,
-                        "platform": session.watcher_platform,
-                        "chat_id": session.watcher_chat_id,
-                        "user_id": session.watcher_user_id,
-                        "user_name": session.watcher_user_name,
-                        "thread_id": session.watcher_thread_id,
-                        "message": (
-                            f"Watch patterns disabled for process {session.id} — "
-                            f"too many matches ({session._watch_suppressed} suppressed). "
-                            f"Use process(action='poll') to check output manually."
-                        ),
-                    })
-                return
+                # Emit the notification and start a new cooldown window.
+                session._watch_last_emit_at = now
+                session._watch_cooldown_until = now + WATCH_MIN_INTERVAL_SECONDS
+                session._watch_hits += 1
+                suppressed = session._watch_suppressed
+                session._watch_suppressed = 0
+                return_early = False

-            # Under the rate limit — deliver notification
-            session._watch_window_hits += 1
-            session._watch_hits += 1
-            # Clear overload tracker since we got a delivery through
-            session._watch_overload_since = 0.0
-
-            # Include suppressed count if any events were dropped
-            suppressed = session._watch_suppressed
-            session._watch_suppressed = 0
+        if return_early:
+            if should_disable:
+                # Emit exactly one "watch disabled, falling back to notify_on_complete"
+                # summary event so the agent/user sees why things went quiet.
+                self.completion_queue.put({
+                    "session_id": session.id,
+                    "session_key": session.session_key,
+                    "command": session.command,
+                    "type": "watch_disabled",
+                    "suppressed": session._watch_suppressed,
+                    "platform": session.watcher_platform,
+                    "chat_id": session.watcher_chat_id,
+                    "user_id": session.watcher_user_id,
+                    "user_name": session.watcher_user_name,
+                    "thread_id": session.watcher_thread_id,
+                    "message": (
+                        f"Watch patterns disabled for process {session.id} — "
+                        f"{WATCH_STRIKE_LIMIT} consecutive rate-limit windows triggered "
+                        f"(min spacing {WATCH_MIN_INTERVAL_SECONDS}s). "
+                        f"Falling back to notify_on_complete semantics; you'll get "
+                        f"exactly one notification when the process exits."
+                    ),
+                })
+            return

        # Trim matched output to a reasonable size
        output = "\n".join(matched_lines[:20])
        if len(output) > 2000:
            output = output[:2000] + "\n...(truncated)"

+        # Global circuit breaker — across all sessions (secondary safety net).
+        if not self._global_watch_admit(now):
+            return
+
        self.completion_queue.put({
            "session_id": session.id,
            "session_key": session.session_key,
@@ -249,6 +312,93 @@ class ProcessRegistry:
            "thread_id": session.watcher_thread_id,
        })

+    def _global_watch_admit(self, now: float) -> bool:
+        """Return True if this watch_match event is allowed through the global breaker.
+
+        Semantics:
+        - If we're currently in a cooldown period, drop the event and count it.
+        - Otherwise, reset the fixed counting window once it has elapsed and check the global cap.
+        - If the cap is exceeded, trip the breaker for WATCH_GLOBAL_COOLDOWN_SECONDS
+          and emit ONE summary event so the agent/user sees "N notifications were
+          suppressed" instead of getting them individually.
+        - When the cooldown ends, emit a release summary and reset counters.
+        """
+        with self._global_watch_lock:
+            # Handle cooldown expiry first so we can emit the release summary.
+            if self._global_watch_tripped_until and now >= self._global_watch_tripped_until:
+                suppressed = self._global_watch_suppressed_during_trip
+                self._global_watch_tripped_until = 0.0
+                self._global_watch_suppressed_during_trip = 0
+                self._global_watch_window_start = now
+                self._global_watch_window_hits = 0
+                if suppressed > 0:
+                    # Queue a summary event outside the lock (below).
+                    release_msg = {
+                        "session_id": "",
+                        "session_key": "",
+                        "command": "",
+                        "type": "watch_overflow_released",
+                        "suppressed": suppressed,
+                        "message": (
+                            f"Watch-pattern notifications resumed. "
+                            f"{suppressed} match event(s) were suppressed during the flood."
+                        ),
+                        "platform": "",
+                        "chat_id": "",
+                        "user_id": "",
+                        "user_name": "",
+                        "thread_id": "",
+                    }
+                else:
+                    release_msg = None
+            else:
+                release_msg = None
+
+            # Still in cooldown — drop and count.
+            if self._global_watch_tripped_until and now < self._global_watch_tripped_until:
+                self._global_watch_suppressed_during_trip += 1
+                admit = False
+                trip_now = None
+            else:
+                # Slide the window.
+                if now - self._global_watch_window_start >= WATCH_GLOBAL_WINDOW_SECONDS:
+                    self._global_watch_window_start = now
+                    self._global_watch_window_hits = 0
+
+                if self._global_watch_window_hits >= WATCH_GLOBAL_MAX_PER_WINDOW:
+                    # Trip the breaker.
+                    self._global_watch_tripped_until = now + WATCH_GLOBAL_COOLDOWN_SECONDS
+                    self._global_watch_suppressed_during_trip += 1
+                    trip_now = now
+                    admit = False
+                else:
+                    self._global_watch_window_hits += 1
+                    trip_now = None
+                    admit = True
+
+        # Queue summary events outside the lock.
+        if release_msg is not None:
+            self.completion_queue.put(release_msg)
+        if trip_now is not None:
+            self.completion_queue.put({
+                "session_id": "",
+                "session_key": "",
+                "command": "",
+                "type": "watch_overflow_tripped",
+                "message": (
+                    f"Watch-pattern overflow: >{WATCH_GLOBAL_MAX_PER_WINDOW} "
+                    f"notifications in {WATCH_GLOBAL_WINDOW_SECONDS}s across all processes. "
+                    f"Suppressing further watch_match events for "
+                    f"{WATCH_GLOBAL_COOLDOWN_SECONDS}s."
+                ),
+                "platform": "",
+                "chat_id": "",
+                "user_id": "",
+                "user_name": "",
+                "thread_id": "",
+            })
+        return admit
+
    @staticmethod
    def _is_host_pid_alive(pid: Optional[int]) -> bool:
        """Best-effort liveness check for host-visible PIDs."""
@@ -1388,6 +1388,33 @@ def _foreground_background_guidance(command: str) -> str | None:
    return None


+def _resolve_notification_flag_conflict(
+    *,
+    notify_on_complete: bool,
+    watch_patterns,
+    background: bool,
+) -> tuple:
+    """Decide what to do when both notify_on_complete and watch_patterns are set.
+
+    These flags produce duplicate, delayed notifications when combined — one
+    notification per watch-pattern match AND one on process exit, with async
+    delivery that can spam the user long after the process ends. When both are
+    set, we drop watch_patterns in favor of notify_on_complete (the more useful
+    "let me know when it's done" signal) and return a human-readable note.
+
+    Returns:
+        (watch_patterns_to_use, conflict_note). conflict_note is "" when there
+        is no conflict.
+    """
+    if background and notify_on_complete and watch_patterns:
+        note = (
+            "watch_patterns ignored because notify_on_complete=True; "
+            "these two flags produce duplicate notifications when combined"
+        )
+        return None, note
+    return watch_patterns, ""
+
+
 def terminal_tool(
    command: str,
    background: bool = False,
@@ -1410,8 +1437,8 @@ def terminal_tool(
        force: If True, skip dangerous command check (use after user confirms)
        workdir: Working directory for this command (optional, uses session cwd if not set)
        pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
-        notify_on_complete: If True and background=True, auto-notify the agent when the process exits
-        watch_patterns: List of strings to watch for in background output; fires a notification on first match per pattern. Use ONLY for mid-process signals (errors, readiness markers) that appear before exit. For end-of-run markers use notify_on_complete instead — stacking both produces duplicate, delayed notifications.
+        notify_on_complete: If True and background=True, you'll be notified exactly once when the process exits. The right choice for almost every long task. MUTUALLY EXCLUSIVE with watch_patterns.
+        watch_patterns: List of strings to watch for in background output. HARD rate limit: 1 notification per 15s per process. After 3 strike windows in a row, watch_patterns is disabled and the session is auto-promoted to notify_on_complete. Use ONLY for rare, one-shot mid-process signals on long-lived processes (server readiness, migration-done markers). NEVER use in loops/batch jobs — error patterns there will hit the strike limit and get disabled. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both.

    Returns:
        str: JSON string with output, exit_code, and error fields
@@ -1701,6 +1728,22 @@ def terminal_tool(
                        proc_session.watcher_user_name = _gw_user_name
                        proc_session.watcher_thread_id = _gw_thread_id

+                # Mutual exclusion: if both notify_on_complete and watch_patterns
+                # are set, drop watch_patterns. The combination produces duplicate
+                # notifications (one per match + one on exit) that deliver
+                # asynchronously and can spam the user long after the process ends.
+                # notify_on_complete is the more useful signal for "let me know
+                # when the task finishes"; watch_patterns should be reserved for
+                # standalone mid-process signals on long-lived processes.
+                watch_patterns, conflict_note = _resolve_notification_flag_conflict(
+                    notify_on_complete=bool(notify_on_complete),
+                    watch_patterns=watch_patterns,
+                    background=bool(background),
+                )
+                if conflict_note:
+                    logger.warning("background proc %s: %s", proc_session.id, conflict_note)
+                    result_data["watch_patterns_ignored"] = conflict_note
+
                # Mark for agent notification on completion
                if notify_on_complete and background:
                    proc_session.notify_on_complete = True
@@ -2039,13 +2082,13 @@ TERMINAL_SCHEMA = {
            },
            "notify_on_complete": {
                "type": "boolean",
-                "description": "When true (and background=true), you'll be automatically notified when the process finishes — no polling needed. Use this for tasks that take a while (tests, builds, deployments) so you can keep working on other things in the meantime.",
+                "description": "When true (and background=true), you'll be automatically notified exactly once when the process finishes. **This is the right choice for almost every long-running task** — tests, builds, deployments, multi-item batch jobs, anything that takes over a minute and has a defined end. Use this and keep working on other things; the system notifies you on exit. MUTUALLY EXCLUSIVE with watch_patterns — when both are set, watch_patterns is dropped.",
                "default": False
            },
            "watch_patterns": {
                "type": "array",
                "items": {"type": "string"},
-                "description": "Strings to watch for in background process output. Fires a notification the first time each pattern matches a line of output. **Use ONLY for mid-process signals** you want to react to before the process exits — errors, readiness markers, intermediate step markers (e.g. [\"ERROR\", \"Traceback\", \"listening on port\"]). Do NOT use for end-of-run markers (summary headers, 'DONE', 'PASS' printed right before exit) — use `notify_on_complete` for that instead. Stacking end-of-run patterns on top of `notify_on_complete` produces duplicate, delayed notifications that arrive after you've already moved on, since delivery is asynchronous and continues after the process exits."
+                "description": "Strings to watch for in background process output. HARD RATE LIMIT: at most 1 notification per 15 seconds per process — matches arriving inside the cooldown are dropped. After 3 consecutive 15-second windows with dropped matches, watch_patterns is automatically disabled for that process and promoted to notify_on_complete behavior (one notification on exit, no more mid-process spam). USE ONLY for truly rare, one-shot mid-process signals on LONG-LIVED processes that will never exit on their own — e.g. ['Application startup complete'] on a server so you know when to hit its endpoint, or ['migration done'] on a daemon. DO NOT use for: (1) end-of-run markers like 'DONE'/'PASS' — use notify_on_complete instead; (2) error patterns like 'ERROR'/'Traceback' in loops or multi-item batch jobs — they fire on every iteration and you'll hit the strike limit fast; (3) anything you'd ever combine with notify_on_complete. When in doubt, choose notify_on_complete. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both."
            }
        },
        "required": ["command"]
@@ -202,6 +202,18 @@ TOOLSETS = {
        "includes": []
    },

+    "discord": {
+        "description": "Discord read and participate tools (fetch messages, search members, create threads)",
+        "tools": ["discord"],
+        "includes": [],
+    },
+
+    "discord_admin": {
+        "description": "Discord server management (list channels/roles, pin messages, assign roles)",
+        "tools": ["discord_admin"],
+        "includes": [],
+    },
+
    "feishu_doc": {
        "description": "Read Feishu/Lark document content",
        "tools": ["feishu_doc_read"],
@@ -326,8 +338,8 @@ TOOLSETS = {
    "hermes-discord": {
        "description": "Discord bot toolset - full access (terminal has safety checks via dangerous command approval)",
        "tools": _HERMES_CORE_TOOLS + [
-            # Discord server introspection & management (gated on DISCORD_BOT_TOKEN via check_fn)
-            "discord_server",
+            "discord",
+            "discord_admin",
        ],
        "includes": []
    },
@@ -388,7 +400,13 @@ TOOLSETS = {

    "hermes-feishu": {
        "description": "Feishu/Lark bot toolset - enterprise messaging via Feishu/Lark (full access)",
-        "tools": _HERMES_CORE_TOOLS,
+        "tools": _HERMES_CORE_TOOLS + [
+            "feishu_doc_read",
+            "feishu_drive_list_comments",
+            "feishu_drive_list_comment_replies",
+            "feishu_drive_reply_comment",
+            "feishu_drive_add_comment",
+        ],
        "includes": []
    },

@@ -560,17 +560,55 @@ def resolve_skin() -> dict:


 def _resolve_model() -> str:
-    env = os.environ.get("HERMES_MODEL", "")
+    env = (
+        os.environ.get("HERMES_MODEL", "")
+        or os.environ.get("HERMES_INFERENCE_MODEL", "")
+    ).strip()
    if env:
        return env
    m = _load_cfg().get("model", "")
    if isinstance(m, dict):
-        return m.get("default", "")
+        return str(m.get("default", "") or "").strip()
    if isinstance(m, str) and m:
-        return m
+        return m.strip()
    return "anthropic/claude-sonnet-4"


+def _resolve_startup_runtime() -> tuple[str, str | None]:
+    model = _resolve_model()
+    explicit_provider = os.environ.get("HERMES_TUI_PROVIDER", "").strip()
+    if explicit_provider:
+        return model, explicit_provider
+
+    explicit_model = (
+        os.environ.get("HERMES_MODEL", "")
+        or os.environ.get("HERMES_INFERENCE_MODEL", "")
+    ).strip()
+    if not explicit_model:
+        return model, None
+
+    try:
+        from hermes_cli.models import detect_static_provider_for_model
+
+        cfg = _load_cfg().get("model") or {}
+        current_provider = (
+            (
+                str(cfg.get("provider") or "").strip().lower()
+                if isinstance(cfg, dict)
+                else ""
+            )
+            or os.environ.get("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+            or "auto"
+        )
+        detected = detect_static_provider_for_model(explicit_model, current_provider)
+        if detected:
+            provider, detected_model = detected
+            return detected_model, provider
+    except Exception:
+        pass
+    return model, None
+
+
 def _write_config_key(key_path: str, value):
    cfg = _load_cfg()
    current = cfg
@@ -736,12 +774,15 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
        _emit("session.info", sid, _session_info(agent))

    os.environ["HERMES_MODEL"] = result.new_model
+    os.environ["HERMES_INFERENCE_MODEL"] = result.new_model
    # Keep the process-level provider env var in sync with the user's explicit
    # choice so any ambient re-resolution (credential pool refresh, compressor
    # rebuild, aux clients) resolves to the new provider instead of the
    # original one persisted in config or env.
    if result.target_provider:
        os.environ["HERMES_INFERENCE_PROVIDER"] = result.target_provider
+        if os.environ.get("HERMES_TUI_PROVIDER"):
+            os.environ["HERMES_TUI_PROVIDER"] = result.target_provider
    if persist_global:
        _persist_model_switch(result)
    return {"value": result.new_model, "warning": result.warning_message or ""}
@@ -1277,9 +1318,13 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):

    cfg = _load_cfg()
    system_prompt = ((cfg.get("agent") or {}).get("system_prompt", "") or "").strip()
-    runtime = resolve_runtime_provider(requested=None)
+    model, requested_provider = _resolve_startup_runtime()
+    runtime = resolve_runtime_provider(
+        requested=requested_provider,
+        target_model=model or None,
+    )
    return AIAgent(
-        model=_resolve_model(),
+        model=model,
        provider=runtime.get("provider"),
        base_url=runtime.get("base_url"),
        api_key=runtime.get("api_key"),
@@ -0,0 +1,67 @@
+# TUI Showroom
+
+Scripted demos of `ui-tui`. Workflows snapshot real ui-tui components (`MessageLine`, `Panel`, `Box`, `Text`) into ANSI and replay them through xterm.js with cinematic overlays. Recorded once, played any number of times — built for screen capture.
+
+```bash
+npm run showroom            # dev server at http://127.0.0.1:4317
+npm run showroom:record     # regenerate every workflow JSON
+npm run showroom:build      # dist/<name>.html for every workflow
+npm run showroom:type-check
+```
+
+## Bundled workflows
+
+| File                                 | Shows                                                          |
+| ------------------------------------ | -------------------------------------------------------------- |
+| `workflows/feature-tour.json`        | Plan → tool trail → result highlight                           |
+| `workflows/subagent-trail.json`      | Parallel subagents, hot lanes, summary                         |
+| `workflows/slash-commands.json`      | `/skills`, `/model`, `/agents`, `/help` typed → echoed → panel |
+| `workflows/voice-mode.json`          | VAD capture, transcript, TTS ducking                           |
+| `workflows/interactive-prompts.json` | Approval prompt → clarify prompt → deploy result               |
+| `workflows/model-picker.json`        | Provider stage → model stage → switch result                   |
+
+Pick a workflow from the dropdown or deep-link with `?w=<name>`.
+
+## Architecture
+
+```
+record.tsx           ─┐
+  ↳ MessageLine,     │  Ink renders → Writable → ANSI string
+    Panel, Box, Text │
+                     ▼
+workflows/<name>.json
+                     │  served at /api/workflow/<name>
+                     ▼
+showroom.js          │  xterm.js renders ANSI; DOM overlays target frame ids
+                     ▼
+browser
+```
+
+`frame` actions embed ANSI from an Ink render; the browser feeds them into `@xterm/xterm` (CDN, cached) so the surface is the actual TUI. Captions, spotlights, highlights, and fades are DOM overlays anchored to frame `id`s.
+
+## Timeline actions
+
+| Action      | Required         | Optional                                      |
+| ----------- | ---------------- | --------------------------------------------- |
+| `frame`     | `ansi`           | `id`                                          |
+| `status`    | `text`           | `detail`                                      |
+| `compose`   | `text`           | `duration` (typewriter)                       |
+| `caption`   | `target`, `text` | `position` (`left`/`right`/`top`), `duration` |
+| `spotlight` | `target`         | `pad`, `duration`                             |
+| `highlight` | `target`         | `duration`                                    |
+| `fade`      | `target`         | `to` (default `0`), `duration`                |
+| `clear`     | —                | —                                             |
+
+`target` references the `id` of an earlier `frame`. `viewport.scale` (or the 1x–4x picker) controls the upscale factor for capture.
+
+## Player
+
+- Restart (`R`), 1x–4x scale, 0.5x/1x/2x speed (`1`/`2`/`3`).
+- Progress bar reads `at + duration` from the slowest action.
+
+## Adding a workflow
+
+1. Add a scene fn to `record.tsx` returning `{ title, viewport, composer, timeline }`.
+2. Compose Ink primitives or pull `MessageLine` / `Panel` from `../src`.
+3. `await snap(<Component />)` for each frame.
+4. Register the scene in `main()` in `record.tsx` so it is written to `workflows/<name>.json`.
+5. `npm run showroom:record`.
+
+Components must be state-free at first paint — `useEffect` hooks won't fire by the time the recorder unmounts. For accordions like the live `ToolTrail`, render a flat `Box` + `Text` scene instead.
@@ -0,0 +1,70 @@
+import { mkdirSync, writeFileSync } from 'node:fs'
+import { dirname, join, resolve } from 'node:path'
+
+import { listWorkflows, readWorkflow, renderPage, showroomRoot } from './page.js'
+
+// Flags that consume the following argv token as their value (none for the build script).
+const FLAG_VALUES = new Set<string>([])
+
+// Positional CLI arguments: every token after the script path that is neither
+// a dash-prefixed switch nor the value consumed by a FLAG_VALUES flag.
+const positionals = (() => {
+  const args = process.argv.slice(2)
+  const collected: string[] = []
+  let cursor = 0
+
+  while (cursor < args.length) {
+    const token = args[cursor]!
+
+    if (FLAG_VALUES.has(token)) {
+      // Value-taking flag: step over the flag and the value that follows it.
+      cursor += 2
+    } else {
+      if (!token.startsWith('-')) {
+        collected.push(token)
+      }
+
+      cursor += 1
+    }
+  }
+
+  return collected
+})()
+
+// First positional: workflow JSON to render; second: output HTML path.
+const explicitWorkflow = positionals[0]
+const explicitOut = positionals[1]
+const distDir = resolve(showroomRoot, 'dist')
+
+// Write `html` to `path`, creating any missing parent directories first.
+const writeHtml = (path: string, html: string) => {
+  const parent = dirname(path)
+
+  mkdirSync(parent, { recursive: true })
+  writeFileSync(path, html)
+}
+
+// Render one standalone HTML page per catalogued workflow into dist/, then an
+// index.html that shows 'feature-tour' when present, else the first workflow.
+const buildAll = () => {
+  const catalog = listWorkflows()
+
+  // Render `entry` against the full catalog, write it to dist/<fileName>, log the path.
+  const emit = (entry: (typeof catalog)[number], fileName: string) => {
+    const page = renderPage({ name: entry.name, workflow: readWorkflow(entry.path) }, catalog)
+    const target = join(distDir, fileName)
+
+    writeHtml(target, page)
+    console.log(target)
+  }
+
+  catalog.forEach(entry => emit(entry, `${entry.name}.html`))
+
+  if (catalog.length === 0) {
+    return
+  }
+
+  emit(catalog.find(w => w.name === 'feature-tour') ?? catalog[0]!, 'index.html')
+}
+
+// Entry point: with a positional workflow path, render just that file (to the
+// optional second positional, else dist/index.html); otherwise build everything.
+if (!explicitWorkflow) {
+  buildAll()
+} else {
+  const workflowPath = resolve(process.cwd(), explicitWorkflow)
+  const target = resolve(process.cwd(), explicitOut ?? join(distDir, 'index.html'))
+  const entries = listWorkflows()
+  const page = renderPage({ name: 'override', workflow: readWorkflow(workflowPath) }, entries)
+
+  writeHtml(target, page)
+  console.log(target)
+}
@@ -0,0 +1,58 @@
+import { readdirSync, readFileSync, statSync } from 'node:fs'
+import { dirname, join, parse } from 'node:path'
+import { fileURLToPath } from 'node:url'
+
+export const showroomRoot = dirname(fileURLToPath(import.meta.url))
+export const workflowsDir = join(showroomRoot, 'workflows')
+
+export interface WorkflowEntry {
+  name: string
+  path: string
+  title: string
+}
+
+// List every regular *.json file in workflowsDir as a WorkflowEntry, sorted by
+// name. The title is the file's top-level `title`, falling back to the file stem.
+export const listWorkflows = (): WorkflowEntry[] => {
+  const isWorkflowFile = (file: string) => file.endsWith('.json') && statSync(join(workflowsDir, file)).isFile()
+
+  const toEntry = (file: string): WorkflowEntry => {
+    const path = join(workflowsDir, file)
+    const data = JSON.parse(readFileSync(path, 'utf8'))
+
+    return { name: parse(file).name, path, title: String(data.title ?? parse(file).name) }
+  }
+
+  const entries = readdirSync(workflowsDir).filter(isWorkflowFile).map(toEntry)
+
+  entries.sort((a, b) => a.name.localeCompare(b.name))
+
+  return entries
+}
+
+// Path of the workflow used when none is requested: 'feature-tour' when it
+// exists, else the first catalog entry, else '' when there are no workflows.
+// Computed once at module load from a single directory scan (the catalog is
+// listed once instead of once per fallback).
+export const defaultWorkflowPath = (() => {
+  const catalog = listWorkflows()
+
+  return catalog.find(w => w.name === 'feature-tour')?.path ?? catalog[0]?.path ?? ''
+})()
+
+// Read and parse the workflow JSON at `path` (defaults to defaultWorkflowPath).
+export const readWorkflow = (path = defaultWorkflowPath) => JSON.parse(readFileSync(path, 'utf8'))
+
+// Build the self-contained showroom HTML page: inlines src/showroom.css and
+// src/showroom.js, and embeds the initial workflow plus a name/title-only
+// catalog as JSON globals. `<` is escaped in the embedded JSON so the payload
+// cannot close the surrounding <script> element.
+export const renderPage = (initial: { name: string; workflow: unknown }, catalog: WorkflowEntry[]) => {
+  const readAsset = (file: string) => readFileSync(join(showroomRoot, 'src', file), 'utf8')
+  const inlineJson = (value: unknown) => JSON.stringify(value).replace(/</g, '\\u003c')
+
+  const css = readAsset('showroom.css')
+  const js = readAsset('showroom.js')
+  const initialJson = inlineJson(initial)
+  // Only name and title are exposed to the page; filesystem paths are dropped.
+  const catalogJson = inlineJson(catalog.map(({ name, title }) => ({ name, title })))
+
+  return `<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>Hermes TUI Showroom</title>
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@xterm/xterm@6.0.0/css/xterm.css" />
+    <style>${css}</style>
+  </head>
+  <body>
+    <main id="showroom"></main>
+    <script>
+      window.__SHOWROOM_INITIAL__ = ${initialJson};
+      window.__SHOWROOM_CATALOG__ = ${catalogJson};
+    </script>
+    <script type="importmap">
+    { "imports": { "@xterm/": "https://cdn.jsdelivr.net/npm/@xterm/xterm@6.0.0/" } }
+    </script>
+    <script type="module">${js}</script>
+  </body>
+</html>`
+}
@@ -0,0 +1,802 @@
+import { rmSync, writeFileSync } from 'node:fs'
+import { dirname, join } from 'node:path'
+import { Writable } from 'node:stream'
+import { fileURLToPath } from 'node:url'
+
+import React from 'react'
+
+import { Box, render, Text } from '@hermes/ink'
+
+import { Panel } from '../src/components/branding.js'
+import { MessageLine } from '../src/components/messageLine.js'
+import type { Theme } from '../src/theme.js'
+import { DEFAULT_THEME } from '../src/theme.js'
+import type { Msg } from '../src/types.js'
+
+const showroomRoot = dirname(fileURLToPath(import.meta.url))
+
+// In-memory Writable handed to the renderer in place of a real stdout; it
+// accumulates every chunk written to it in `buffer`.
+class Capture extends Writable {
+  // Concatenation of every chunk written so far.
+  buffer = ''
+  // Terminal-like fields; presumably read by the renderer so it treats this
+  // stream as a TTY of the given size — confirm against @hermes/ink.
+  isTTY = true
+  columns: number
+  rows: number
+
+  constructor(cols: number, rows: number) {
+    super()
+    this.columns = cols
+    this.rows = rows
+  }
+
+  // Append the chunk's string form to the buffer, then signal completion.
+  override _write(chunk: any, _encoding: any, callback: any) {
+    this.buffer += chunk.toString()
+    callback()
+  }
+}
+
+const COLS = 80
+const ROWS = 16
+const t = DEFAULT_THEME
+
+// Render `node` into an in-memory COLS×ROWS capture stream, wait `settle`
+// milliseconds, unmount, and resolve with everything the renderer wrote.
+const snap = async (node: React.ReactElement, settle = 120): Promise<string> => {
+  const capture = new Capture(COLS, ROWS)
+  const instance = await render(node, {
+    stdout: capture as unknown as NodeJS.WriteStream,
+    exitOnCtrlC: false,
+    patchConsole: false
+  })
+
+  await new Promise(done => setTimeout(done, settle))
+  instance.unmount()
+
+  return capture.buffer
+}
+
+// Shorthand component: the props object is itself the Msg, rendered through
+// MessageLine at the recorder width (COLS) with the default theme.
+const Msg = (msg: Msg) => <MessageLine cols={COLS} msg={msg} t={t} />
+
+// Flat tool-trail stand-in: a "⚡ title (count)" header followed by one
+// tree-stemmed row per item, the last row drawn with a closing └─ stem.
+const ToolPanel = ({ items, title, theme }: { items: string[]; theme: Theme; title: string }) => {
+  const { amber, bronze, dim } = theme.color
+  const lastIndex = items.length - 1
+
+  return (
+    <Box flexDirection="column" marginLeft={2}>
+      <Box>
+        <Text color={bronze}>⚡ </Text>
+        <Text bold color={amber}>
+          {title}
+        </Text>
+        <Text color={dim}> ({items.length})</Text>
+      </Box>
+      {items.map((item, index) => (
+        <Box key={index}>
+          <Text color={bronze}>{index === lastIndex ? '└─ ' : '├─ '}</Text>
+          <Text color={dim}>{item}</Text>
+        </Box>
+      ))}
+    </Box>
+  )
+}
+
+// Tree-stemmed list: each row is drawn with a ├─ ('mid') or └─ ('last') stem
+// and its `cols` joined by two spaces, coloured by `tone` (dim when omitted).
+const Tree = ({
+  rows,
+  theme
+}: {
+  rows: { branch: 'mid' | 'last'; cols: string[]; tone?: 'amber' | 'dim' | 'gold' | 'ok' }[]
+  theme: Theme
+}) => {
+  const palette = {
+    amber: theme.color.amber,
+    dim: theme.color.dim,
+    gold: theme.color.gold,
+    ok: theme.color.ok
+  }
+
+  return (
+    <Box flexDirection="column" marginLeft={2}>
+      {rows.map((row, index) => {
+        const stem = row.branch === 'last' ? '└─ ' : '├─ '
+        const tone = row.tone ? palette[row.tone] : theme.color.dim
+
+        return (
+          <Box key={index}>
+            <Text color={theme.color.bronze}>{stem}</Text>
+            <Text color={tone}>{row.cols.join('  ')}</Text>
+          </Box>
+        )
+      })}
+    </Box>
+  )
+}
+
+// Serialize `workflow` as 2-space-indented JSON to workflows/<name>.json and log the path.
+const writeWorkflow = (name: string, workflow: Record<string, unknown>) => {
+  const target = join(showroomRoot, 'workflows', `${name}.json`)
+  const json = JSON.stringify(workflow, null, 2)
+
+  writeFileSync(target, json)
+  console.log(`  wrote ${target}`)
+}
+
+// Scene "feature-tour": user prompt → assistant plan → tool trail → assistant
+// result. Each snapshot becomes a `frame`; a spotlight, a highlight and
+// captions target the tool-trail and result frames by id.
+const featureTour = async () => {
+  const userPrompt = await snap(<Msg role="user" text="Build a focused plan for a safer gateway approval flow." />)
+
+  const assistantPlan = await snap(
+    <Msg
+      role="assistant"
+      text="I'll trace the gateway guards first, then patch the smallest boundary that keeps approval commands live while an agent is blocked."
+    />
+  )
+
+  const toolTrail = await snap(
+    <ToolPanel
+      items={[
+        'rg "approval.request" gateway/ tui_gateway/',
+        'ReadFile gateway/run.py',
+        'ReadFile gateway/platforms/base.py'
+      ]}
+      theme={t}
+      title="tool trail"
+    />
+  )
+
+  const assistantResult = await snap(
+    <Msg
+      role="assistant"
+      text="Found the split guard. Bypass both queues only for approval commands; normal chat ordering stays intact."
+    />
+  )
+
+  // NOTE(review): `at` / `duration` look like milliseconds on the playback
+  // clock — confirm against the player in showroom.js.
+  return {
+    composer: 'ask hermes anything',
+    timeline: [
+      { ansi: userPrompt, at: 200, id: 'user-row', type: 'frame' },
+      { ansi: assistantPlan, at: 1500, id: 'assistant-plan', type: 'frame' },
+      { ansi: toolTrail, at: 2900, id: 'tool-trail', type: 'frame' },
+      { at: 3200, duration: 1700, target: 'tool-trail', type: 'spotlight' },
+      {
+        at: 3400,
+        duration: 1700,
+        position: 'right',
+        target: 'tool-trail',
+        text: 'Real ui-tui MessageLine + Panel rendered to ANSI and replayed via xterm.js.',
+        type: 'caption'
+      },
+      { ansi: assistantResult, at: 5400, id: 'assistant-result', type: 'frame' },
+      { at: 6100, duration: 1300, target: 'assistant-result', type: 'highlight' },
+      {
+        at: 6300,
+        duration: 1700,
+        position: 'right',
+        target: 'assistant-result',
+        text: 'Captions, spotlights, and fades layer on top of real ANSI. Best of both.',
+        type: 'caption'
+      },
+      { at: 8100, duration: 600, text: '/approve', type: 'compose' }
+    ],
+    title: 'Hermes TUI · Feature Tour',
+    viewport: { cols: COLS, rows: ROWS }
+  }
+}
+
+// Scene "subagent-trail": user prompt and plan, then two Tree snapshots of the
+// same three subagents (`live`: two running, one queued; `hot`: two complete,
+// one running), then a summary message.
+const subagentTrail = async () => {
+  const userPrompt = await snap(<Msg role="user" text="Run tests, lint, and a Railway preview deploy in parallel." />)
+
+  const plan = await snap(
+    <Msg role="assistant" text="Spawning three subagents on the fan-out lane and watching their tool counts." />
+  )
+
+  // Each row is a single pre-padded string, so the column alignment lives in the literal.
+  const live = await snap(
+    <Tree
+      rows={[
+        { branch: 'mid', cols: ['tests   running   12 tools   ⏱ 14.2s'], tone: 'amber' },
+        { branch: 'mid', cols: ['lint    running    4 tools   ⏱ 14.2s'], tone: 'amber' },
+        { branch: 'last', cols: ['deploy  queued     0 tools   ⏱  0.0s'], tone: 'dim' }
+      ]}
+      theme={t}
+    />
+  )
+
+  const hot = await snap(
+    <Tree
+      rows={[
+        { branch: 'mid', cols: ['tests   complete  18 tools   ⏱ 22.7s   ✓'], tone: 'ok' },
+        { branch: 'mid', cols: ['lint    complete   6 tools   ⏱ 18.1s   ✓'], tone: 'ok' },
+        { branch: 'last', cols: ['deploy  running    9 tools   ⏱  9.4s'], tone: 'gold' }
+      ]}
+      theme={t}
+    />
+  )
+
+  const summary = await snap(
+    <Msg role="assistant" text="All three landed: 24 tests pass, lint clean, preview at https://pr-128.railway.app." />
+  )
+
+  return {
+    composer: 'spawn the deploy fan-out',
+    timeline: [
+      { ansi: userPrompt, at: 200, id: 'ask', type: 'frame' },
+      { ansi: plan, at: 1100, id: 'plan', type: 'frame' },
+      { ansi: live, at: 2100, id: 'live', type: 'frame' },
+      { at: 2300, duration: 1500, target: 'live', type: 'spotlight' },
+      {
+        at: 2500,
+        duration: 1700,
+        position: 'right',
+        target: 'live',
+        text: 'Each subagent gets its own depth and tool budget; the dashboard tracks them live.',
+        type: 'caption'
+      },
+      { ansi: hot, at: 4400, id: 'hot', type: 'frame' },
+      { at: 4600, duration: 1300, target: 'hot', type: 'highlight' },
+      {
+        at: 4800,
+        duration: 1700,
+        position: 'right',
+        target: 'hot',
+        text: 'Completed runs collapse, hot lanes stay vivid — the eye tracks the live agent.',
+        type: 'caption'
+      },
+      { ansi: summary, at: 6800, id: 'summary', type: 'frame' },
+      {
+        at: 7000,
+        duration: 1700,
+        position: 'right',
+        target: 'summary',
+        text: 'Subagent results stream back into the parent transcript as a single highlight.',
+        type: 'caption'
+      },
+      { at: 8800, duration: 600, text: '/agents', type: 'compose' }
+    ],
+    title: 'Hermes TUI · Subagent Trail',
+    viewport: { cols: COLS, rows: ROWS }
+  }
+}
+
+// Scene "slash-commands": for each of /skills, /model, /agents and /help the
+// timeline types the command into the composer, shows its echoed user row,
+// clears the composer, then shows the resulting Panel.
+const slashCommands = async () => {
+  // Snapshot a user row rendered with kind="slash" for the given command text.
+  const slashEcho = (text: string) => snap(<Msg kind="slash" role="user" text={text} />)
+
+  const skillsEcho = await slashEcho('/skills search vibe')
+  const skillsResults = await snap(
+    <Panel
+      sections={[
+        {
+          rows: [
+            ['anthropics/skills/frontend-design', '★ trusted'],
+            ['openai/skills/skill-creator', '· official'],
+            ['skills.sh/community/vibe-coding', '⚙ community']
+          ]
+        }
+      ]}
+      t={t}
+      title="skills · search vibe"
+    />,
+    180
+  )
+
+  const modelEcho = await slashEcho('/model claude-4.6-sonnet')
+  const modelSwitch = await snap(
+    <Panel
+      sections={[
+        {
+          rows: [
+            ['from', 'gpt-5-codex'],
+            ['to', 'claude-4.6-sonnet'],
+            ['scope', 'this session']
+          ]
+        }
+      ]}
+      t={t}
+      title="model switched"
+    />,
+    180
+  )
+
+  const agentsEcho = await slashEcho('/agents pause')
+  const agentsStatus = await snap(
+    <Panel
+      sections={[
+        {
+          rows: [
+            ['delegation', 'paused'],
+            ['max children', '4'],
+            ['running tasks', 'queued for resume']
+          ]
+        }
+      ]}
+      t={t}
+      title="agents · paused"
+    />,
+    180
+  )
+
+  const helpEcho = await slashEcho('/help')
+  const helpPanel = await snap(
+    <Panel
+      sections={[
+        {
+          items: ['/skills    search · install · inspect', '/model     switch model · pop picker'],
+          title: 'Tools & Skills'
+        },
+        {
+          items: [
+            '/agents    spawn-tree dashboard',
+            '/queue     queue prompt for next turn',
+            '/steer     inject after next tool call'
+          ],
+          title: 'Session'
+        },
+        {
+          items: ['/voice     toggle voice mode', '/details   thinking · tools · subagents · activity'],
+          title: 'Configuration'
+        }
+      ]}
+      t={t}
+      title="(^_^)? Commands"
+    />,
+    220
+  )
+
+  // Echo frames carry no `id`: nothing in this timeline targets them.
+  return {
+    composer: '',
+    timeline: [
+      { at: 200, duration: 700, text: '/skills search vibe', type: 'compose' },
+      { ansi: skillsEcho, at: 1100, type: 'frame' },
+      { at: 1100, duration: 200, text: '', type: 'compose' },
+      { ansi: skillsResults, at: 1400, id: 'skills', type: 'frame' },
+      {
+        at: 1700,
+        duration: 2000,
+        position: 'right',
+        target: 'skills',
+        text: 'Typed /skills, hit return — same Panel the live TUI renders.',
+        type: 'caption'
+      },
+      { at: 4000, duration: 700, text: '/model claude-4.6-sonnet', type: 'compose' },
+      { ansi: modelEcho, at: 4900, type: 'frame' },
+      { at: 4900, duration: 200, text: '', type: 'compose' },
+      { ansi: modelSwitch, at: 5200, id: 'model', type: 'frame' },
+      {
+        at: 5500,
+        duration: 1900,
+        position: 'right',
+        target: 'model',
+        text: '/model swaps mid-session; transcript and cache stay intact.',
+        type: 'caption'
+      },
+      { at: 7600, duration: 600, text: '/agents pause', type: 'compose' },
+      { ansi: agentsEcho, at: 8400, type: 'frame' },
+      { at: 8400, duration: 200, text: '', type: 'compose' },
+      { ansi: agentsStatus, at: 8700, id: 'agents', type: 'frame' },
+      {
+        at: 9000,
+        duration: 1800,
+        position: 'right',
+        target: 'agents',
+        text: 'Same registry powers TUI, gateway, Telegram, Discord — one truth.',
+        type: 'caption'
+      },
+      { at: 11000, duration: 400, text: '/help', type: 'compose' },
+      { ansi: helpEcho, at: 11500, type: 'frame' },
+      { at: 11500, duration: 200, text: '', type: 'compose' },
+      { ansi: helpPanel, at: 11800, id: 'help', type: 'frame' }
+    ],
+    title: 'Hermes TUI · Slash Commands',
+    viewport: { cols: COLS, rows: ROWS }
+  }
+}
+
+// Scene "voice-mode": a VAD capture panel, the transcribed user message, the
+// assistant answer, and a TTS playback panel, with captions on the VAD,
+// transcript and TTS frames.
+const voiceMode = async () => {
+  const vad = await snap(
+    <ToolPanel
+      items={['▮ ▮▮ ▮ ▮▮▮▮ ▮▮ ▮▮▮▮▮▮ ▮▮▮ ▮', 'rms 0.42 · 1.6s captured', 'auto-stop · silence 380ms']}
+      theme={t}
+      title="VAD · capturing"
+    />
+  )
+
+  const transcript = await snap(<Msg role="user" text="what's in my inbox today and what needs a reply before noon?" />)
+
+  const answer = await snap(
+    <Msg
+      role="assistant"
+      text="Three threads need you before noon: vendor renewal, podcast intro feedback, and the design review at 11."
+    />
+  )
+
+  const tts = await snap(
+    <ToolPanel
+      items={['voice 11labs · grace_v3', 'elapsed 4.6s · 2 chunks queued', 'ducking mic input']}
+      theme={t}
+      title="tts · playing"
+    />
+  )
+
+  return {
+    composer: 'ctrl+b to start recording',
+    timeline: [
+      { ansi: vad, at: 250, id: 'vad', type: 'frame' },
+      { at: 600, duration: 1500, target: 'vad', type: 'spotlight' },
+      {
+        at: 800,
+        duration: 1700,
+        position: 'right',
+        target: 'vad',
+        text: 'Continuous loop: VAD detects silence, transcribes, restarts — no key holds.',
+        type: 'caption'
+      },
+      { ansi: transcript, at: 2700, id: 'transcript', type: 'frame' },
+      { at: 3400, duration: 1100, target: 'transcript', type: 'highlight' },
+      {
+        at: 3600,
+        duration: 1700,
+        position: 'right',
+        target: 'transcript',
+        text: 'Transcript flows straight into the composer with the standard ❯ user glyph.',
+        type: 'caption'
+      },
+      { ansi: answer, at: 5500, id: 'answer', type: 'frame' },
+      { ansi: tts, at: 6700, id: 'tts', type: 'frame' },
+      {
+        at: 7000,
+        duration: 1700,
+        position: 'right',
+        target: 'tts',
+        text: 'TTS auto-ducks the mic so the loop never echoes itself back.',
+        type: 'caption'
+      },
+      { at: 8800, duration: 600, text: '/voice off', type: 'compose' }
+    ],
+    title: 'Hermes TUI · Voice Mode',
+    viewport: { cols: COLS, rows: ROWS }
+  }
+}
+
+// --- Static prompt mocks (no useInput, safe for snap()) ---
+
+// Static mock of the approval prompt: a double-bordered warning box with the
+// description, a preview of at most the first five command lines, four
+// numbered options (the `selected` one marked with ▸ and inverted), and a
+// key-hint footer.
+const ApprovalPromptStatic = ({
+  command,
+  description,
+  selected = 0,
+  theme
+}: {
+  command: string
+  description: string
+  selected?: number
+  theme: Theme
+}) => {
+  const { cornsilk, dim, warn } = theme.color
+  const preview = command.split('\n').slice(0, 5)
+
+  const options = ['Allow once', 'Allow this session', 'Always allow', 'Deny'].map((label, index) => ({
+    active: index === selected,
+    label,
+    ordinal: index + 1
+  }))
+
+  return (
+    <Box borderColor={warn} borderStyle="double" flexDirection="column" paddingX={1}>
+      <Text bold color={warn}>
+        ⚠ approval required · {description}
+      </Text>
+
+      <Box flexDirection="column" paddingLeft={1}>
+        {preview.map((line, index) => (
+          <Text color={cornsilk} key={index}>
+            {line || ' '}
+          </Text>
+        ))}
+      </Box>
+
+      <Text />
+
+      {options.map(({ active, label, ordinal }) => (
+        <Text key={label}>
+          <Text bold={active} color={active ? warn : dim} inverse={active}>
+            {active ? '▸ ' : '  '}
+            {ordinal}. {label}
+          </Text>
+        </Text>
+      ))}
+
+      <Text color={dim}>↑/↓ select · Enter confirm · 1-4 quick pick · Ctrl+C deny</Text>
+    </Box>
+  )
+}
+
+// Static mock of the clarify prompt: an "ask <question>" heading, the given
+// choices plus a trailing 'Other (type your answer)' entry as a numbered list
+// (the `selected` one marked with ▸ and inverted), and a key-hint footer.
+const ClarifyPromptStatic = ({
+  choices,
+  question,
+  selected = 0,
+  theme
+}: {
+  choices: string[]
+  question: string
+  selected?: number
+  theme: Theme
+}) => {
+  const { amber, cornsilk, dim, label } = theme.color
+  const options = [...choices, 'Other (type your answer)']
+
+  return (
+    <Box flexDirection="column">
+      <Text bold>
+        <Text color={amber}>ask</Text>
+        <Text color={cornsilk}> {question}</Text>
+      </Text>
+
+      {options.map((option, index) => {
+        const active = index === selected
+
+        return (
+          <Text key={index}>
+            <Text bold={active} color={active ? label : dim} inverse={active}>
+              {active ? '▸ ' : '  '}
+              {index + 1}. {option}
+            </Text>
+          </Text>
+        )
+      })}
+
+      <Text color={dim}>
+        ↑/↓ select · Enter confirm · 1-{options.length} quick pick · Esc cancel
+      </Text>
+    </Box>
+  )
+}
+
+// Static mock of the model picker popup: a 50-column double-bordered box with
+// a stage heading, a sub-heading ("Current model: …" on the provider stage,
+// `currentModel` verbatim on the model stage), a numbered item list (the
+// `selected` one marked with ▸ and inverted), and persist/key-hint footers.
+const ModelPickerStatic = ({
+  currentModel,
+  items,
+  selected = 0,
+  stage,
+  theme
+}: {
+  currentModel: string
+  items: string[]
+  selected?: number
+  stage: 'model' | 'provider'
+  theme: Theme
+}) => {
+  const { amber, dim, label } = theme.color
+  const onProviderStage = stage === 'provider'
+  const heading = onProviderStage ? 'Select Provider' : 'Select Model'
+  const subheading = onProviderStage ? `Current model: ${currentModel}` : currentModel
+
+  return (
+    <Box borderStyle="double" borderColor={amber} flexDirection="column" paddingX={1} width={50}>
+      <Text bold color={amber} wrap="truncate-end">
+        {heading}
+      </Text>
+
+      <Text color={dim} wrap="truncate-end">
+        {subheading}
+      </Text>
+
+      <Text color={label} wrap="truncate-end">
+        {' '}
+      </Text>
+
+      <Text color={dim}>{' '}</Text>
+
+      {items.map((item, index) => {
+        const active = index === selected
+
+        return (
+          <Text bold={active} color={active ? amber : dim} inverse={active} key={item} wrap="truncate-end">
+            {active ? '▸ ' : '  '}
+            {index + 1}. {item}
+          </Text>
+        )
+      })}
+
+      <Text color={dim}>{' '}</Text>
+      <Text color={dim}>persist: session · g toggle</Text>
+      <Text color={dim}>↑/↓ select · Enter choose · 1-9,0 quick · Esc/q cancel</Text>
+    </Box>
+  )
+}
+
+// Scene "interactive-prompts": an approval prompt for a shell command, then a
+// clarify prompt for a deploy target, then a "deployment queued" result panel.
+// Both prompts are snapshotted with their default selection (first option).
+const interactivePrompts = async () => {
+  // User asks for something that triggers approval
+  const userAsk = await snap(
+    <Msg role="user" text="Run npm install express in the project root." />
+  )
+
+  const assistantExplains = await snap(
+    <Msg
+      role="assistant"
+      text="I'll install express. The package manager needs approval — here's the command."
+    />
+  )
+
+  // Approval prompt
+  const approval = await snap(
+    <ApprovalPromptStatic
+      command={'npm install express\nadded 58 packages in 3.2s\n\n+ express@5.1.0'}
+      description="install dependency"
+      theme={t}
+    />,
+    180
+  )
+
+  // After approval, user asks something ambiguous
+  const userClarify = await snap(
+    <Msg role="user" text="Deploy this to staging." />
+  )
+
+  const assistantAsks = await snap(
+    <Msg role="assistant" text="Which environment should I target?" />
+  )
+
+  // Clarify prompt
+  const clarify = await snap(
+    <ClarifyPromptStatic
+      choices={['staging-us-east', 'staging-eu-west', 'staging-ap-south']}
+      question="Which region?"
+      theme={t}
+    />,
+    180
+  )
+
+  const confirmResult = await snap(
+    <Panel
+      sections={[
+        {
+          rows: [
+            ['target', 'staging-us-east'],
+            ['branch', 'main'],
+            ['preview', 'https://pr-128.railway.app']
+          ]
+        }
+      ]}
+      t={t}
+      title="deployment queued"
+    />,
+    180
+  )
+
+  // The '1' compose actions mimic the quick-pick key for the first option of each prompt.
+  return {
+    composer: 'deploy this to staging',
+    timeline: [
+      { ansi: userAsk, at: 200, id: 'ask', type: 'frame' },
+      { ansi: assistantExplains, at: 1200, id: 'explain', type: 'frame' },
+      { ansi: approval, at: 2600, id: 'approval', type: 'frame' },
+      { at: 2900, duration: 1500, target: 'approval', type: 'spotlight' },
+      {
+        at: 3100,
+        duration: 2000,
+        position: 'right',
+        target: 'approval',
+        text: 'Approval prompts gate dangerous commands. Four options: allow once, session, always, deny.',
+        type: 'caption'
+      },
+      { at: 5400, duration: 400, text: '1', type: 'compose' },
+      { at: 5900, duration: 500, text: '', type: 'compose' },
+      { ansi: userClarify, at: 6600, id: 'clarify-ask', type: 'frame' },
+      { ansi: assistantAsks, at: 7600, id: 'clarify-reply', type: 'frame' },
+      { ansi: clarify, at: 8800, id: 'clarify', type: 'frame' },
+      { at: 9100, duration: 1500, target: 'clarify', type: 'spotlight' },
+      {
+        at: 9300,
+        duration: 2000,
+        position: 'right',
+        target: 'clarify',
+        text: 'Clarify prompts handle ambiguous requests — numbered choices or free text.',
+        type: 'caption'
+      },
+      { at: 11600, duration: 400, text: '1', type: 'compose' },
+      { ansi: confirmResult, at: 12200, id: 'result', type: 'frame' },
+      { at: 12500, duration: 1300, target: 'result', type: 'highlight' }
+    ],
+    title: 'Hermes TUI · Interactive Prompts',
+    viewport: { cols: COLS, rows: ROWS }
+  }
+}
+
+// Scene "model-picker": user request and assistant reply, the picker on its
+// provider stage, the picker on its model stage, then a "model switched"
+// result panel. Both picker snapshots highlight the second item (selected={1}).
+const modelPicker = async () => {
+  const userAsk = await snap(
+    <Msg role="user" text="Switch to Claude." />
+  )
+
+  const assistantReply = await snap(
+    <Msg role="assistant" text="Opening the model picker — pick a provider first, then a model." />
+  )
+
+  // Provider selection stage
+  const providers = await snap(
+    <ModelPickerStatic
+      currentModel="gpt-5-codex"
+      items={[
+        'OpenAI · 8 models',
+        'Anthropic · 6 models',
+        'Google · 5 models',
+        'OpenRouter · 42 models',
+        'xAI · 3 models'
+      ]}
+      selected={1}
+      stage="provider"
+      theme={t}
+    />,
+    180
+  )
+
+  // Model selection stage
+  // On this stage `currentModel` is rendered verbatim as the sub-heading, so
+  // the chosen provider name is passed here rather than a model id.
+  const models = await snap(
+    <ModelPickerStatic
+      currentModel="Anthropic"
+      items={[
+        'claude-opus-4',
+        'claude-sonnet-4',
+        'claude-sonnet-3.7',
+        'claude-haiku-3.5',
+        'claude-sonnet-3.5'
+      ]}
+      selected={1}
+      stage="model"
+      theme={t}
+    />,
+    180
+  )
+
+  const result = await snap(
+    <Panel
+      sections={[
+        {
+          rows: [
+            ['from', 'gpt-5-codex'],
+            ['to', 'claude-sonnet-4'],
+            ['scope', 'this session']
+          ]
+        }
+      ]}
+      t={t}
+      title="model switched"
+    />,
+    180
+  )
+
+  // The '2' compose actions mimic the quick-pick key for the highlighted second item.
+  return {
+    composer: '',
+    timeline: [
+      { at: 200, duration: 500, text: '/model', type: 'compose' },
+      { ansi: userAsk, at: 900, id: 'ask', type: 'frame' },
+      { ansi: assistantReply, at: 1800, id: 'reply', type: 'frame' },
+      { ansi: providers, at: 3000, id: 'providers', type: 'frame' },
+      { at: 3300, duration: 1800, target: 'providers', type: 'spotlight' },
+      {
+        at: 3500,
+        duration: 2000,
+        position: 'right',
+        target: 'providers',
+        text: 'Provider stage: pick from authenticated backends. Shows model count per provider.',
+        type: 'caption'
+      },
+      { at: 5600, duration: 300, text: '2', type: 'compose' },
+      { ansi: models, at: 6200, id: 'models', type: 'frame' },
+      { at: 6500, duration: 1800, target: 'models', type: 'spotlight' },
+      {
+        at: 6700,
+        duration: 2000,
+        position: 'right',
+        target: 'models',
+        text: 'Model stage: scrollable list with ▸ selection. Number keys for quick pick.',
+        type: 'caption'
+      },
+      { at: 9000, duration: 300, text: '2', type: 'compose' },
+      { ansi: result, at: 9600, id: 'result', type: 'frame' },
+      { at: 9900, duration: 1300, target: 'result', type: 'highlight' },
+      {
+        at: 10100,
+        duration: 1700,
+        position: 'right',
+        target: 'result',
+        text: 'Model swap mid-session. Transcript and cache stay intact.',
+        type: 'caption'
+      }
+    ],
+    title: 'Hermes TUI · Model Picker',
+    viewport: { cols: COLS, rows: ROWS }
+  }
+}
+
+// Record every scene and write it to workflows/<name>.json.
+//
+// A single table drives both the cleanup and the write pass so the two lists
+// cannot drift apart. Exits with status 1 if any step throws.
+const main = async () => {
+  console.log('recording workflows…')
+
+  // Workflow file stem → scene recorder, in write order.
+  const scenes: [string, () => Promise<Record<string, unknown>>][] = [
+    ['feature-tour', featureTour],
+    ['subagent-trail', subagentTrail],
+    ['slash-commands', slashCommands],
+    ['voice-mode', voiceMode],
+    ['interactive-prompts', interactivePrompts],
+    ['model-picker', modelPicker]
+  ]
+
+  // Remove previously generated outputs by name before re-recording. This is
+  // not a directory wipe: only the files named here are removed, so a renamed
+  // scene needs its old stem added to this list. 'ink-frames' has no recorder
+  // here — presumably a retired scene whose stale output should not linger.
+  const staleNames = [...scenes.map(([name]) => name), 'ink-frames']
+  const workflowsDir = join(showroomRoot, 'workflows')
+
+  for (const name of staleNames) {
+    // `force` ignores a missing file; any other failure is surfaced instead of swallowed.
+    rmSync(join(workflowsDir, `${name}.json`), { force: true })
+  }
+
+  // Scenes are recorded one at a time, each written before the next starts.
+  for (const [name, record] of scenes) {
+    writeWorkflow(name, await record())
+  }
+
+  console.log('done')
+}
+
+void main().catch(error => {
+  console.error(error)
+  process.exit(1)
+})
@@ -0,0 +1,109 @@
+import { createServer } from 'node:http'
+import { resolve } from 'node:path'
+
+import {
+  defaultWorkflowPath,
+  listWorkflows,
+  readWorkflow,
+  renderPage,
+  workflowsDir,
+  type WorkflowEntry
+} from './page.js'
+
+const FLAG_VALUES = new Set(['--port', '--workflow'])
+
+// Returns the argv token that directly follows `name`, or undefined when
+// `name` does not appear on the command line.
+const arg = (name: string) => {
+  const at = process.argv.indexOf(name)
+
+  if (at === -1) {
+    return undefined
+  }
+
+  return process.argv[at + 1]
+}
+
+// First command-line token that is neither a flag nor the value consumed by
+// one of the FLAG_VALUES flags; undefined when there is none.
+const positional = (() => {
+  const rest = process.argv.slice(2)
+  let cursor = 0
+
+  while (cursor < rest.length) {
+    const token = rest[cursor]!
+
+    if (FLAG_VALUES.has(token)) {
+      // Skip the flag together with the value that follows it.
+      cursor += 2
+    } else if (token.startsWith('-')) {
+      cursor += 1
+    } else {
+      return token
+    }
+  }
+
+  return undefined
+})()
+
+const port = Number(arg('--port') ?? process.env.PORT ?? 4317)
+const overridePath = arg('--workflow') ?? positional
+
+// Chooses the workflow embedded in the initial page render.
+//
+// Precedence: a CLI override path (overridePath) always wins; otherwise the
+// catalog entry whose name equals `requested`; otherwise the entry at
+// defaultWorkflowPath; otherwise the first catalog entry.
+//
+// Throws when there is no override and the catalog is empty, so the caller
+// gets a meaningful message instead of dereferencing `undefined`.
+const pickInitial = (catalog: WorkflowEntry[], requested: null | string): WorkflowEntry => {
+  if (overridePath) {
+    const fullPath = resolve(process.cwd(), overridePath)
+
+    return { name: 'override', path: fullPath, title: requested ?? 'override' }
+  }
+
+  const chosen =
+    (requested ? catalog.find(w => w.name === requested) : undefined) ??
+    catalog.find(w => w.path === defaultWorkflowPath) ??
+    catalog[0]
+
+  if (!chosen) {
+    throw new Error(`no workflows found in ${workflowsDir}`)
+  }
+
+  return chosen
+}
+
+// HTTP entry point for the showroom.
+//
+// Routes:
+//   /healthz              -> 200 'ok'
+//   /api/workflows        -> JSON catalog from listWorkflows()
+//   /api/workflow/<name>  -> JSON of the named catalog workflow (404 if unknown,
+//                            400 if the name is not valid percent-encoding)
+//   anything else         -> HTML page for the workflow chosen by pickInitial()
+//
+// The whole handler runs inside one try/catch so a malformed request or a
+// failing workflow read yields an error response instead of an uncaught
+// exception inside the request callback.
+const server = createServer((req, res) => {
+  try {
+    // Only the path and query are read, so a fixed base avoids depending on
+    // the client-supplied Host header, which may be missing or invalid.
+    const url = new URL(req.url ?? '/', 'http://localhost')
+
+    if (url.pathname === '/healthz') {
+      res.writeHead(200).end('ok')
+
+      return
+    }
+
+    if (url.pathname === '/api/workflows') {
+      const body = JSON.stringify(listWorkflows())
+
+      res.writeHead(200, { 'Content-Type': 'application/json' }).end(body)
+
+      return
+    }
+
+    if (url.pathname.startsWith('/api/workflow/')) {
+      let name: string
+
+      try {
+        name = decodeURIComponent(url.pathname.slice('/api/workflow/'.length))
+      } catch {
+        // decodeURIComponent throws URIError on malformed percent-encoding.
+        res.writeHead(400).end('bad request')
+
+        return
+      }
+
+      const hit = listWorkflows().find(w => w.name === name)
+
+      if (!hit) {
+        res.writeHead(404).end('not found')
+
+        return
+      }
+
+      // Build the body before writing headers so a failure while reading the
+      // workflow can still be reported as a 500 below.
+      const body = JSON.stringify(readWorkflow(hit.path))
+
+      res.writeHead(200, { 'Content-Type': 'application/json' }).end(body)
+
+      return
+    }
+
+    const catalog = listWorkflows()
+    const initial = pickInitial(catalog, url.searchParams.get('w'))
+    const page = renderPage({ name: initial.name, workflow: readWorkflow(initial.path) }, catalog)
+
+    res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8' }).end(page)
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error)
+
+    if (res.headersSent) {
+      // Too late to change the status line; just finish the response.
+      res.end()
+
+      return
+    }
+
+    res.writeHead(500, { 'Content-Type': 'text/plain; charset=utf-8' }).end(message)
+  }
+})
+
+// Listen on the loopback address only and print the URL and workflows directory once bound.
+server.listen(port, '127.0.0.1', () => {
+  console.log(`showroom: http://127.0.0.1:${port}`)
+  console.log(`workflows dir: ${workflowsDir}`)
+})
@@ -0,0 +1,422 @@
+:root {
+  color-scheme: dark;
+  background: #050505;
+  font-family: Inter, ui-sans-serif, system-ui, sans-serif;
+  --gold: #ffd700;
+  --amber: #ffbf00;
+  --bronze: #cd7f32;
+  --cornsilk: #fff8dc;
+  --dim: #cc9b1f;
+  --label: #daa520;
+  --bg: #0a0a0a;
+  --bg-deep: #050505;
+  --ease-out: cubic-bezier(0.22, 1, 0.36, 1);
+  --ease-in-out: cubic-bezier(0.65, 0, 0.35, 1);
+}
+
+* {
+  box-sizing: border-box;
+}
+
+body {
+  min-height: 100vh;
+  margin: 0;
+  overflow: auto;
+  background:
+    radial-gradient(circle at 18% 12%, rgba(205, 127, 50, 0.12), transparent 36rem),
+    radial-gradient(circle at 82% 14%, rgba(255, 215, 0, 0.05), transparent 30rem),
+    var(--bg-deep);
+}
+
+#showroom {
+  min-height: 100vh;
+  padding: 24px 24px 60px;
+  display: flex;
+  justify-content: center;
+  align-items: flex-start;
+}
+
+/* --- Shell --- */
+
+.showroom-shell {
+  display: grid;
+  gap: 14px;
+  width: max-content;
+  max-width: 100%;
+  opacity: 0;
+  transform: translateY(12px);
+  transition:
+    opacity 600ms var(--ease-out),
+    transform 600ms var(--ease-out);
+}
+
+.showroom-shell.is-mounted {
+  opacity: 1;
+  transform: translateY(0);
+}
+
+/* --- Stage --- */
+
+.showroom-stage {
+  position: relative;
+  width: var(--stage-w);
+  height: var(--stage-h);
+  overflow: hidden;
+  border: 1px solid rgba(205, 127, 50, 0.45);
+  border-radius: 14px;
+  background: var(--bg);
+  box-shadow:
+    0 32px 120px rgba(0, 0, 0, 0.6),
+    0 0 0 1px rgba(255, 255, 255, 0.03) inset;
+}
+
+/*
+ * The terminal is laid out at its unscaled size (--term-w x --term-h) and
+ * then enlarged with a transform anchored at the top-left corner, so the
+ * 13px/18px type metrics below stay fixed while --scale changes.
+ * Rows: status bar (auto), body (1fr), composer (auto).
+ */
+.showroom-terminal {
+  position: absolute;
+  inset: 0 auto auto 0;
+  display: grid;
+  grid-template-rows: auto 1fr auto;
+  width: var(--term-w);
+  height: var(--term-h);
+  transform: scale(var(--scale));
+  transform-origin: top left;
+  overflow: hidden;
+  padding: 8px 10px;
+  background: var(--bg);
+  color: var(--cornsilk);
+  font-family: 'JetBrains Mono', 'SFMono-Regular', Consolas, monospace;
+  font-size: 13px;
+  line-height: 18px;
+}
+
+/* --- Status bar --- */
+
+.showroom-status {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  min-height: 18px;
+  padding: 0 4px;
+  color: var(--dim);
+  font-size: 11px;
+  white-space: nowrap;
+}
+
+.showroom-status:empty,
+.showroom-status-left:empty,
+.showroom-status-right:empty {
+  display: none;
+}
+
+.showroom-status-left,
+.showroom-status-right {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+}
+
+/* --- Composer --- */
+
+.showroom-composer {
+  display: flex;
+  align-items: center;
+  min-height: 22px;
+  padding: 6px 4px 0;
+  color: var(--cornsilk);
+  white-space: nowrap;
+}
+
+.showroom-composer:empty {
+  display: none;
+}
+
+.showroom-composer::before {
+  content: '❯';
+  color: var(--gold);
+  font-weight: 700;
+  margin-right: 8px;
+}
+
+.showroom-composer:not(:empty)::after {
+  content: '';
+  display: inline-block;
+  width: 7px;
+  height: 14px;
+  margin-left: 4px;
+  background: var(--gold);
+  vertical-align: middle;
+  animation: showroom-blink 1100ms steps(2) infinite;
+}
+
+@keyframes showroom-blink {
+  50% {
+    opacity: 0;
+  }
+}
+
+/* --- Body (DOM message mode) --- */
+
+.showroom-body {
+  display: flex;
+  flex-direction: column;
+  gap: 6px;
+  overflow: hidden;
+  padding: 4px 0 6px;
+}
+
+/* --- xterm container (frame mode) --- */
+
+.showroom-xterm {
+  width: 100%;
+  height: 100%;
+  overflow: hidden;
+  opacity: 0;
+  transition: opacity 300ms var(--ease-out);
+}
+
+.showroom-xterm.is-visible {
+  opacity: 1;
+}
+
+.showroom-xterm .xterm-viewport {
+  overflow: hidden !important;
+  background: transparent !important;
+}
+
+/* --- DOM-mode lines --- */
+
+.showroom-line,
+.showroom-tool {
+  opacity: 0;
+  transform: translateY(4px);
+  animation: showroom-enter 320ms var(--ease-out) forwards;
+}
+
+@keyframes showroom-enter {
+  to {
+    opacity: 1;
+    transform: translateY(0);
+  }
+}
+
+.showroom-line {
+  display: grid;
+  grid-template-columns: 22px 1fr;
+  gap: 4px;
+}
+
+.showroom-glyph {
+  color: var(--role);
+  font-weight: 700;
+}
+
+.showroom-copy {
+  color: var(--copy);
+  white-space: pre-wrap;
+}
+
+.showroom-line-user .showroom-copy {
+  color: var(--label);
+  font-weight: 600;
+}
+
+.showroom-line-assistant .showroom-copy {
+  color: var(--cornsilk);
+}
+
+.showroom-line-system .showroom-copy {
+  color: var(--dim);
+}
+
+/* --- Tool panel --- */
+
+.showroom-tool {
+  margin-left: 22px;
+  border: 1px solid rgba(205, 127, 50, 0.32);
+  border-radius: 4px;
+  padding: 6px 10px;
+  background: rgba(205, 127, 50, 0.05);
+}
+
+.showroom-tool-title {
+  color: var(--gold);
+  font-weight: 700;
+}
+
+.showroom-tool-title::before {
+  content: '⚡ ';
+  color: var(--bronze);
+}
+
+.showroom-tool-items {
+  display: grid;
+  gap: 1px;
+  margin-top: 4px;
+  color: var(--dim);
+  font-size: 12px;
+}
+
+.showroom-tool-items div::before {
+  content: '┊ ';
+  color: var(--bronze);
+}
+
+/* --- Highlight --- */
+
+.is-highlighted {
+  filter: brightness(1.4);
+  background: rgba(255, 215, 0, 0.1);
+  transform: translateX(3px);
+  transition:
+    filter 420ms var(--ease-in-out),
+    background 420ms var(--ease-in-out),
+    transform 420ms var(--ease-in-out);
+}
+
+/* --- Overlays (captions, spotlights) --- */
+
+.showroom-overlays {
+  position: absolute;
+  inset: 0;
+  pointer-events: none;
+}
+
+.showroom-caption,
+.showroom-spotlight {
+  position: absolute;
+  opacity: 0;
+  transition:
+    opacity 360ms var(--ease-out),
+    transform 360ms var(--ease-out);
+}
+
+.showroom-caption {
+  max-width: 360px;
+  border: 1px solid rgba(205, 127, 50, 0.5);
+  border-radius: 12px;
+  padding: 12px 14px;
+  background: rgba(10, 10, 10, 0.92);
+  box-shadow: 0 18px 60px rgba(0, 0, 0, 0.5);
+  color: var(--cornsilk);
+  font-size: 14px;
+  line-height: 1.45;
+  transform: translateY(8px);
+}
+
+/*
+ * The first shadow has a 9999px spread and no blur: it paints a translucent
+ * black layer over everything outside the spotlight box (dimming the rest of
+ * the stage). The second shadow is the gold glow around the box itself.
+ */
+.showroom-spotlight {
+  border: 2px solid var(--gold);
+  border-radius: 8px;
+  box-shadow:
+    0 0 0 9999px rgba(0, 0, 0, 0.42),
+    0 0 32px rgba(255, 215, 0, 0.32);
+}
+
+.showroom-caption.is-visible,
+.showroom-spotlight.is-visible {
+  opacity: 1;
+  transform: translateY(0);
+}
+
+/* --- Picker --- */
+
+.showroom-picker {
+  appearance: none;
+  border: 1px solid rgba(205, 127, 50, 0.4);
+  border-radius: 999px;
+  padding: 6px 30px 6px 14px;
+  background: rgba(205, 127, 50, 0.06)
+    url("data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 12 8'><path d='M1 1l5 5 5-5' fill='none' stroke='%23cd7f32' stroke-width='1.5' stroke-linecap='round' stroke-linejoin='round'/></svg>")
+    no-repeat right 12px center / 10px;
+  color: var(--cornsilk);
+  font-family: 'JetBrains Mono', 'SFMono-Regular', Consolas, monospace;
+  font-size: 12px;
+  cursor: pointer;
+}
+
+.showroom-picker:focus {
+  outline: 1px solid var(--bronze);
+}
+
+/* --- Controls bar --- */
+
+.showroom-controls {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 8px;
+  align-items: center;
+  font-family: 'JetBrains Mono', 'SFMono-Regular', Consolas, monospace;
+  font-size: 12px;
+}
+
+.showroom-controls button {
+  border: 1px solid rgba(205, 127, 50, 0.25);
+  border-radius: 999px;
+  padding: 4px 10px;
+  background: rgba(205, 127, 50, 0.04);
+  color: var(--dim);
+  cursor: pointer;
+  font: inherit;
+}
+
+.showroom-controls button:hover {
+  background: rgba(205, 127, 50, 0.12);
+  color: var(--cornsilk);
+}
+
+.showroom-controls button[data-action='restart'] {
+  width: 28px;
+  height: 28px;
+  padding: 0;
+  font-size: 14px;
+  line-height: 1;
+}
+
+.showroom-segmented {
+  display: inline-flex;
+  border: 1px solid rgba(205, 127, 50, 0.25);
+  border-radius: 999px;
+  padding: 2px;
+  background: rgba(205, 127, 50, 0.04);
+}
+
+.showroom-segmented button {
+  border: 0;
+  border-radius: 999px;
+  padding: 3px 10px;
+  background: transparent;
+  color: var(--dim);
+  cursor: pointer;
+  font: inherit;
+}
+
+.showroom-segmented button.is-active {
+  background: rgba(255, 215, 0, 0.18);
+  color: var(--cornsilk);
+}
+
+/* --- Progress --- */
+
+.showroom-progress {
+  display: inline-flex;
+  align-items: center;
+  gap: 10px;
+  flex: 1;
+  min-width: 140px;
+  color: var(--dim);
+}
+
+.showroom-progress-track {
+  position: relative;
+  flex: 1;
+  height: 2px;
+  border-radius: 999px;
+  background: rgba(205, 127, 50, 0.1);
+  overflow: hidden;
+}
+
+.showroom-progress-fill {
+  position: absolute;
+  inset: 0 auto 0 0;
+  width: 0;
+  background: linear-gradient(90deg, var(--bronze), var(--gold));
+  transition: width 80ms linear;
+}
@@ -0,0 +1,541 @@
+import { Terminal } from '@xterm/xterm'
+
+const initial = window.__SHOWROOM_INITIAL__
+const catalog = window.__SHOWROOM_CATALOG__ ?? []
+const root = document.getElementById('showroom')
+const SPEEDS = [0.5, 1, 2]
+const SCALES = [1, 2, 3, 4]
+
+// Mutable player state shared by every action and control handler.
+// The DOM references (body, composer, overlays, status*, progress*) are
+// assigned by renderShell(); term and termContainer are assigned by initXterm().
+const state = {
+  body: null,
+  composer: null,
+  // frame id -> element that spotlights/captions/highlights resolve to
+  frameTargets: new Map(),
+  overlays: null,
+  progressFill: null,
+  progressLabel: null,
+  // handle of the pending progress-bar animation frame, or null
+  raf: null,
+  scale: 2,
+  shell: null,
+  speed: 1,
+  // performance.now() at the start of the current playback run (0 = never played)
+  startedAt: 0,
+  statusLeft: null,
+  statusRight: null,
+  term: null,
+  termContainer: null,
+  // setTimeout handles that clearTimers() cancels
+  timers: [],
+  // timeline length in ms: the largest `at + duration` of any action
+  total: 0,
+  viewport: null,
+  workflow: initial?.workflow ?? { timeline: [] }
+}
+
+const clearTimers = () => {
+  while (state.timers.length) {
+    clearTimeout(state.timers.pop())
+  }
+
+  if (state.raf) {
+    cancelAnimationFrame(state.raf)
+    state.raf = null
+  }
+}
+
+const resolveTarget = id => {
+  if (!id) {
+    return null
+  }
+
+  return state.frameTargets.get(id) ?? document.querySelector(`[data-target="${CSS.escape(id)}"]`)
+}
+
+const setText = (node, text = '', duration = 0) => {
+  if (!duration || state.speed <= 0) {
+    node.textContent = text
+
+    return
+  }
+
+  const chars = [...text]
+  const adjusted = duration / state.speed
+  const started = performance.now()
+
+  const frame = now => {
+    const n = Math.min(chars.length, Math.ceil(((now - started) / adjusted) * chars.length))
+    node.textContent = chars.slice(0, n).join('')
+
+    if (n < chars.length) {
+      requestAnimationFrame(frame)
+    }
+  }
+
+  requestAnimationFrame(frame)
+}
+
+const removeAfter = (node, duration = 1400) => {
+  const wait = duration / state.speed
+
+  state.timers.push(
+    setTimeout(() => {
+      node.classList.remove('is-visible')
+      state.timers.push(setTimeout(() => node.remove(), 420 / state.speed))
+    }, wait)
+  )
+}
+
+const rectFor = (id, pad = 8) => {
+  const el = resolveTarget(id)
+
+  if (!el || !state.overlays) {
+    return null
+  }
+
+  const stage = state.overlays.getBoundingClientRect()
+  const rect = el.getBoundingClientRect()
+
+  return {
+    height: rect.height + pad * 2,
+    left: rect.left - stage.left - pad,
+    top: rect.top - stage.top - pad,
+    width: rect.width + pad * 2
+  }
+}
+
+const placeNear = (node, id, position = 'right') => {
+  const rect = rectFor(id, 0)
+
+  if (!rect) {
+    node.style.left = '24px'
+    node.style.top = '24px'
+
+    return
+  }
+
+  const gap = 18
+  const left = position === 'left' ? rect.left - node.offsetWidth - gap : rect.left + rect.width + gap
+  const top = position === 'top' ? rect.top - node.offsetHeight - gap : rect.top
+
+  node.style.left = `${Math.max(12, left)}px`
+  node.style.top = `${Math.max(12, top)}px`
+}
+
+// --- Actions ---
+
+const message = action => {
+  const spec = {
+    assistant: { copy: '#fff8dc', glyph: '┊', tone: '#cd7f32' },
+    system: { copy: '#cc9b1f', glyph: '·', tone: '#cc9b1f' },
+    tool: { copy: '#cc9b1f', glyph: '⚡', tone: '#cd7f32' },
+    user: { copy: '#daa520', glyph: '❯', tone: '#ffd700' }
+  }[action.role] ?? { copy: '#fff8dc', glyph: '┊', tone: '#cd7f32' }
+
+  const line = document.createElement('div')
+  const glyph = document.createElement('span')
+  const copy = document.createElement('div')
+
+  line.className = `showroom-line showroom-line-${action.role ?? 'assistant'}`
+  line.dataset.target = action.id ?? ''
+  line.style.setProperty('--role', spec.tone)
+  line.style.setProperty('--copy', spec.copy)
+
+  glyph.className = 'showroom-glyph'
+  glyph.textContent = spec.glyph
+
+  copy.className = 'showroom-copy'
+
+  line.append(glyph, copy)
+  state.body.append(line)
+  setText(copy, action.text, action.duration)
+}
+
+const tool = action => {
+  const box = document.createElement('div')
+  const title = document.createElement('div')
+  const items = document.createElement('div')
+
+  box.className = 'showroom-tool'
+  box.dataset.target = action.id ?? ''
+
+  title.className = 'showroom-tool-title'
+  title.textContent = action.title ?? 'tool activity'
+
+  items.className = 'showroom-tool-items'
+
+  for (const item of action.items ?? []) {
+    const row = document.createElement('div')
+
+    row.textContent = item
+    items.append(row)
+  }
+
+  box.append(title, items)
+  state.body.append(box)
+}
+
+const frame = action => {
+  if (!state.term || !action.ansi) {
+    return
+  }
+
+  state.term.write(action.ansi)
+
+  if (action.id) {
+    state.frameTargets.set(action.id, state.termContainer)
+  }
+}
+
+const fade = action => {
+  const el = resolveTarget(action.target)
+
+  if (!el) {
+    return
+  }
+
+  el.style.transition = `opacity ${(action.duration ?? 420) / state.speed}ms var(--ease-in-out)`
+  el.style.opacity = String(action.to ?? 0)
+}
+
+const highlight = action => {
+  const el = resolveTarget(action.target)
+
+  if (!el) {
+    return
+  }
+
+  el.classList.add('is-highlighted')
+  state.timers.push(setTimeout(() => el.classList.remove('is-highlighted'), (action.duration ?? 1200) / state.speed))
+}
+
+const caption = action => {
+  const node = document.createElement('div')
+
+  node.className = 'showroom-caption'
+  node.dataset.target = action.id ?? ''
+  node.textContent = action.text ?? ''
+  state.overlays.append(node)
+  placeNear(node, action.target, action.position)
+  requestAnimationFrame(() => node.classList.add('is-visible'))
+  removeAfter(node, action.duration ?? 1600)
+}
+
+const spotlight = action => {
+  const rect = rectFor(action.target, action.pad ?? 6)
+
+  if (!rect) {
+    return
+  }
+
+  const node = document.createElement('div')
+
+  node.className = 'showroom-spotlight'
+  node.style.left = `${rect.left}px`
+  node.style.top = `${rect.top}px`
+  node.style.width = `${rect.width}px`
+  node.style.height = `${rect.height}px`
+  state.overlays.append(node)
+  requestAnimationFrame(() => node.classList.add('is-visible'))
+  removeAfter(node, action.duration ?? 1500)
+}
+
+const status = action => {
+  state.statusLeft.textContent = action.text ?? ''
+  state.statusRight.textContent = action.detail ?? ''
+}
+
+const compose = action => setText(state.composer, action.text ?? '', action.duration ?? 0)
+
+const clearTranscript = () => {
+  state.overlays.textContent = ''
+  state.frameTargets.clear()
+
+  if (state.term) {
+    state.term.reset()
+    state.term.write('\x1b[?25l')
+
+    return
+  }
+
+  state.body.textContent = ''
+}
+
+const ACTIONS = { caption, clear: clearTranscript, compose, fade, frame, highlight, message, spotlight, status, tool }
+
+// --- Progress ---
+
+const fmtTime = ms => {
+  if (!Number.isFinite(ms)) {
+    return '0.0s'
+  }
+
+  return `${(Math.max(0, ms) / 1000).toFixed(1)}s`
+}
+
+const tickProgress = () => {
+  if (!state.startedAt) {
+    return
+  }
+
+  const elapsed = Math.min(state.total, (performance.now() - state.startedAt) * state.speed)
+  const ratio = state.total ? elapsed / state.total : 0
+
+  state.progressFill.style.width = `${(ratio * 100).toFixed(2)}%`
+  state.progressLabel.textContent = `${fmtTime(elapsed)} / ${fmtTime(state.total)}`
+
+  if (elapsed < state.total) {
+    state.raf = requestAnimationFrame(tickProgress)
+  }
+}
+
+// --- xterm ---
+
+const initXterm = () => {
+  const hasFrames = (state.workflow.timeline ?? []).some(a => a.type === 'frame')
+
+  if (!hasFrames) {
+    state.term = null
+    state.termContainer = null
+
+    return
+  }
+
+  state.body.innerHTML = '<div class="showroom-xterm" data-target="terminal"></div>'
+  state.termContainer = state.body.querySelector('.showroom-xterm')
+
+  state.term = new Terminal({
+    cols: state.viewport.cols,
+    rows: state.viewport.rows,
+    fontFamily: 'JetBrains Mono, "SF Mono", Consolas, monospace',
+    fontSize: 13,
+    cursorBlink: false,
+    scrollback: 0,
+    convertEol: true,
+    allowProposedApi: true,
+    theme: {
+      background: '#0a0a0a',
+      foreground: '#fff8dc',
+      cursor: '#ffd700',
+      selectionBackground: '#3a3a55',
+      black: '#0a0a0a',
+      red: '#ef5350',
+      green: '#8fbc8f',
+      yellow: '#ffd700',
+      blue: '#5a82ff',
+      magenta: '#cd7f32',
+      cyan: '#daa520',
+      white: '#fff8dc',
+      brightBlack: '#cc9b1f',
+      brightRed: '#ef5350',
+      brightGreen: '#8fbc8f',
+      brightYellow: '#ffbf00',
+      brightBlue: '#5a82ff',
+      brightMagenta: '#cd7f32',
+      brightCyan: '#daa520',
+      brightWhite: '#fff8dc'
+    }
+  })
+
+  state.term.open(state.termContainer)
+  state.term.write('\x1b[?25l')
+
+  // Fade in
+  requestAnimationFrame(() => state.termContainer.classList.add('is-visible'))
+}
+
+// --- Playback ---
+
+const play = () => {
+  clearTimers()
+  clearTranscript()
+  state.statusLeft.textContent = ''
+  state.statusRight.textContent = ''
+  state.composer.textContent = state.workflow.composer ?? ''
+
+  const timeline = [...(state.workflow.timeline ?? [])].sort((a, b) => a.at - b.at)
+
+  state.total = timeline.reduce((max, action) => Math.max(max, action.at + (action.duration ?? 0)), 0)
+  state.startedAt = performance.now()
+  state.progressFill.style.width = '0%'
+  state.progressLabel.textContent = `0.0s / ${fmtTime(state.total)}`
+
+  for (const action of timeline) {
+    state.timers.push(setTimeout(() => ACTIONS[action.type]?.(action), action.at / state.speed))
+  }
+
+  state.raf = requestAnimationFrame(tickProgress)
+}
+
+// --- Controls ---
+
+const setSpeed = next => {
+  state.speed = next
+
+  for (const button of state.shell.querySelectorAll('[data-segment="speed"] button')) {
+    button.classList.toggle('is-active', Number(button.dataset.value) === next)
+  }
+}
+
+const setScale = next => {
+  state.scale = next
+  state.shell.style.setProperty('--scale', `${next}`)
+  state.shell.style.setProperty('--stage-w', `${state.viewport.cols * state.viewport.cellWidth * next}px`)
+  state.shell.style.setProperty('--stage-h', `${state.viewport.rows * state.viewport.lineHeight * next}px`)
+
+  for (const button of state.shell.querySelectorAll('[data-segment="scale"] button')) {
+    button.classList.toggle('is-active', Number(button.dataset.value) === next)
+  }
+}
+
+const fitScale = () => {
+  const margin = 96
+  const baseW = state.viewport.cols * state.viewport.cellWidth
+  const baseH = state.viewport.rows * state.viewport.lineHeight
+  const maxW = Math.max(1, window.innerWidth - margin)
+  const maxH = Math.max(1, window.innerHeight - 240)
+  const fit = Math.max(1, Math.floor(Math.min(maxW / baseW, maxH / baseH)))
+
+  return Math.max(1, Math.min(SCALES[SCALES.length - 1], fit))
+}
+
+// Switches the player to the workflow called `name`.
+//
+// The `w` query parameter is updated first (without adding a history entry),
+// then the workflow JSON is fetched from /api/workflow/<name>. If the request
+// fails or returns a non-OK status, state.workflow is left unchanged. The
+// shell is rebuilt and playback restarted in either case.
+const loadWorkflow = async name => {
+  const url = new URL(window.location.href)
+  url.searchParams.set('w', name)
+  window.history.replaceState(null, '', url)
+
+  try {
+    const response = await fetch(`/api/workflow/${encodeURIComponent(name)}`)
+
+    if (response.ok) {
+      state.workflow = await response.json()
+    }
+  } catch {
+    /* fall through */
+  }
+
+  await rebuild()
+}
+
+// --- DOM ---
+
+const buildOptions = () => {
+  if (!catalog.length) {
+    return ''
+  }
+
+  return catalog
+    .map(({ name, title }) => {
+      const selected = name === initial?.name ? ' selected' : ''
+
+      return `<option value="${name}"${selected}>${title}</option>`
+    })
+    .join('')
+}
+
+const buildSegmented = (values, active) =>
+  values
+    .map(
+      value =>
+        `<button type="button" data-value="${value}" class="${value === active ? 'is-active' : ''}">${value}x</button>`
+    )
+    .join('')
+
+const computeViewport = () => {
+  const fromWorkflow = state.workflow.viewport ?? {}
+
+  return {
+    cellWidth: 9,
+    cols: 80,
+    lineHeight: 19,
+    rows: 24,
+    scale: 2,
+    ...fromWorkflow
+  }
+}
+
+// Rebuilds the whole player DOM inside state.shell for the current workflow.
+//
+// Steps: recompute the viewport, publish its geometry as CSS custom
+// properties, replace the shell markup (stage + controls), re-capture the
+// element references kept in `state`, and attach the control listeners.
+// All previous children of the shell (and their listeners) are discarded.
+const renderShell = () => {
+  state.viewport = computeViewport()
+  state.frameTargets.clear()
+
+  // Unscaled terminal size; the stage size (--stage-w/--stage-h) is set by setScale().
+  state.shell.style.setProperty('--cell-w', `${state.viewport.cellWidth}px`)
+  state.shell.style.setProperty('--cols', `${state.viewport.cols}`)
+  state.shell.style.setProperty('--line-h', `${state.viewport.lineHeight}px`)
+  state.shell.style.setProperty('--rows', `${state.viewport.rows}`)
+  state.shell.style.setProperty('--term-w', `${state.viewport.cols * state.viewport.cellWidth}px`)
+  state.shell.style.setProperty('--term-h', `${state.viewport.rows * state.viewport.lineHeight}px`)
+
+  // The workflow picker is only rendered when the catalog has more than one entry.
+  state.shell.innerHTML = `
+    <div class="showroom-stage">
+      <div class="showroom-terminal">
+        <div class="showroom-status" data-target="status">
+          <span class="showroom-status-left"></span>
+          <span class="showroom-status-right"></span>
+        </div>
+        <div class="showroom-body"></div>
+        <div class="showroom-composer" data-target="composer"></div>
+      </div>
+      <div class="showroom-overlays"></div>
+    </div>
+    <footer class="showroom-controls">
+      <button type="button" data-action="restart" title="restart (R)">&#8635;</button>
+      <span class="showroom-segmented" data-segment="scale">${buildSegmented(SCALES, state.scale)}</span>
+      <span class="showroom-segmented" data-segment="speed">${buildSegmented(SPEEDS, state.speed)}</span>
+      ${catalog.length > 1 ? `<select class="showroom-picker" data-action="picker">${buildOptions()}</select>` : ''}
+      <span class="showroom-progress">
+        <span data-role="time">0.0s / 0.0s</span>
+        <div class="showroom-progress-track"><div class="showroom-progress-fill"></div></div>
+      </span>
+    </footer>
+  `
+
+  // Re-capture references: the nodes above were just recreated.
+  state.body = state.shell.querySelector('.showroom-body')
+  state.composer = state.shell.querySelector('.showroom-composer')
+  state.overlays = state.shell.querySelector('.showroom-overlays')
+  state.statusLeft = state.shell.querySelector('.showroom-status-left')
+  state.statusRight = state.shell.querySelector('.showroom-status-right')
+  state.progressFill = state.shell.querySelector('.showroom-progress-fill')
+  state.progressLabel = state.shell.querySelector('[data-role="time"]')
+
+  state.shell.querySelector('[data-action="restart"]').addEventListener('click', play)
+
+  for (const button of state.shell.querySelectorAll('[data-segment="speed"] button')) {
+    button.addEventListener('click', () => setSpeed(Number(button.dataset.value)))
+  }
+
+  for (const button of state.shell.querySelectorAll('[data-segment="scale"] button')) {
+    button.addEventListener('click', () => setScale(Number(button.dataset.value)))
+  }
+
+  const picker = state.shell.querySelector('[data-action="picker"]')
+
+  if (picker) {
+    picker.addEventListener('change', event => {
+      void loadWorkflow(event.target.value)
+    })
+  }
+}
+
+// Renders the shell for the current workflow, sets up the terminal when the
+// timeline has frames, applies the zoom (the workflow's own viewport.scale
+// if it declares one, otherwise the best fit for the window) and starts playback.
+const rebuild = async () => {
+  renderShell()
+  initXterm()
+  setScale(state.workflow.viewport?.scale ?? fitScale())
+  play()
+}
+
+const mount = () => {
+  state.shell = document.createElement('section')
+  state.shell.className = 'showroom-shell'
+  root.replaceChildren(state.shell)
+
+  void rebuild().then(() => {
+    requestAnimationFrame(() => state.shell.classList.add('is-mounted'))
+  })
+
+  window.addEventListener('keydown', event => {
+    const key = event.key.toLowerCase()
+
+    if (key === 'r') {
+      play()
+    } else if (key === '1' || key === '2' || key === '3') {
+      setSpeed(SPEEDS[Number(key) - 1])
+    }
+  })
+}
+
+mount()
@@ -0,0 +1,9 @@
+{
+  "extends": "../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "rootDir": ".",
+    "types": ["node"]
+  },
+  "include": ["*.ts"]
+}
@@ -0,0 +1,68 @@
+{
+  "composer": "ask hermes anything",
+  "timeline": [
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2CBuild\u001b[1Ca\u001b[1Cfocused\u001b[1Cplan\u001b[1Cfor\u001b[1Ca\u001b[1Csafer\u001b[1Cgateway\u001b[1Capproval\u001b[1Cflow.\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 200,
+      "id": "user-row",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2CI'll\u001b[1Ctrace\u001b[1Cthe\u001b[1Cgateway\u001b[1Cguards\u001b[1Cfirst,\u001b[1Cthen\u001b[1Cpatch\u001b[1Cthe\u001b[1Csmallest\u001b[1Cboundary\u001b[1Cthat\r\n\u001b[3Ckeeps\u001b[1Capproval\u001b[1Ccommands\u001b[1Clive\u001b[1Cwhile\u001b[1Can\u001b[1Cagent\u001b[1Cis\u001b[1Cblocked.\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 1500,
+      "id": "assistant-plan",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\u001b[2C⚡\u001b[1Ctool\u001b[1Ctrail\u001b[1C(3)\r\n\u001b[2C├─\u001b[1Crg\u001b[1C\"approval.request\"\u001b[1Cgateway/\u001b[1Ctui_gateway/\r\n\u001b[2C├─\u001b[1CReadFile\u001b[1Cgateway/run.py\r\n\u001b[2C└─\u001b[1CReadFile\u001b[1Cgateway/platforms/base.py\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 2900,
+      "id": "tool-trail",
+      "type": "frame"
+    },
+    {
+      "at": 3200,
+      "duration": 1700,
+      "target": "tool-trail",
+      "type": "spotlight"
+    },
+    {
+      "at": 3400,
+      "duration": 1700,
+      "position": "right",
+      "target": "tool-trail",
+      "text": "Real ui-tui MessageLine + Panel rendered to ANSI and replayed via xterm.js.",
+      "type": "caption"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2CFound\u001b[1Cthe\u001b[1Csplit\u001b[1Cguard.\u001b[1CBypass\u001b[1Cboth\u001b[1Cqueues\u001b[1Conly\u001b[1Cfor\u001b[1Capproval\u001b[1Ccommands;\r\n\u001b[3Cnormal\u001b[1Cchat\u001b[1Cordering\u001b[1Cstays\u001b[1Cintact.\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 5400,
+      "id": "assistant-result",
+      "type": "frame"
+    },
+    {
+      "at": 6100,
+      "duration": 1300,
+      "target": "assistant-result",
+      "type": "highlight"
+    },
+    {
+      "at": 6300,
+      "duration": 1700,
+      "position": "right",
+      "target": "assistant-result",
+      "text": "Captions, spotlights, and fades layer on top of real ANSI. Best of both.",
+      "type": "caption"
+    },
+    {
+      "at": 8100,
+      "duration": 600,
+      "text": "/approve",
+      "type": "compose"
+    }
+  ],
+  "title": "Hermes TUI · Feature Tour",
+  "viewport": {
+    "cols": 80,
+    "rows": 16
+  }
+}
@@ -0,0 +1,104 @@
+{
+  "composer": "deploy this to staging",
+  "timeline": [
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2CRun\u001b[1Cnpm\u001b[1Cinstall\u001b[1Cexpress\u001b[1Cin\u001b[1Cthe\u001b[1Cproject\u001b[1Croot.\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 200,
+      "id": "ask",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2CI'll\u001b[1Cinstall\u001b[1Cexpress.\u001b[1CThe\u001b[1Cpackage\u001b[1Cmanager\u001b[1Cneeds\u001b[1Capproval\u001b[1C—\u001b[1Chere's\u001b[1Cthe\r\n\u001b[3Ccommand.\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 1200,
+      "id": "explain",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╔══════════════════════════════════════════════════════════════════════════════╗\r\n║\u001b[1C⚠\u001b[1Capproval\u001b[1Crequired\u001b[1C·\u001b[1Cinstall\u001b[1Cdependency\u001b[36C║\r\n║\u001b[2Cnpm\u001b[1Cinstall\u001b[1Cexpress\u001b[57C║\r\n║\u001b[2Cadded\u001b[1C58\u001b[1Cpackages\u001b[1Cin\u001b[1C3.2s\u001b[51C║\r\n║\u001b[78C║\r\n║\u001b[2C+\u001b[1Cexpress@5.1.0\u001b[61C║\r\n║\u001b[1C▸\u001b[1C1.\u001b[1CAllow\u001b[1Conce\u001b[62C║\r\n║\u001b[3C2.\u001b[1CAllow\u001b[1Cthis\u001b[1Csession\u001b[54C║\r\n║\u001b[3C3.\u001b[1CAlways\u001b[1Callow\u001b[60C║\r\n║\u001b[3C4.\u001b[1CDeny\u001b[68C║\r\n║\u001b[1C↑/↓\u001b[1Cselect\u001b[1C·\u001b[1CEnter\u001b[1Cconfirm\u001b[1C·\u001b[1C1-4\u001b[1Cquick\u001b[1Cpick\u001b[1C·\u001b[1CCtrl+C\u001b[1Cdeny\u001b[20C║\r\n╚══════════════════════════════════════════════════════════════════════════════╝\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 2600,
+      "id": "approval",
+      "type": "frame"
+    },
+    {
+      "at": 2900,
+      "duration": 1500,
+      "target": "approval",
+      "type": "spotlight"
+    },
+    {
+      "at": 3100,
+      "duration": 2000,
+      "position": "right",
+      "target": "approval",
+      "text": "Approval prompts gate dangerous commands. Four options: allow once, session, always, deny.",
+      "type": "caption"
+    },
+    {
+      "at": 5400,
+      "duration": 400,
+      "text": "1",
+      "type": "compose"
+    },
+    {
+      "at": 5900,
+      "duration": 500,
+      "text": "",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2CDeploy\u001b[1Cthis\u001b[1Cto\u001b[1Cstaging.\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 6600,
+      "id": "clarify-ask",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2CWhich\u001b[1Cenvironment\u001b[1Cshould\u001b[1CI\u001b[1Ctarget?\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 7600,
+      "id": "clarify-reply",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026hask\u001b[1CWhich\u001b[1Cregion?\r\n▸\u001b[1C1.\u001b[1Cstaging-us-east\r\n\u001b[2C2.\u001b[1Cstaging-eu-west\r\n\u001b[2C3.\u001b[1Cstaging-ap-south\r\n\u001b[2C4.\u001b[1COther\u001b[1C(type\u001b[1Cyour\u001b[1Canswer)\r\n↑/↓\u001b[1Cselect\u001b[1C·\u001b[1CEnter\u001b[1Cconfirm\u001b[1C·\u001b[1C1-4\u001b[1Cquick\u001b[1Cpick\u001b[1C·\u001b[1CEsc\u001b[1Ccancel\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 8800,
+      "id": "clarify",
+      "type": "frame"
+    },
+    {
+      "at": 9100,
+      "duration": 1500,
+      "target": "clarify",
+      "type": "spotlight"
+    },
+    {
+      "at": 9300,
+      "duration": 2000,
+      "position": "right",
+      "target": "clarify",
+      "text": "Clarify prompts handle ambiguous requests — numbered choices or free text.",
+      "type": "caption"
+    },
+    {
+      "at": 11600,
+      "duration": 400,
+      "text": "1",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╭──────────────────────────────────────────────────────────────────────────────╮\r\n│\u001b[78C│\r\n│\u001b[30Cdeployment\u001b[1Cqueued\u001b[31C│\r\n│\u001b[78C│\r\n│\u001b[2Ctarget\u001b[14Cstaging-us-east\u001b[41C│\r\n│\u001b[2Cbranch\u001b[14Cmain\u001b[52C│\r\n│\u001b[2Cpreview\u001b[13Chttps://pr-128.railway.app\u001b[30C│\r\n│\u001b[78C│\r\n╰──────────────────────────────────────────────────────────────────────────────╯\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 12200,
+      "id": "result",
+      "type": "frame"
+    },
+    {
+      "at": 12500,
+      "duration": 1300,
+      "target": "result",
+      "type": "highlight"
+    }
+  ],
+  "title": "Hermes TUI · Interactive Prompts",
+  "viewport": {
+    "cols": 80,
+    "rows": 16
+  }
+}
@@ -0,0 +1,100 @@
+{
+  "composer": "",
+  "timeline": [
+    {
+      "at": 200,
+      "duration": 500,
+      "text": "/model",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2CSwitch\u001b[1Cto\u001b[1CClaude.\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 900,
+      "id": "ask",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2COpening\u001b[1Cthe\u001b[1Cmodel\u001b[1Cpicker\u001b[1C—\u001b[1Cpick\u001b[1Ca\u001b[1Cprovider\u001b[1Cfirst,\u001b[1Cthen\u001b[1Ca\u001b[1Cmodel.\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 1800,
+      "id": "reply",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╔════════════════════════════════════════════════╗\r\n║\u001b[1CSelect\u001b[1CProvider\u001b[32C║\r\n║\u001b[1CCurrent\u001b[1Cmodel:\u001b[1Cgpt-5-codex\u001b[21C║\r\n║\u001b[48C║\r\n║\u001b[48C║\r\n║\u001b[3C1.\u001b[1COpenAI\u001b[1C·\u001b[1C8\u001b[1Cmodels\u001b[25C║\r\n║\u001b[1C▸\u001b[1C2.\u001b[1CAnthropic\u001b[1C·\u001b[1C6\u001b[1Cmodels\u001b[22C║\r\n║\u001b[3C3.\u001b[1CGoogle\u001b[1C·\u001b[1C5\u001b[1Cmodels\u001b[25C║\r\n║\u001b[3C4.\u001b[1COpenRouter\u001b[1C·\u001b[1C42\u001b[1Cmodels\u001b[20C║\r\n║\u001b[3C5.\u001b[1CxAI\u001b[1C·\u001b[1C3\u001b[1Cmodels\u001b[28C║\r\n║\u001b[48C║\r\n║\u001b[1Cpersist:\u001b[1Csession\u001b[1C·\u001b[1Cg\u001b[1Ctoggle\u001b[20C║\r\n║\u001b[1C↑/↓\u001b[1Cselect\u001b[1C·\u001b[1CEnter\u001b[1Cchoose\u001b[1C·\u001b[1C1-9,0\u001b[1Cquick\u001b[1C·\u001b[6C║\r\n║\u001b[1CEsc/q\u001b[1Ccancel\u001b[35C║\r\n╚════════════════════════════════════════════════╝\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 3000,
+      "id": "providers",
+      "type": "frame"
+    },
+    {
+      "at": 3300,
+      "duration": 1800,
+      "target": "providers",
+      "type": "spotlight"
+    },
+    {
+      "at": 3500,
+      "duration": 2000,
+      "position": "right",
+      "target": "providers",
+      "text": "Provider stage: pick from authenticated backends. Shows model count per provider.",
+      "type": "caption"
+    },
+    {
+      "at": 5600,
+      "duration": 300,
+      "text": "2",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╔════════════════════════════════════════════════╗\r\n║\u001b[1CSelect\u001b[1CModel\u001b[35C║\r\n║\u001b[1CAnthropic\u001b[38C║\r\n║\u001b[48C║\r\n║\u001b[48C║\r\n║\u001b[3C1.\u001b[1Cclaude-opus-4\u001b[29C║\r\n║\u001b[1C▸\u001b[1C2.\u001b[1Cclaude-sonnet-4\u001b[27C║\r\n║\u001b[3C3.\u001b[1Cclaude-sonnet-3.7\u001b[25C║\r\n║\u001b[3C4.\u001b[1Cclaude-haiku-3.5\u001b[26C║\r\n║\u001b[3C5.\u001b[1Cclaude-sonnet-3.5\u001b[25C║\r\n║\u001b[48C║\r\n║\u001b[1Cpersist:\u001b[1Csession\u001b[1C·\u001b[1Cg\u001b[1Ctoggle\u001b[20C║\r\n║\u001b[1C↑/↓\u001b[1Cselect\u001b[1C·\u001b[1CEnter\u001b[1Cchoose\u001b[1C·\u001b[1C1-9,0\u001b[1Cquick\u001b[1C·\u001b[6C║\r\n║\u001b[1CEsc/q\u001b[1Ccancel\u001b[35C║\r\n╚════════════════════════════════════════════════╝\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 6200,
+      "id": "models",
+      "type": "frame"
+    },
+    {
+      "at": 6500,
+      "duration": 1800,
+      "target": "models",
+      "type": "spotlight"
+    },
+    {
+      "at": 6700,
+      "duration": 2000,
+      "position": "right",
+      "target": "models",
+      "text": "Model stage: scrollable list with ▸ selection. Number keys for quick pick.",
+      "type": "caption"
+    },
+    {
+      "at": 9000,
+      "duration": 300,
+      "text": "2",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╭──────────────────────────────────────────────────────────────────────────────╮\r\n│\u001b[78C│\r\n│\u001b[32Cmodel\u001b[1Cswitched\u001b[32C│\r\n│\u001b[78C│\r\n│\u001b[2Cfrom\u001b[16Cgpt-5-codex\u001b[45C│\r\n│\u001b[2Cto\u001b[18Cclaude-sonnet-4\u001b[41C│\r\n│\u001b[2Cscope\u001b[15Cthis\u001b[1Csession\u001b[44C│\r\n│\u001b[78C│\r\n╰──────────────────────────────────────────────────────────────────────────────╯\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 9600,
+      "id": "result",
+      "type": "frame"
+    },
+    {
+      "at": 9900,
+      "duration": 1300,
+      "target": "result",
+      "type": "highlight"
+    },
+    {
+      "at": 10100,
+      "duration": 1700,
+      "position": "right",
+      "target": "result",
+      "text": "Model swap mid-session. Transcript and cache stay intact.",
+      "type": "caption"
+    }
+  ],
+  "title": "Hermes TUI · Model Picker",
+  "viewport": {
+    "cols": 80,
+    "rows": 16
+  }
+}
@@ -0,0 +1,126 @@
+{
+  "composer": "",
+  "timeline": [
+    {
+      "at": 200,
+      "duration": 700,
+      "text": "/skills search vibe",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2C/skills\u001b[1Csearch\u001b[1Cvibe\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 1100,
+      "type": "frame"
+    },
+    {
+      "at": 1100,
+      "duration": 200,
+      "text": "",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╭──────────────────────────────────────────────────────────────────────────────╮\r\n│\u001b[78C│\r\n│\u001b[29Cskills\u001b[1C·\u001b[1Csearch\u001b[1Cvibe\u001b[29C│\r\n│\u001b[78C│\r\n│\u001b[2Canthropics/skills/frontend-design★\u001b[1Ctrusted\u001b[34C│\r\n│\u001b[2Copenai/skills/skill-creator·\u001b[1Cofficial\u001b[39C│\r\n│\u001b[2Cskills.sh/community/vibe-coding⚙\u001b[1Ccommunity\u001b[33C│\r\n│\u001b[78C│\r\n╰──────────────────────────────────────────────────────────────────────────────╯\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 1400,
+      "id": "skills",
+      "type": "frame"
+    },
+    {
+      "at": 1700,
+      "duration": 2000,
+      "position": "right",
+      "target": "skills",
+      "text": "Typed /skills, hit return — same Panel the live TUI renders.",
+      "type": "caption"
+    },
+    {
+      "at": 4000,
+      "duration": 700,
+      "text": "/model claude-4.6-sonnet",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2C/model\u001b[1Cclaude-4.6-sonnet\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 4900,
+      "type": "frame"
+    },
+    {
+      "at": 4900,
+      "duration": 200,
+      "text": "",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╭──────────────────────────────────────────────────────────────────────────────╮\r\n│\u001b[78C│\r\n│\u001b[32Cmodel\u001b[1Cswitched\u001b[32C│\r\n│\u001b[78C│\r\n│\u001b[2Cfrom\u001b[16Cgpt-5-codex\u001b[45C│\r\n│\u001b[2Cto\u001b[18Cclaude-4.6-sonnet\u001b[39C│\r\n│\u001b[2Cscope\u001b[15Cthis\u001b[1Csession\u001b[44C│\r\n│\u001b[78C│\r\n╰──────────────────────────────────────────────────────────────────────────────╯\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 5200,
+      "id": "model",
+      "type": "frame"
+    },
+    {
+      "at": 5500,
+      "duration": 1900,
+      "position": "right",
+      "target": "model",
+      "text": "/model swaps mid-session; transcript and cache stay intact.",
+      "type": "caption"
+    },
+    {
+      "at": 7600,
+      "duration": 600,
+      "text": "/agents pause",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2C/agents\u001b[1Cpause\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 8400,
+      "type": "frame"
+    },
+    {
+      "at": 8400,
+      "duration": 200,
+      "text": "",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╭──────────────────────────────────────────────────────────────────────────────╮\r\n│\u001b[78C│\r\n│\u001b[31Cagents\u001b[1C·\u001b[1Cpaused\u001b[32C│\r\n│\u001b[78C│\r\n│\u001b[2Cdelegation\u001b[10Cpaused\u001b[50C│\r\n│\u001b[2Cmax\u001b[1Cchildren\u001b[8C4\u001b[55C│\r\n│\u001b[2Crunning\u001b[1Ctasks\u001b[7Cqueued\u001b[1Cfor\u001b[1Cresume\u001b[39C│\r\n│\u001b[78C│\r\n╰──────────────────────────────────────────────────────────────────────────────╯\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 8700,
+      "id": "agents",
+      "type": "frame"
+    },
+    {
+      "at": 9000,
+      "duration": 1800,
+      "position": "right",
+      "target": "agents",
+      "text": "Same registry powers TUI, gateway, Telegram, Discord — one truth.",
+      "type": "caption"
+    },
+    {
+      "at": 11000,
+      "duration": 400,
+      "text": "/help",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2C/help\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 11500,
+      "type": "frame"
+    },
+    {
+      "at": 11500,
+      "duration": 200,
+      "text": "",
+      "type": "compose"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h╭──────────────────────────────────────────────────────────────────────────────╮\r\n│\u001b[78C│\r\n│\u001b[31C(^_^)?\u001b[1CCommands\u001b[32C│\r\n│\u001b[78C│\r\n│\u001b[2CTools\u001b[1C&\u001b[1CSkills\u001b[62C│\r\n│\u001b[2C/skills\u001b[4Csearch\u001b[1C·\u001b[1Cinstall\u001b[1C·\u001b[1Cinspect\u001b[39C│\r\n│\u001b[2C/model\u001b[5Cswitch\u001b[1Cmodel\u001b[1C·\u001b[1Cpop\u001b[1Cpicker\u001b[40C│\r\n│\u001b[78C│\r\n│\u001b[2CSession\u001b[69C│\r\n│\u001b[2C/agents\u001b[4Cspawn-tree\u001b[1Cdashboard\u001b[45C│\r\n│\u001b[2C/queue\u001b[5Cqueue\u001b[1Cprompt\u001b[1Cfor\u001b[1Cnext\u001b[1Cturn\u001b[39C│\r\n│\u001b[2C/steer\u001b[5Cinject\u001b[1Cafter\u001b[1Cnext\u001b[1Ctool\u001b[1Ccall\u001b[38C│\r\n│\u001b[78C│\r\n│\u001b[2CConfiguration\u001b[63C│\r\n│\u001b[2C/voice\u001b[5Ctoggle\u001b[1Cvoice\u001b[1Cmode\u001b[48C│\r\n│\u001b[2C/details\u001b[3Cthinking\u001b[1C·\u001b[1Ctools\u001b[1C·\u001b[1Csubagents\u001b[1C·\u001b[1Cactivity\u001b[26C│\r\n│\u001b[78C│\r\n╰──────────────────────────────────────────────────────────────────────────────╯\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 11800,
+      "id": "help",
+      "type": "frame"
+    }
+  ],
+  "title": "Hermes TUI · Slash Commands",
+  "viewport": {
+    "cols": 80,
+    "rows": 16
+  }
+}
@@ -0,0 +1,82 @@
+{
+  "composer": "spawn the deploy fan-out",
+  "timeline": [
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2CRun\u001b[1Ctests,\u001b[1Clint,\u001b[1Cand\u001b[1Ca\u001b[1CRailway\u001b[1Cpreview\u001b[1Cdeploy\u001b[1Cin\u001b[1Cparallel.\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 200,
+      "id": "ask",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2CSpawning\u001b[1Cthree\u001b[1Csubagents\u001b[1Con\u001b[1Cthe\u001b[1Cfan-out\u001b[1Clane\u001b[1Cand\u001b[1Cwatching\u001b[1Ctheir\u001b[1Ctool\r\n\u001b[3Ccounts.\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 1100,
+      "id": "plan",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\u001b[2C├─\u001b[1Ctests\u001b[3Crunning\u001b[3C12\u001b[1Ctools\u001b[3C⏱\u001b[1C14.2s\r\n\u001b[2C├─\u001b[1Clint\u001b[4Crunning\u001b[4C4\u001b[1Ctools\u001b[3C⏱\u001b[1C14.2s\r\n\u001b[2C└─\u001b[1Cdeploy\u001b[2Cqueued\u001b[5C0\u001b[1Ctools\u001b[3C⏱\u001b[2C0.0s\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 2100,
+      "id": "live",
+      "type": "frame"
+    },
+    {
+      "at": 2300,
+      "duration": 1500,
+      "target": "live",
+      "type": "spotlight"
+    },
+    {
+      "at": 2500,
+      "duration": 1700,
+      "position": "right",
+      "target": "live",
+      "text": "Each subagent gets its own depth and tool budget; the dashboard tracks them live.",
+      "type": "caption"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\u001b[2C├─\u001b[1Ctests\u001b[3Ccomplete\u001b[2C18\u001b[1Ctools\u001b[3C⏱\u001b[1C22.7s\u001b[3C✓\r\n\u001b[2C├─\u001b[1Clint\u001b[4Ccomplete\u001b[3C6\u001b[1Ctools\u001b[3C⏱\u001b[1C18.1s\u001b[3C✓\r\n\u001b[2C└─\u001b[1Cdeploy\u001b[2Crunning\u001b[4C9\u001b[1Ctools\u001b[3C⏱\u001b[2C9.4s\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 4400,
+      "id": "hot",
+      "type": "frame"
+    },
+    {
+      "at": 4600,
+      "duration": 1300,
+      "target": "hot",
+      "type": "highlight"
+    },
+    {
+      "at": 4800,
+      "duration": 1700,
+      "position": "right",
+      "target": "hot",
+      "text": "Completed runs collapse, hot lanes stay vivid — the eye tracks the live agent.",
+      "type": "caption"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2CAll\u001b[1Cthree\u001b[1Clanded:\u001b[1C24\u001b[1Ctests\u001b[1Cpass,\u001b[1Clint\u001b[1Cclean,\u001b[1Cpreview\u001b[1Cat\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 6800,
+      "id": "summary",
+      "type": "frame"
+    },
+    {
+      "at": 7000,
+      "duration": 1700,
+      "position": "right",
+      "target": "summary",
+      "text": "Subagent results stream back into the parent transcript as a single highlight.",
+      "type": "caption"
+    },
+    {
+      "at": 8800,
+      "duration": 600,
+      "text": "/agents",
+      "type": "compose"
+    }
+  ],
+  "title": "Hermes TUI · Subagent Trail",
+  "viewport": {
+    "cols": 80,
+    "rows": 16
+  }
+}
@@ -0,0 +1,76 @@
+{
+  "composer": "ctrl+b to start recording",
+  "timeline": [
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\u001b[2C⚡\u001b[1CVAD\u001b[1C·\u001b[1Ccapturing\u001b[1C(3)\r\n\u001b[2C├─\u001b[1C▮\u001b[1C▮▮\u001b[1C▮\u001b[1C▮▮▮▮\u001b[1C▮▮\u001b[1C▮▮▮▮▮▮\u001b[1C▮▮▮\u001b[1C▮\r\n\u001b[2C├─\u001b[1Crms\u001b[1C0.42\u001b[1C·\u001b[1C1.6s\u001b[1Ccaptured\r\n\u001b[2C└─\u001b[1Cauto-stop\u001b[1C·\u001b[1Csilence\u001b[1C380ms\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 250,
+      "id": "vad",
+      "type": "frame"
+    },
+    {
+      "at": 600,
+      "duration": 1500,
+      "target": "vad",
+      "type": "spotlight"
+    },
+    {
+      "at": 800,
+      "duration": 1700,
+      "position": "right",
+      "target": "vad",
+      "text": "Continuous loop: VAD detects silence, transcribes, restarts — no key holds.",
+      "type": "caption"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\r\n❯\u001b[2Cwhat's\u001b[1Cin\u001b[1Cmy\u001b[1Cinbox\u001b[1Ctoday\u001b[1Cand\u001b[1Cwhat\u001b[1Cneeds\u001b[1Ca\u001b[1Creply\u001b[1Cbefore\u001b[1Cnoon?\r\n\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 2700,
+      "id": "transcript",
+      "type": "frame"
+    },
+    {
+      "at": 3400,
+      "duration": 1100,
+      "target": "transcript",
+      "type": "highlight"
+    },
+    {
+      "at": 3600,
+      "duration": 1700,
+      "position": "right",
+      "target": "transcript",
+      "text": "Transcript flows straight into the composer with the standard ❯ user glyph.",
+      "type": "caption"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h┊\u001b[2CThree\u001b[1Cthreads\u001b[1Cneed\u001b[1Cyou\u001b[1Cbefore\u001b[1Cnoon:\u001b[1Cvendor\u001b[1Crenewal,\u001b[1Cpodcast\u001b[1Cintro\u001b[1Cfeedback,\r\n\u001b[4Cand\u001b[1Cthe\u001b[1Cdesign\u001b[1Creview\u001b[1Cat\u001b[1C11.\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 5500,
+      "id": "answer",
+      "type": "frame"
+    },
+    {
+      "ansi": "\u001b[?25l\u001b[?2026h\u001b[2C⚡\u001b[1Ctts\u001b[1C·\u001b[1Cplaying\u001b[1C(3)\r\n\u001b[2C├─\u001b[1Cvoice\u001b[1C11labs\u001b[1C·\u001b[1Cgrace_v3\r\n\u001b[2C├─\u001b[1Celapsed\u001b[1C4.6s\u001b[1C·\u001b[1C2\u001b[1Cchunks\u001b[1Cqueued\r\n\u001b[2C└─\u001b[1Cducking\u001b[1Cmic\u001b[1Cinput\r\n\u001b[?2026l\u001b[?2026h\u001b[?25h\u001b[?2026l\u001b[?25h",
+      "at": 6700,
+      "id": "tts",
+      "type": "frame"
+    },
+    {
+      "at": 7000,
+      "duration": 1700,
+      "position": "right",
+      "target": "tts",
+      "text": "TTS auto-ducks the mic so the loop never echoes itself back.",
+      "type": "caption"
+    },
+    {
+      "at": 8800,
+      "duration": 600,
+      "text": "/voice off",
+      "type": "compose"
+    }
+  ],
+  "title": "Hermes TUI · Voice Mode",
+  "viewport": {
+    "cols": 80,
+    "rows": 16
+  }
+}
@@ -110,7 +110,7 @@ Current input behavior is split across `app.tsx`, `components/textInput.tsx`, an
 | `\` + `Enter`                   | Append the line to the multiline buffer (fallback for terminals without modifier support)                                                               |
 | `Ctrl+C`                        | Interrupt active run, or clear the current draft, or exit if nothing is pending                                                                         |
 | `Ctrl+D`                        | Exit                                                                                                                                                    |
-| `Ctrl+G`                        | Open `$EDITOR` with the current draft                                                                                                                   |
+| `Cmd/Ctrl+G` / `Alt+G`          | Open `$EDITOR` with the current draft (use `Alt+G` in VSCode/Cursor — they bind the primary keystroke to Find Next)                                     |
 | `Ctrl+L`                        | New session (same as `/clear`)                                                                                                                          |
 | `Ctrl+V` / `Alt+V`              | Paste text first, then fall back to image/path attachment when applicable                                                                               |
 | `Tab`                           | Apply the active completion                                                                                                                             |
@@ -169,7 +169,7 @@ Notes:
 - If you load a queued item into the input and resubmit plain text, that queue item is replaced, removed from the queue preview, and promoted to send next. If the agent is still busy, the edited item is moved to the front of the queue and sent after the current run completes.
 - Completion requests are debounced by 60 ms. Input starting with `/` uses `complete.slash`. A trailing token that starts with `./`, `../`, `~/`, `/`, or `@` uses `complete.path`.
 - Text pastes are inserted inline directly into the draft. Nothing is newline-flattened.
- `Ctrl+G` writes the current draft, including any multiline buffer, to a temp file, temporarily swaps screen buffers, launches `$EDITOR`, then restores the TUI and submits the saved text if the editor exits cleanly.
+- `Cmd/Ctrl+G` (or `Alt+G` in VSCode/Cursor, which intercept the primary keystroke for Find Next) writes the current draft, including any multiline buffer, to a temp file, suspends Ink, launches `$EDITOR`, then restores the TUI and submits the saved text if the editor exits cleanly.
 - Input history is stored in `~/.hermes/.hermes_history` or under `HERMES_HOME`.

 ## Rendering
@@ -13,7 +13,11 @@
    "fmt": "prettier --write 'src/**/*.{ts,tsx}' 'packages/**/*.{ts,tsx}'",
    "fix": "npm run lint:fix && npm run fmt",
    "test": "vitest run",
-    "test:watch": "vitest"
+    "test:watch": "vitest",
+    "showroom": "tsx .showroom/server.ts",
+    "showroom:build": "tsx .showroom/build.ts",
+    "showroom:type-check": "tsc --noEmit -p .showroom/tsconfig.json",
+    "showroom:record": "tsx .showroom/record.tsx"
  },
  "dependencies": {
    "@hermes/ink": "file:./packages/hermes-ink",
@@ -53,7 +53,11 @@ export function AlternateScreen(t0: Props) {
      }

      writeRaw(
-        ENTER_ALT_SCREEN + ERASE_SCROLLBACK + ERASE_SCREEN + CURSOR_HOME + (mouseTracking ? ENABLE_MOUSE_TRACKING : DISABLE_MOUSE_TRACKING)
+        ENTER_ALT_SCREEN +
+          ERASE_SCROLLBACK +
+          ERASE_SCREEN +
+          CURSOR_HOME +
+          (mouseTracking ? ENABLE_MOUSE_TRACKING : DISABLE_MOUSE_TRACKING)
      )
      ink?.setAltScreenActive(true, mouseTracking)

@@ -323,27 +323,39 @@ const measureTextNode = function (
  widthMode: LayoutMeasureMode
 ): { width: number; height: number } {
  const elem = node.nodeName !== '#text' ? (node as DOMElement) : node.parentNode
+
  if (elem && elem.nodeName === 'ink-text') {
    let cache = elem._textMeasureCache
+
    if (!cache) {
      cache = { gen: 0, entries: new Map() }
      elem._textMeasureCache = cache
    }
+
    const key = `${width}|${widthMode}`
    const hit = cache.entries.get(key)
+
    if (hit && hit._gen === cache.gen) {
      return hit.result
    }
+
    const result = computeTextMeasure(node, width, widthMode)
+
    // Enforce cap with FIFO eviction to avoid unbounded growth during
    // pathological frames where yoga probes many widths.
    if (cache.entries.size >= MEASURE_CACHE_CAP) {
      const firstKey = cache.entries.keys().next().value
-      cache.entries.delete(firstKey)
+
+      if (firstKey !== undefined) {
+        cache.entries.delete(firstKey)
+      }
    }
+
    cache.entries.set(key, { _gen: cache.gen, result })
+
    return result
  }
+
  return computeTextMeasure(node, width, widthMode)
 }

@@ -475,6 +487,7 @@ export const clearYogaNodeReferences = (node: DOMElement | TextNode): void => {
    for (const child of node.childNodes) {
      clearYogaNodeReferences(child)
    }
+
    node._textMeasureCache = undefined
  }

@@ -436,6 +436,13 @@ export type Screen = Size & {
   */
  noSelect: Uint8Array

+  /**
+   * Per-cell written bitmap. A written plain space and never-written padding
+   * share the same packed cell value, so selection needs this side channel to
+   * preserve code indentation without selecting blank UI margins.
+   */
+  written: Uint8Array
+
  /**
   * Per-ROW soft-wrap continuation marker. softWrap[r]=N>0 means row r
   * is a word-wrap continuation of row r-1 (the `\n` before it was
@@ -475,6 +482,14 @@ export function isEmptyCellAt(screen: Screen, x: number, y: number): boolean {
  return isEmptyCellByIndex(screen, y * screen.width + x)
 }

+export function isWrittenCellAt(screen: Screen, x: number, y: number): boolean {
+  if (x < 0 || y < 0 || x >= screen.width || y >= screen.height) {
+    return false
+  }
+
+  return screen.written[y * screen.width + x] === 1
+}
+
 /**
 * Check if a Cell (view object) represents an empty cell.
 */
@@ -533,6 +548,7 @@ export function createScreen(
    emptyStyleId: styles.none,
    damage: undefined,
    noSelect: new Uint8Array(size),
+    written: new Uint8Array(size),
    softWrap: new Int32Array(height)
  }
 }
@@ -566,6 +582,7 @@ export function resetScreen(screen: Screen, width: number, height: number): void
    screen.cells = new Int32Array(buf)
    screen.cells64 = new BigInt64Array(buf)
    screen.noSelect = new Uint8Array(size)
+    screen.written = new Uint8Array(size)
  }

  if (screen.softWrap.length < height) {
@@ -575,6 +592,7 @@ export function resetScreen(screen: Screen, width: number, height: number): void
  // Reset all cells — single fill call, no loop
  screen.cells64.fill(EMPTY_CELL_VALUE, 0, size)
  screen.noSelect.fill(0, 0, size)
+  screen.written.fill(0, 0, size)
  screen.softWrap.fill(0, 0, height)

  // Update dimensions
@@ -770,6 +788,7 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
      if ((cells[spacerCI + 1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
        cells[spacerCI] = EMPTY_CHAR_INDEX
        cells[spacerCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+        screen.written[y * screen.width + spacerX] = 0
      }
    }
  }
@@ -787,6 +806,7 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
      if ((cells[wideCI + 1]! & WIDTH_MASK) === CellWidth.Wide) {
        cells[wideCI] = EMPTY_CHAR_INDEX
        cells[wideCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+        screen.written[y * screen.width + x - 1] = 0
        clearedWideX = x - 1
      }
    }
@@ -795,6 +815,7 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
  // Pack cell data into cells array
  cells[ci] = internCharString(screen, cell.char)
  cells[ci + 1] = packWord1(cell.styleId, internHyperlink(screen, cell.hyperlink), cell.width)
+  screen.written[y * screen.width + x] = 1

  // Track damage - expand bounds in place instead of allocating new objects
  // Include the main cell position and any cleared orphan cells
@@ -841,11 +862,13 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
        if (spacerX + 1 < screen.width && (cells[orphanCI + 1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
          cells[orphanCI] = EMPTY_CHAR_INDEX
          cells[orphanCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+          screen.written[y * screen.width + spacerX + 1] = 0
        }
      }

      cells[spacerCI] = SPACER_CHAR_INDEX
      cells[spacerCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.SpacerTail)
+      screen.written[y * screen.width + spacerX] = 1

      // Expand damage to include SpacerTail so diff() scans it
      const d = screen.damage
@@ -929,6 +952,8 @@ export function blitRegion(
  const dstCells = dst.cells
  const srcNoSel = src.noSelect
  const dstNoSel = dst.noSelect
+  const srcWritten = src.written
+  const dstWritten = dst.written

  // softWrap is per-row — copy the row range regardless of stride/width.
  // Partial-width blits still carry the row's wrap provenance since the
@@ -947,6 +972,7 @@ export function blitRegion(
    const nsStart = regionY * src.width
    const nsLen = (maxY - regionY) * src.width
    dstNoSel.set(srcNoSel.subarray(nsStart, nsStart + nsLen), nsStart)
+    dstWritten.set(srcWritten.subarray(nsStart, nsStart + nsLen), nsStart)
  } else {
    // Per-row copy for partial-width or mismatched-stride regions
    let srcRowCI = regionY * srcStride + (regionX << 1)
@@ -957,6 +983,7 @@ export function blitRegion(
    for (let y = regionY; y < maxY; y++) {
      dstCells.set(srcCells.subarray(srcRowCI, srcRowCI + rowBytes), dstRowCI)
      dstNoSel.set(srcNoSel.subarray(srcRowNS, srcRowNS + rowLen), dstRowNS)
+      dstWritten.set(srcWritten.subarray(srcRowNS, srcRowNS + rowLen), dstRowNS)
      srcRowCI += srcStride
      dstRowCI += dstStride
      srcRowNS += src.width
@@ -989,6 +1016,7 @@ export function blitRegion(
      if ((srcCells[srcLastCI + 1]! & WIDTH_MASK) === CellWidth.Wide) {
        dstCells[dstSpacerCI] = SPACER_CHAR_INDEX
        dstCells[dstSpacerCI + 1] = packWord1(dst.emptyStyleId, 0, CellWidth.SpacerTail)
+        dstWritten[y * dst.width + maxX] = 1
        wroteSpacerOutsideRegion = true
      }

@@ -1030,6 +1058,7 @@ export function clearRegion(

  const cells = screen.cells
  const cells64 = screen.cells64
+  const written = screen.written
  const screenWidth = screen.width
  const rowBase = startY * screenWidth
  let damageMinX = startX
@@ -1040,6 +1069,7 @@ export function clearRegion(
  if (startX === 0 && maxX === screenWidth) {
    // Full-width: single fill, no boundary checks needed
    cells64.fill(EMPTY_CELL_VALUE, rowBase, rowBase + (maxY - startY) * screenWidth)
+    written.fill(0, rowBase, rowBase + (maxY - startY) * screenWidth)
  } else {
    // Partial-width: single loop handles boundary cleanup and fill per row.
    const stride = screenWidth << 1 // 2 Int32s per cell
@@ -1062,6 +1092,7 @@ export function clearRegion(
          if ((cells[prevW1]! & WIDTH_MASK) === CellWidth.Wide) {
            cells[prevW1 - 1] = EMPTY_CHAR_INDEX
            cells[prevW1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+            written[y * screenWidth + startX - 1] = 0
            damageMinX = startX - 1
          }
        }
@@ -1078,12 +1109,14 @@ export function clearRegion(
          if ((cells[nextW1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
            cells[nextW1 - 1] = EMPTY_CHAR_INDEX
            cells[nextW1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+            written[y * screenWidth + maxX] = 0
            damageMaxX = maxX + 1
          }
        }
      }

      cells64.fill(EMPTY_CELL_VALUE, fillStart, fillStart + rowLen)
+      written.fill(0, fillStart, fillStart + rowLen)
      leftEdge += stride
      rightEdge += stride
      fillStart += screenWidth
@@ -1120,12 +1153,14 @@ export function shiftRows(screen: Screen, top: number, bottom: number, n: number
  const w = screen.width
  const cells64 = screen.cells64
  const noSel = screen.noSelect
+  const written = screen.written
  const sw = screen.softWrap
  const absN = Math.abs(n)

  if (absN > bottom - top) {
    cells64.fill(EMPTY_CELL_VALUE, top * w, (bottom + 1) * w)
    noSel.fill(0, top * w, (bottom + 1) * w)
+    written.fill(0, top * w, (bottom + 1) * w)
    sw.fill(0, top, bottom + 1)

    return
@@ -1135,17 +1170,21 @@ export function shiftRows(screen: Screen, top: number, bottom: number, n: number
    // SU: row top+n..bottom → top..bottom-n; clear bottom-n+1..bottom
    cells64.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
    noSel.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
+    written.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
    sw.copyWithin(top, top + n, bottom + 1)
    cells64.fill(EMPTY_CELL_VALUE, (bottom - n + 1) * w, (bottom + 1) * w)
    noSel.fill(0, (bottom - n + 1) * w, (bottom + 1) * w)
+    written.fill(0, (bottom - n + 1) * w, (bottom + 1) * w)
    sw.fill(0, bottom - n + 1, bottom + 1)
  } else {
    // SD: row top..bottom+n → top-n..bottom; clear top..top-n-1
    cells64.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
    noSel.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
+    written.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
    sw.copyWithin(top - n, top, bottom + n + 1)
    cells64.fill(EMPTY_CELL_VALUE, top * w, (top - n) * w)
    noSel.fill(0, top * w, (top - n) * w)
+    written.fill(0, top * w, (top - n) * w)
    sw.fill(0, top, top - n)
  }
 }
@@ -0,0 +1,82 @@
+import { describe, expect, it } from 'vitest'
+
+import { cellAt, CellWidth, CharPool, createScreen, HyperlinkPool, setCellAt, StylePool } from './screen.js'
+import {
+  applySelectionOverlay,
+  createSelectionState,
+  getSelectedText,
+  startSelection,
+  updateSelection
+} from './selection.js'
+
+const screenWithText = () => { // fixture: the only written cells are "hi" at columns 2-3 of row 1
+  const styles = new StylePool()
+  const screen = createScreen(10, 3, styles, new CharPool(), new HyperlinkPool())
+
+  setCellAt(screen, 2, 1, { char: 'h', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
+  setCellAt(screen, 3, 1, { char: 'i', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
+
+  return { screen, styles } // styles is returned because applySelectionOverlay takes it as an argument
+}
+
+describe('selection whitespace handling', () => { // never-written cells must not leak into copied text or the highlight
+  it('does not copy whitespace-only selections', () => {
+    const { screen } = screenWithText()
+    const selection = createSelectionState()
+
+    startSelection(selection, 0, 0) // arguments are (col, row); row 0 has no written cells
+    updateSelection(selection, 9, 0)
+
+    expect(getSelectedText(selection, screen)).toBe('')
+  })
+
+  it('trims outer drag padding while preserving selected content', () => {
+    const { screen } = screenWithText()
+    const selection = createSelectionState()
+
+    startSelection(selection, 0, 1) // columns 0-9 of row 1; only columns 2-3 are written
+    updateSelection(selection, 9, 1)
+
+    expect(getSelectedText(selection, screen)).toBe('hi')
+  })
+
+  it('preserves selected indentation when spaces are rendered content', () => {
+    const styles = new StylePool()
+    const screen = createScreen(10, 1, styles, new CharPool(), new HyperlinkPool())
+    const selection = createSelectionState()
+
+    setCellAt(screen, 0, 0, { char: ' ', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
+    setCellAt(screen, 1, 0, { char: ' ', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
+    setCellAt(screen, 2, 0, { char: 'x', hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
+
+    startSelection(selection, 0, 0) // the two leading spaces were written via setCellAt, so they must be copied
+    updateSelection(selection, 9, 0)
+
+    expect(getSelectedText(selection, screen)).toBe('  x')
+  })
+
+  it('clamps copied selection bounds to screen width', () => {
+    const { screen } = screenWithText()
+    const selection = createSelectionState()
+
+    startSelection(selection, 0, 1)
+    updateSelection(selection, 99, 1) // end column far beyond the fixture's last selected column (9)
+
+    expect(getSelectedText(selection, screen)).toBe('hi')
+  })
+
+  it('does not paint selection background on leading/trailing empty cells or empty rows', () => {
+    const { screen, styles } = screenWithText()
+    const selection = createSelectionState()
+
+    startSelection(selection, 0, 0)
+    updateSelection(selection, 9, 2) // selection spans rows 0 through 2
+    applySelectionOverlay(screen, selection, styles)
+
+    expect(cellAt(screen, 0, 0)?.styleId).toBe(screen.emptyStyleId)
+    expect(cellAt(screen, 0, 1)?.styleId).toBe(screen.emptyStyleId)
+    expect(cellAt(screen, 2, 1)?.styleId).not.toBe(screen.emptyStyleId) // 'h': the one checked cell that must be restyled
+    expect(cellAt(screen, 4, 1)?.styleId).toBe(screen.emptyStyleId)
+    expect(cellAt(screen, 0, 2)?.styleId).toBe(screen.emptyStyleId)
+  })
+})
@@ -12,7 +12,7 @@

 import { clamp } from './layout/geometry.js'
 import type { Screen, StylePool } from './screen.js'
-import { cellAt, cellAtIndex, CellWidth, setCellStyleId } from './screen.js'
+import { cellAt, cellAtIndex, CellWidth, isWrittenCellAt, setCellStyleId } from './screen.js'

 type Point = { col: number; row: number }

@@ -842,6 +842,43 @@ export function isCellSelected(s: SelectionState, col: number, row: number): boo
  return true
 }

+/** True when (col, row) is copyable content: a written, selectable cell that is not a spacer. */
+function selectableCell(screen: Screen, row: number, col: number): boolean {
+  const cell = cellAt(screen, col, row)
+
+  // A missing cell or a spacer slot (SpacerTail/SpacerHead) is never content.
+  if (!cell || cell.width === CellWidth.SpacerTail || cell.width === CellWidth.SpacerHead) {
+    return false
+  }
+
+  return screen.noSelect[row * screen.width + col] !== 1 && isWrittenCellAt(screen, col, row)
+}
+
+/** Narrow [start, end] on `row` to its first and last selectable cells; null if there are none. */
+function selectionContentBounds(
+  screen: Screen,
+  row: number,
+  start: number,
+  end: number
+): { first: number; last: number } | null {
+  for (let first = start; first <= end; first++) {
+    if (!selectableCell(screen, row, first)) {
+      continue
+    }
+
+    let last = end
+
+    // `first` is known to be selectable, so the backwards scan can stop there.
+    while (last > first && !selectableCell(screen, row, last)) {
+      last--
+    }
+
+    return { first, last }
+  }
+
+  return null
+}
+
 /** Extract text from one screen row. When the next row is a soft-wrap
 *  continuation (screen.softWrap[row+1]>0), clamp to that content-end
 *  column and skip the trailing trim so the word-separator space survives
@@ -890,6 +927,21 @@ function joinRows(lines: string[], text: string, sw: boolean | undefined): void
  }
 }

+/** Drop whitespace-only entries from both ends of `lines`; blank lines in between are kept. */
+function trimEmptyEdgeRows(lines: string[]): string[] {
+  const hasText = (line: string): boolean => line.trim() !== ''
+  // findIndex yields -1 when every line is whitespace-only.
+  const start = lines.findIndex(hasText)
+  let end = start < 0 ? 0 : lines.length
+
+  // lines[start] has text, so the backwards scan never moves past it.
+  while (end > start + 1 && !hasText(lines[end - 1]!)) {
+    end--
+  }
+
+  return start < 0 ? [] : lines.slice(start, end)
+}
+
 /**
 * Extract text from the screen buffer within the selection range.
 * Rows are joined with newlines unless the screen's softWrap bitmap
@@ -917,16 +969,18 @@ export function getSelectedText(s: SelectionState, screen: Screen): string {
  }

  for (let row = start.row; row <= end.row; row++) {
-    const rowStart = row === start.row ? start.col : 0
-    const rowEnd = row === end.row ? end.col : screen.width - 1
-    joinRows(lines, extractRowText(screen, row, rowStart, rowEnd), sw[row]! > 0)
+    const rowStart = Math.max(0, row === start.row ? start.col : 0)
+    const rowEnd = Math.min(row === end.row ? end.col : screen.width - 1, screen.width - 1)
+    const bounds = selectionContentBounds(screen, row, rowStart, rowEnd)
+
+    joinRows(lines, bounds ? extractRowText(screen, row, bounds.first, bounds.last) : '', sw[row]! > 0)
  }

  for (let i = 0; i < s.scrolledOffBelow.length; i++) {
    joinRows(lines, s.scrolledOffBelow[i]!, s.scrolledOffBelowSW[i])
  }

-  return lines.join('\n')
+  return trimEmptyEdgeRows(lines).join('\n')
 }

 /**
@@ -1051,9 +1105,14 @@ export function applySelectionOverlay(screen: Screen, selection: SelectionState,
  for (let row = start.row; row <= end.row && row < screen.height; row++) {
    const colStart = row === start.row ? start.col : 0
    const colEnd = row === end.row ? Math.min(end.col, width - 1) : width - 1
+    const bounds = selectionContentBounds(screen, row, colStart, colEnd)
    const rowOff = row * width

-    for (let col = colStart; col <= colEnd; col++) {
+    if (!bounds) {
+      continue
+    }
+
+    for (let col = bounds.first; col <= bounds.last; col++) {
      const idx = rowOff + col

      // Skip noSelect cells — gutters stay visually unchanged so it's
@@ -9,18 +9,21 @@ describe('shouldEmitClipboardSequence', () => {
  })

  it('keeps OSC enabled for remote or plain local terminals', () => {
-    expect(shouldEmitClipboardSequence({ SSH_CONNECTION: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe(
-      true
-    )
+    expect(
+      shouldEmitClipboardSequence({ SSH_CONNECTION: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)
+    ).toBe(true)
    expect(shouldEmitClipboardSequence({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
  })

  it('honors explicit env override', () => {
-    expect(shouldEmitClipboardSequence({ HERMES_TUI_CLIPBOARD_OSC52: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe(
-      true
-    )
-    expect(shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(
-      false
-    )
+    expect(
+      shouldEmitClipboardSequence({
+        HERMES_TUI_CLIPBOARD_OSC52: '1',
+        TMUX: '/tmp/tmux-1/default,1,0'
+      } as NodeJS.ProcessEnv)
+    ).toBe(true)
+    expect(
+      shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)
+    ).toBe(false)
  })
 })
@@ -226,7 +226,10 @@ describe('createGatewayEventHandler', () => {
    const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
    const assistantText = 'Done. Clean swap:\n\n```diff\n-old\n+new\n```'

-    onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({
+      payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
+      type: 'tool.complete'
+    } as any)
    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

    expect(appended).toHaveLength(1)
@@ -17,6 +17,14 @@ describe('createSlashHandler', () => {
    expect(getOverlayState().picker).toBe(true)
  })

+  it('treats /provider as a local /model alias', () => {
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/provider')).toBe(true) // the handler reports the command as handled
+    expect(getOverlayState().modelPicker).toBe(true) // same overlay flag that /model is expected to set
+    expect(ctx.gateway.gw.request).not.toHaveBeenCalled() // "local": no gateway request is issued
+  })
+
  it('opens the skills hub locally for bare /skills', () => {
    const ctx = buildCtx()

@@ -118,9 +126,7 @@ describe('createSlashHandler', () => {
    const ctx = buildCtx()
    createSlashHandler(ctx)('/details tools blink')
    expect(getUiState().sections.tools).toBeUndefined()
-    expect(ctx.transcript.sys).toHaveBeenCalledWith(
-      'usage: /details <section> [hidden|collapsed|expanded|reset]'
-    )
+    expect(ctx.transcript.sys).toHaveBeenCalledWith('usage: /details <section> [hidden|collapsed|expanded|reset]')
  })

  it('shows tool enable usage when names are missing', () => {
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest'

-import { isSectionName, parseDetailsMode, resolveSections, sectionMode, SECTION_NAMES } from '../domain/details.js'
+import { isSectionName, parseDetailsMode, resolveSections, SECTION_NAMES, sectionMode } from '../domain/details.js'

 describe('parseDetailsMode', () => {
  it('accepts the canonical modes case-insensitively', () => {
@@ -31,6 +31,28 @@ describe('platform action modifier', () => {
  })
 })

+describe('isCopyShortcut', () => { // third argument is the environment the shortcut check inspects
+  it('keeps Ctrl+C as the local non-macOS copy chord', async () => {
+    const { isCopyShortcut } = await importPlatform('linux')
+
+    expect(isCopyShortcut({ ctrl: true, meta: false, super: false }, 'c', {})).toBe(true) // {} = no SSH markers
+  })
+
+  it('accepts client Cmd+C over SSH even when running on Linux', async () => {
+    const { isCopyShortcut } = await importPlatform('linux')
+    const env = { SSH_CONNECTION: '1 2 3 4' } as NodeJS.ProcessEnv
+
+    expect(isCopyShortcut({ ctrl: false, meta: false, super: true }, 'c', env)).toBe(true) // reported as super
+    expect(isCopyShortcut({ ctrl: false, meta: true, super: false }, 'c', env)).toBe(true) // reported as meta
+  })
+
+  it('does not treat local Linux Alt+C as copy', async () => {
+    const { isCopyShortcut } = await importPlatform('linux')
+
+    expect(isCopyShortcut({ ctrl: false, meta: true, super: false }, 'c', {})).toBe(false) // same key as above, no SSH env
+  })
+})
+
 describe('isVoiceToggleKey', () => {
  it('matches raw Ctrl+B on macOS (doc-default across platforms)', async () => {
    const { isVoiceToggleKey } = await importPlatform('darwin')
@@ -1,8 +1,8 @@
 import { useStore } from '@nanostores/react'

 import { GatewayProvider } from './app/gatewayContext.js'
-import { useMainApp } from './app/useMainApp.js'
 import { $uiState } from './app/uiStore.js'
+import { useMainApp } from './app/useMainApp.js'
 import { AppLayout } from './components/appLayout.js'
 import type { GatewayClient } from './gatewayClient.js'

@@ -121,7 +121,7 @@ export interface ComposerActions {
  dequeue: () => string | undefined
  enqueue: (text: string) => void
  handleTextPaste: (event: PasteEvent) => MaybePromise<ComposerPasteResult | null>
-  openEditor: () => void
+  openEditor: () => Promise<void>
  pushHistory: (text: string) => void
  replaceQueue: (index: number, text: string) => void
  setCompIdx: StateSetter<number>
@@ -1,7 +1,7 @@
 import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
 import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
 import { HOTKEYS } from '../../../content/hotkeys.js'
-import { SECTION_NAMES, isSectionName, nextDetailsMode, parseDetailsMode } from '../../../domain/details.js'
+import { isSectionName, nextDetailsMode, parseDetailsMode, SECTION_NAMES } from '../../../domain/details.js'
 import type {
  ConfigGetValueResponse,
  ConfigSetResponse,
@@ -40,8 +40,10 @@ const flagFromArg = (arg: string, current: boolean): boolean | null => {

 const RESET_WORDS = new Set(['reset', 'clear', 'default'])
 const CYCLE_WORDS = new Set(['cycle', 'toggle'])
+
 const DETAILS_USAGE =
  'usage: /details [hidden|collapsed|expanded|cycle]  or  /details <section> [hidden|collapsed|expanded|reset]'
+
 const DETAILS_SECTION_USAGE = 'usage: /details <section> [hidden|collapsed|expanded|reset]'

 export const coreCommands: SlashCommand[] = [
@@ -97,9 +99,7 @@ export const coreCommands: SlashCommand[] = [
      }

      patchUiState({ mouseTracking: next })
-      ctx.gateway
-        .rpc<ConfigSetResponse>('config.set', { key: 'mouse', value: next ? 'on' : 'off' })
-        .catch(() => {})
+      ctx.gateway.rpc<ConfigSetResponse>('config.set', { key: 'mouse', value: next ? 'on' : 'off' }).catch(() => {})

      queueMicrotask(() => ctx.transcript.sys(`mouse tracking ${next ? 'on' : 'off'}`))
    }
@@ -178,7 +178,9 @@ export const coreCommands: SlashCommand[] = [
        gateway
          .rpc<ConfigGetValueResponse>('config.get', { key: 'details_mode' })
          .then(r => {
-            if (ctx.stale()) return
+            if (ctx.stale()) {
+              return
+            }

            const mode = parseDetailsMode(r?.value) ?? ui.detailsMode
            patchUiState({ detailsMode: mode })
@@ -267,7 +269,6 @@ export const coreCommands: SlashCommand[] = [
      }

      writeOsc52Clipboard(target.text)
-      sys(`copied ${target.text.length} chars`)
    }
  },

@@ -58,6 +58,7 @@ export const sessionCommands: SlashCommand[] = [

  {
    help: 'change or show model',
+    aliases: ['provider'],
    name: 'model',
    run: (arg, ctx) => {
      if (ctx.session.guardBusySessionSwitch('change models')) {
@@ -5,18 +5,6 @@ import { runExternalSetup } from '../../setupHandoff.js'
 import type { SlashCommand } from '../types.js'

 export const setupCommands: SlashCommand[] = [
-  {
-    help: 'configure LLM provider + model (launches `hermes model`)',
-    name: 'provider',
-    run: (_arg, ctx) =>
-      void runExternalSetup({
-        args: ['model'],
-        ctx,
-        done: 'provider updated — starting session…',
-        launcher: launchHermesCommand,
-        suspend: withInkSuspended
-      })
-  },
  {
    help: 'run full setup wizard (launches `hermes setup`)',
    name: 'setup',
@@ -300,6 +300,7 @@ class TurnController {

    const hasDiffSegment = segments.some(msg => msg.kind === 'diff')
    const detailsBelongBeforeDiff = hasDiffSegment && (tools.length > 0 || Boolean(savedReasoning))
+
    const finalMessages = detailsBelongBeforeDiff
      ? insertBeforeFirstDiff(segments, {
          kind: 'trail',
@@ -1,8 +1,8 @@
 import { atom } from 'nanostores'

+import { MOUSE_TRACKING } from '../config/env.js'
 import { ZERO } from '../domain/usage.js'
 import { DEFAULT_THEME } from '../theme.js'
-import { MOUSE_TRACKING } from '../config/env.js'

 import type { UiState } from './interfaces.js'

@@ -3,7 +3,7 @@ import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
 import { tmpdir } from 'node:os'
 import { join } from 'node:path'

-import { useStdin } from '@hermes/ink'
+import { useStdin, withInkSuspended } from '@hermes/ink'
 import { useStore } from '@nanostores/react'
 import { useCallback, useMemo, useState } from 'react'

@@ -14,6 +14,7 @@ import { useCompletion } from '../hooks/useCompletion.js'
 import { useInputHistory } from '../hooks/useInputHistory.js'
 import { useQueue } from '../hooks/useQueue.js'
 import { isUsableClipboardText, readClipboardText } from '../lib/clipboard.js'
+import { resolveEditor } from '../lib/editor.js'
 import { readOsc52Clipboard } from '../lib/osc52.js'
 import { isRemoteShellSession } from '../lib/terminalSetup.js'
 import { pasteTokenLabel, stripTrailingPasteNewlines } from '../lib/text.js'
@@ -253,26 +254,36 @@ export function useComposerState({
    [handleResolvedPaste, onClipboardPaste, querier]
  )

-  const openEditor = useCallback(() => {
-    const editor = process.env.EDITOR || process.env.VISUAL || 'vi'
-    const file = join(mkdtempSync(join(tmpdir(), 'hermes-')), 'prompt.md')
-
-    writeFileSync(file, [...inputBuf, input].join('\n'))
-    process.stdout.write('\x1b[?1049l')
-    const { status: code } = spawnSync(editor, [file], { stdio: 'inherit' })
-    process.stdout.write('\x1b[?1049h\x1b[2J\x1b[H')
-
-    if (code === 0) {
-      const text = readFileSync(file, 'utf8').trimEnd()
-
-      if (text) {
-        setInput('')
-        setInputBuf([])
-        submitRef.current(text)
-      }
-    }
-
-    rmSync(file, { force: true })
+  // Compose the prompt in the user's editor, then submit whatever was saved.
+  // resolveEditor() runs before the temp dir exists, and everything after
+  // mkdtempSync sits inside try/finally, so the draft never lingers in the
+  // temp directory when writing the file or suspending Ink throws.
+  const openEditor = useCallback(async () => {
+    const [cmd, ...args] = resolveEditor()
+    const dir = mkdtempSync(join(tmpdir(), 'hermes-'))
+
+    try {
+      const file = join(dir, 'prompt.md')
+      let exitCode: null | number = null
+
+      writeFileSync(file, [...inputBuf, input].join('\n'))
+
+      await withInkSuspended(async () => {
+        exitCode = spawnSync(cmd!, [...args, file], { stdio: 'inherit' }).status
+      })
+
+      // Only a clean editor exit (status 0) counts; an empty file is ignored.
+      const text = exitCode === 0 ? readFileSync(file, 'utf8').trimEnd() : ''
+
+      if (text) {
+        setInput('')
+        setInputBuf([])
+        submitRef.current(text)
+      }
+    } finally {
+      // Remove the whole temp dir, not just the prompt file inside it.
+      rmSync(dir, { force: true, recursive: true })
+    }
   }, [input, inputBuf, submitRef])

  const actions = useMemo(
@@ -8,7 +8,7 @@ import type {
  SudoRespondResponse,
  VoiceRecordResponse
 } from '../gatewayTypes.js'
-import { isAction, isMac, isVoiceToggleKey } from '../lib/platform.js'
+import { isAction, isCopyShortcut, isMac, isVoiceToggleKey } from '../lib/platform.js'

 import { getInputSelection } from './inputSelectionStore.js'
 import type { InputHandlerContext, InputHandlerResult } from './interfaces.js'
@@ -30,11 +30,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
  const copySelection = () => {
    // ink's copySelection() already calls setClipboard() which handles
    // pbcopy (macOS), wl-copy/xclip (Linux), tmux, and OSC 52 fallback.
-    const text = terminal.selection.copySelection()
-
-    if (text) {
-      actions.sys(`copied ${text.length} chars`)
-    }
+    terminal.selection.copySelection()
  }

  const clearSelection = () => {
@@ -159,16 +155,14 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
      voice.setProcessing(false)
    }

-    gateway
-      .rpc<VoiceRecordResponse>('voice.record', { action })
-      .catch((e: Error) => {
-        // Revert optimistic UI on failure.
-        if (starting) {
-          voice.setRecording(false)
-        }
+    gateway.rpc<VoiceRecordResponse>('voice.record', { action }).catch((e: Error) => {
+      // Revert optimistic UI on failure.
+      if (starting) {
+        voice.setRecording(false)
+      }

-        actions.sys(`voice error: ${e.message}`)
-      })
+      actions.sys(`voice error: ${e.message}`)
+    })
  }

  useInput((ch, key) => {
@@ -317,7 +311,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
      }
    }

-    if (isAction(key, ch, 'c')) {
+    if (isCopyShortcut(key, ch)) {
      if (terminal.hasSelection) {
        return copySelection()
      }
@@ -372,8 +366,13 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
      return voiceRecordToggle()
    }

-    if (isAction(key, ch, 'g')) {
-      return cActions.openEditor()
+    // Cmd/Ctrl+G, plus Alt+G fallback for VSCode/Cursor (they bind the
+    // primary keystroke to "Find Next" before the TUI sees it; Alt+G
+    // arrives as meta+g across platforms).
+    if (ch.toLowerCase() === 'g' && (isAction(key, ch, 'g') || key.meta)) {
+      return void cActions.openEditor().catch((err: unknown) => {
+        actions.sys(err instanceof Error ? `failed to open editor: ${err.message}` : 'failed to open editor')
+      })
    }

    // shift-tab flips yolo without spending a turn (claude-code parity)
@@ -640,14 +640,14 @@ export function useMainApp(gw: GatewayClient) {
  const showProgressArea = anyPanelVisible
    ? Boolean(
        ui.busy ||
-          turn.outcome ||
-          turn.streamPendingTools.length ||
-          turn.streamSegments.length ||
-          turn.subagents.length ||
-          turn.tools.length ||
-          turn.turnTrail.length ||
-          hasReasoning ||
-          turn.activity.length
+        turn.outcome ||
+        turn.streamPendingTools.length ||
+        turn.streamSegments.length ||
+        turn.subagents.length ||
+        turn.tools.length ||
+        turn.turnTrail.length ||
+        hasReasoning ||
+        turn.activity.length
      )
    : turn.activity.some(item => item.tone !== 'info')

@@ -218,11 +218,7 @@ export function StatusRule({
          {voiceLabel ? (
            <Text
              color={
-                voiceLabel.startsWith('●')
-                  ? t.color.error
-                  : voiceLabel.startsWith('◉')
-                    ? t.color.warn
-                    : t.color.dim
+                voiceLabel.startsWith('●') ? t.color.error : voiceLabel.startsWith('◉') ? t.color.warn : t.color.dim
              }
            >
              {' │ '}
@@ -9,6 +9,7 @@ import { $uiState } from '../app/uiStore.js'
 import { FloatBox } from './appChrome.js'
 import { MaskedPrompt } from './maskedPrompt.js'
 import { ModelPicker } from './modelPicker.js'
+import { OverlayHint } from './overlayControls.js'
 import { ApprovalPrompt, ClarifyPrompt, ConfirmPrompt } from './prompts.js'
 import { SessionPicker } from './sessionPicker.js'
 import { SkillsHub } from './skillsHub.js'
@@ -162,11 +163,11 @@ export function FloatingOverlays({
            ))}

            <Box marginTop={1}>
-              <Text color={ui.theme.color.dim}>
+              <OverlayHint t={ui.theme}>
                {overlay.pager.offset + pagerPageSize < overlay.pager.lines.length
-                  ? `↑↓/jk line · Enter/Space/PgDn page · b/PgUp back · g/G top/bottom · q close (${Math.min(overlay.pager.offset + pagerPageSize, overlay.pager.lines.length)}/${overlay.pager.lines.length})`
-                  : `end · ↑↓/jk · b/PgUp back · g top · q close (${overlay.pager.lines.length} lines)`}
-              </Text>
+                  ? `↑↓/jk line · Enter/Space/PgDn page · b/PgUp back · g/G top/bottom · Esc/q close (${Math.min(overlay.pager.offset + pagerPageSize, overlay.pager.lines.length)}/${overlay.pager.lines.length})`
+                  : `end · ↑↓/jk · b/PgUp back · g top · Esc/q close (${overlay.pager.lines.length} lines)`}
+              </OverlayHint>
            </Box>
          </Box>
        </FloatBox>
--- a/Show More
+++ b/Show More