feat: add fast-path setup for nous account

Adds a Nous-account-specific fast setup flow and auto-launches into chat if the gateway isn't set up.
change: always run setup on no-config run
2026-04-24 00:07:23 -04:00 · 2026-04-24 00:06:48 -04:00 · 2026-04-23 19:40:43 -05:00 · 2026-04-23 19:38:33 -05:00 · 2026-04-23 19:35:18 -05:00 · 2026-04-23 19:32:21 -05:00
40 changed files with 2935 additions and 236 deletions
--- a/agent/moonshot_schema.py
+++ b/agent/moonshot_schema.py
@@ -0,0 +1,190 @@
+"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
+
+Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
+tool calling.  Requests that violate it fail with HTTP 400:
+
+    tools.function.parameters is not a valid moonshot flavored json schema,
+    details: <...>
+
+Known rejection modes documented at
+https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
+and MoonshotAI/kimi-cli#1595:
+
+1. Every property schema must carry a ``type``.  Standard JSON Schema allows
+   type to be omitted (the value is then unconstrained); Moonshot refuses.
+2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
+   the parent.  Presence of both causes "type should be defined in anyOf
+   items instead of the parent schema".
+
+The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
+handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
+applies at MCP registration time for all providers.
+"""
+
+from __future__ import annotations
+
+import copy
+from typing import Any, Dict, List
+
+# Keys whose values are maps of name → schema (not schemas themselves).
+# When we recurse, we walk the values of these maps as schemas, but we do
+# NOT apply the missing-type repair to the map itself.
+_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
+
+# Keys whose values are lists of schemas.
+_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
+
+# Keys whose value is a single nested schema (non-dict values, e.g. a bool, are passed through).
+_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
+
+
+def _repair_schema(node: Any, is_schema: bool = True) -> Any:
+    """Recursively apply Moonshot repairs to a schema node.
+
+    ``is_schema=True`` means this dict is a JSON Schema node and gets the
+    missing-type + anyOf-parent repairs applied.  ``is_schema=False`` skips
+    those two repairs for the node itself; schemas nested under recognized
+    keywords are still repaired.  No caller in this module passes ``False``.
+    """
+    if isinstance(node, list):
+        # A bare list is treated like the value of a schema-list key
+        # (anyOf/oneOf/allOf/prefixItems): every element is repaired as a schema.
+        return [_repair_schema(item, is_schema=True) for item in node]
+    if not isinstance(node, dict):
+        return node
+
+    # Walk the dict, deciding per-key whether recursion is into a schema
+    # node, a container map, or a scalar.
+    repaired: Dict[str, Any] = {}
+    for key, value in node.items():
+        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
+            # Map of name → schema.  Don't treat the map itself as a schema
+            # (it has no type / properties of its own), but each value is.
+            repaired[key] = {
+                sub_key: _repair_schema(sub_val, is_schema=True)
+                for sub_key, sub_val in value.items()
+            }
+        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
+            repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
+        elif key in _SCHEMA_NODE_KEYS:
+            # items / contains / not / additionalProperties / propertyNames: one
+            # nested schema.  Non-dict values (e.g. a bool) are left alone.
+            if isinstance(value, dict):
+                repaired[key] = _repair_schema(value, is_schema=True)
+            else:
+                repaired[key] = value
+        else:
+            # Everything else (description, enum, unrecognized keywords, ...) passes through as-is.
+            repaired[key] = value
+
+    if not is_schema:
+        return repaired
+
+    # Rule 2: when anyOf is present, type belongs only on the children.
+    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
+        repaired.pop("type", None)
+        return repaired
+
+    # Rule 1: property schemas without type need one.  $ref nodes are exempt
+    # — their type comes from the referenced definition.
+    if "$ref" in repaired:
+        return repaired
+    return _fill_missing_type(repaired)
+
+
+def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
+    """Return *node* as-is if it has a non-empty ``type``, else a copy with an inferred one."""
+    if "type" in node and node["type"] not in (None, ""):
+        return node
+
+    # Heuristic: properties/required/additionalProperties → object; items/prefixItems
+    # → array; enum → type of first enum value; else ``string`` (safest scalar).
+    if "properties" in node or "required" in node or "additionalProperties" in node:
+        inferred = "object"
+    elif "items" in node or "prefixItems" in node:
+        inferred = "array"
+    elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
+        sample = node["enum"][0]
+        if isinstance(sample, bool):  # checked before int: bool is a subclass of int
+            inferred = "boolean"
+        elif isinstance(sample, int):
+            inferred = "integer"
+        elif isinstance(sample, float):
+            inferred = "number"
+        else:
+            inferred = "string"
+    else:
+        inferred = "string"
+
+    return {**node, "type": inferred}
+
+
+def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
+    """Normalize tool parameters to a Moonshot-compatible object schema.
+
+    Returns a repaired deep copy (input is not mutated); the top level is forced
+    to ``type: object`` with a ``properties`` map.  Non-dict input yields an empty one.
+    """
+    if not isinstance(parameters, dict):
+        return {"type": "object", "properties": {}}
+
+    repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
+    if not isinstance(repaired, dict):  # defensive: a dict input always comes back as a dict
+        return {"type": "object", "properties": {}}
+
+    # Top-level must be an object schema (NOTE(review): re-adds ``type`` even beside a top-level anyOf)
+    if repaired.get("type") != "object":
+        repaired["type"] = "object"
+    if "properties" not in repaired:
+        repaired["properties"] = {}
+
+    return repaired
+
+
+def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.
+
+    Non-dict tools / non-dict ``function`` entries pass through untouched.  Inputs
+    are never mutated; if no parameters change, the original list is returned.
+    """
+    if not tools:
+        return tools
+
+    sanitized: List[Dict[str, Any]] = []
+    any_change = False
+    for tool in tools:
+        fn = tool.get("function") if isinstance(tool, dict) else None
+        if not isinstance(fn, dict):
+            sanitized.append(tool)
+            continue
+        params = fn.get("parameters")
+        repaired = sanitize_moonshot_tool_parameters(params)
+        # Compare by value: the sanitizer always returns a fresh dict, so ``is not`` was always true.
+        if repaired != params:
+            any_change = True
+            tool = {**tool, "function": {**fn, "parameters": repaired}}
+        sanitized.append(tool)
+
+    return sanitized if any_change else tools
+
+
+def is_moonshot_model(model: str | None) -> bool:
+    """True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
+
+    Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
+    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
+    Detection by model name covers Nous / OpenRouter / other aggregators that
+    route to Moonshot's inference, where the base URL is the aggregator's, not
+    ``api.moonshot.ai``.
+    """
+    if not model:  # None or empty string
+        return False
+    bare = model.strip().lower()  # case-insensitive; surrounding whitespace ignored
+    # Last path segment (covers aggregator-prefixed slugs)
+    tail = bare.rsplit("/", 1)[-1]
+    if tail.startswith("kimi-") or tail == "kimi":
+        return True
+    # Broader substring match (superset of the tail check): vendor-prefixed forms on aggregators
+    if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
+        return True
+    return False
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

+from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage
@@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport):

        # Tools
        if tools:
+            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
+            # tool parameters here keeps aggregator routes (Nous, OpenRouter,
+            # etc.) compatible, in addition to direct moonshot.ai endpoints.
+            if is_moonshot_model(model):
+                tools = sanitize_moonshot_tools(tools)
            api_kwargs["tools"] = tools

        # max_tokens resolution — priority: ephemeral > user > provider default
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

+
+def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
+    """Resolve the toolset list for a cron job.
+
+    Precedence:
+    1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
+       Keeps the agent's job-scoped toolset override intact — #6130.
+    2. Per-platform ``hermes tools`` config for the ``cron`` platform.
+       Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
+       so users can gate cron toolsets globally without recreating every job.
+    3. ``None`` on any lookup failure — AIAgent loads the full default set
+       (legacy behavior before this change, preserved as the safety net).
+
+    _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
+    ``_get_platform_tools`` for unconfigured platforms, so fresh installs
+    get cron WITHOUT ``moa`` by default (issue reported by Norbert —
+    surprise $4.63 run).
+    """
+    per_job = job.get("enabled_toolsets")
+    if per_job:  # missing / None / empty list falls through to the platform config
+        return per_job
+    try:
+        from hermes_cli.tools_config import _get_platform_tools  # lazy: avoid heavy import at cron module load
+        return sorted(_get_platform_tools(cfg or {}, "cron"))
+    except Exception as exc:  # broad: also covers an ImportError raised by the lazy import above
+        logger.warning(
+            "Cron toolset resolution failed, falling back to full default toolset: %s",
+            exc,
+        )
+        return None
+
 # Valid delivery platforms — used to validate user-supplied platform names
 # in cron delivery targets, preventing env var enumeration via crafted names.
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
@@ -886,7 +917,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
-            enabled_toolsets=job.get("enabled_toolsets") or None,
+            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
            skip_context_files=True,  # Don't inject SOUL.md/AGENTS.md from scheduler cwd
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -2821,6 +2821,7 @@ def _prompt_model_selection(
    pricing: Optional[Dict[str, Dict[str, str]]] = None,
    unavailable_models: Optional[List[str]] = None,
    portal_url: str = "",
+    allow_custom = True
 ) -> Optional[str]:
    """Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.

@@ -2909,8 +2910,16 @@ def _prompt_model_selection(
        from simple_term_menu import TerminalMenu

        choices = [f"  {_label(mid)}" for mid in ordered]
-        choices.append("  Enter custom model name")
-        choices.append("  Skip (keep current)")
+
+        custom_idx = None
+        if allow_custom:
+            custom_idx = len(choices)
+            choices.append("  Enter custom model name")
+
+        skip_idx = None
+        if current_model:
+            skip_idx = len(choices)
+            choices.append("  Skip (keep current)")

        # Print the unavailable block BEFORE the menu via regular print().
        # simple_term_menu pads title lines to terminal width (causes wrapping),
@@ -2947,21 +2956,29 @@ def _prompt_model_selection(
        print()
        if idx < len(ordered):
            return ordered[idx]
-        elif idx == len(ordered):
+        if idx == custom_idx:
            custom = input("Enter model name: ").strip()
            return custom if custom else None
+        if idx == skip_idx:
+            return None
        return None
    except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
        pass

    # Fallback: numbered list
    print(menu_title)
-    num_width = len(str(len(ordered) + 2))
+    n = len(ordered)
+    extra = []
+    if allow_custom:
+        extra.append("Enter custom model name")
+    if current_model:
+        extra.append("Skip (keep current)")
+    total = n + len(extra)
+    num_width = len(str(total))
    for i, mid in enumerate(ordered, 1):
        print(f"  {i:>{num_width}}. {_label(mid)}")
-    n = len(ordered)
-    print(f"  {n + 1:>{num_width}}. Enter custom model name")
-    print(f"  {n + 2:>{num_width}}. Skip (keep current)")
+    for j, label in enumerate(extra, n + 1):
+        print(f"  {j:>{num_width}}. {label}")

    if _unavailable:
        _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
@@ -2973,18 +2990,19 @@ def _prompt_model_selection(

    while True:
        try:
-            choice = input(f"Choice [1-{n + 2}] (default: skip): ").strip()
+            choice = input(f"Choice [1-{total}]: ").strip()
            if not choice:
                return None
-            idx = int(choice)
-            if 1 <= idx <= n:
-                return ordered[idx - 1]
-            elif idx == n + 1:
-                custom = input("Enter model name: ").strip()
-                return custom if custom else None
-            elif idx == n + 2:
-                return None
-            print(f"Please enter 1-{n + 2}")
+            val = int(choice)
+            if 1 <= val <= n:
+                return ordered[val - 1]
+            extra_idx = val - n - 1
+            if 0 <= extra_idx < len(extra):
+                if extra[extra_idx] == "Enter custom model name":
+                    custom = input("Enter model name: ").strip()
+                    return custom if custom else None
+                return None  # skip
+            print(f"Please enter 1-{total}")
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
@@ -3260,7 +3278,6 @@ def _nous_device_code_login(
        open_browser = False

    print(f"Starting Hermes login via {pconfig.name}...")
-    print(f"Portal: {portal_base_url}")
    if insecure:
        print("TLS verification: disabled (--insecure)")
    elif ca_bundle:
@@ -3280,19 +3297,18 @@ def _nous_device_code_login(
        interval = int(device_data["interval"])

        print()
-        print("To continue:")
-        print(f"  1. Open: {verification_url}")
-        print(f"  2. If prompted, enter code: {user_code}")
-
        if open_browser:
            opened = webbrowser.open(verification_url)
            if opened:
-                print("  (Opened browser for verification)")
+                print("If you don't see a browser window open, navigate to this URL:")
            else:
-                print("  Could not open browser automatically — use the URL above.")
+                print("Navigate to this URL to continue:")
+        print(verification_url)
+        print(f"If you're prompted for a code, use {user_code}")
+        print()

        effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
-        print(f"Waiting for approval (polling every {effective_interval}s)...")
+        print(f"Waiting for approval (checking every {effective_interval}s)...")

        token_data = _poll_for_token(
            client=client,
@@ -3357,7 +3373,7 @@ def _nous_device_code_login(
        raise


-def _login_nous(args, pconfig: ProviderConfig) -> None:
+def login_nous(args, pconfig: ProviderConfig) -> None:
    """Nous Portal device authorization flow."""
    timeout_seconds = getattr(args, "timeout", None) or 15.0
    insecure = bool(getattr(args, "insecure", False))
@@ -3419,7 +3435,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])

+            _portal = auth_state.get("portal_base_url", "")
+
            print()
+
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
@@ -3428,14 +3447,17 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
                        model_ids, pricing, free_tier=True,
                    )
-            _portal = auth_state.get("portal_base_url", "")
-            if model_ids:
-                print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
-                selected_model = _prompt_model_selection(
-                    model_ids, pricing=pricing,
-                    unavailable_models=unavailable_models,
-                    portal_url=_portal,
-                )
+                if not free_tier:
+                    print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
+                if len(model_ids) > 1:
+                    selected_model = _prompt_model_selection(
+                        model_ids, pricing=pricing,
+                        unavailable_models=unavailable_models,
+                        portal_url=_portal,
+                        allow_custom=not free_tier
+                    )
+                else:
+                    selected_model = model_ids[0]
            elif unavailable_models:
                _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
                print("No free models currently available.")
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -739,6 +739,10 @@ DEFAULT_CONFIG = {
        "inherit_mcp_toolsets": True,
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
+        "child_timeout_seconds": 600,  # wall-clock timeout for each child agent (floor 30s,
+                                       # no ceiling). High-reasoning models on large tasks
+                                       # (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
+                                       # raise if children time out before producing output.
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1085,9 +1085,6 @@ def cmd_chat(args):
        print(
            "It looks like Hermes isn't configured yet -- no API keys or providers found."
        )
-        print()
-        print("  Run:  hermes setup")
-        print()

        from hermes_cli.setup import (
            is_interactive_stdin,
@@ -1100,16 +1097,8 @@ def cmd_chat(args):
            )
            sys.exit(1)

-        try:
-            reply = input("Run setup now? [Y/n] ").strip().lower()
-        except (EOFError, KeyboardInterrupt):
-            reply = "n"
-        if reply in ("", "y", "yes"):
-            cmd_setup(args)
-            return
-        print()
-        print("You can run 'hermes setup' at any time to configure.")
-        sys.exit(1)
+        cmd_setup(args)
+        return

    # Start update check in background (runs while other init happens)
    try:
@@ -2135,7 +2124,7 @@ def _model_flow_nous(config, current_model="", args=None):
        resolve_nous_runtime_credentials,
        AuthError,
        format_auth_error,
-        _login_nous,
+        login_nous,
        PROVIDER_REGISTRY,
    )
    from hermes_cli.config import (
@@ -2148,8 +2137,6 @@ def _model_flow_nous(config, current_model="", args=None):

    state = get_provider_auth_state("nous")
    if not state or not state.get("access_token"):
-        print("Not logged into Nous Portal. Starting login...")
-        print()
        try:
            mock_args = argparse.Namespace(
                portal_url=getattr(args, "portal_url", None),
@@ -2161,7 +2148,7 @@ def _model_flow_nous(config, current_model="", args=None):
                ca_bundle=getattr(args, "ca_bundle", None),
                insecure=bool(getattr(args, "insecure", False)),
            )
-            _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
+            login_nous(mock_args, PROVIDER_REGISTRY["nous"])
            # Offer Tool Gateway enablement for paid subscribers
            try:
                _refreshed = load_config() or {}
@@ -2212,7 +2199,7 @@ def _model_flow_nous(config, current_model="", args=None):
                    ca_bundle=None,
                    insecure=False,
                )
-                _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
+                login_nous(mock_args, PROVIDER_REGISTRY["nous"])
            except Exception as login_exc:
                print(f"Re-login failed: {login_exc}")
            return
--- a/hermes_cli/platforms.py
+++ b/hermes_cli/platforms.py
@@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
    ("qqbot",          PlatformInfo(label="💬 QQBot",           default_toolset="hermes-qqbot")),
    ("webhook",        PlatformInfo(label="🔗 Webhook",         default_toolset="hermes-webhook")),
    ("api_server",     PlatformInfo(label="🌐 API Server",      default_toolset="hermes-api-server")),
+    ("cron",           PlatformInfo(label="⏰ Cron",            default_toolset="hermes-cron")),
 ])


--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -18,9 +18,10 @@ import shutil
 import sys
 import copy
 from pathlib import Path
-from typing import Optional, Dict, Any
+from typing import Literal, Optional, Dict, Any

 from hermes_cli.nous_subscription import get_nous_subscription_features
+from hermes_cli.main import _model_flow_nous
 from tools.tool_backend_helpers import managed_nous_tools_enabled
 from utils import base_url_hostname
 from hermes_constants import get_optional_skills_dir
@@ -655,7 +656,7 @@ def _prompt_container_resources(config: dict):



-def setup_model_provider(config: dict, *, quick: bool = False):
+def setup_model_provider(config: dict, *, quick: bool | Literal["nous_portal"] = False):
    """Configure the inference provider and default model.

    Delegates to ``cmd_model()`` (the same flow used by ``hermes model``)
@@ -677,7 +678,11 @@ def setup_model_provider(config: dict, *, quick: bool = False):
    # credential prompting, model selection, and config persistence.
    from hermes_cli.main import select_provider_and_model
    try:
-        select_provider_and_model()
+        if quick == "nous_portal":
+            config = load_config()
+            _model_flow_nous(config)
+        else:
+            select_provider_and_model()
    except (SystemExit, KeyboardInterrupt):
        print()
        print_info("Provider setup skipped.")
@@ -3030,11 +3035,15 @@ def run_setup_wizard(args):
            config = load_config()

        setup_mode = prompt_choice("How would you like to set up Hermes?", [
-            "Quick setup — provider, model & messaging (recommended)",
+            "Nous Account setup — model & messaging (recommended)",
+            "Quick setup — provider, model & messaging",
            "Full setup — configure everything",
        ], 0)

        if setup_mode == 0:
+            _run_first_time_quick_setup(config, hermes_home, is_existing, nous_quick=True)
+            return
+        if setup_mode == 1:
            _run_first_time_quick_setup(config, hermes_home, is_existing)
            return

@@ -3095,7 +3104,7 @@ def _resolve_hermes_chat_argv() -> Optional[list[str]]:
    return None


-def _offer_launch_chat():
+def _offer_launch_chat(auto_launch = False):
    """Prompt the user to jump straight into chat after setup."""
    print()
    if not prompt_yes_no("Launch hermes chat now?", True):
@@ -3109,7 +3118,7 @@ def _offer_launch_chat():
    os.execvp(chat_argv[0], chat_argv)


-def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
+def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool, nous_quick=False):
    """Streamlined first-time setup: provider + model only.

    Applies sensible defaults for TTS (Edge), terminal (local), agent
@@ -3117,7 +3126,7 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
    ``hermes setup <section>``.
    """
    # Step 1: Model & Provider (essential — skips rotation/vision/TTS)
-    setup_model_provider(config, quick=True)
+    setup_model_provider(config, quick="nous_portal" if nous_quick else True )

    # Step 2: Apply defaults for everything else
    _apply_default_agent_settings(config)
@@ -3150,7 +3159,9 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):

    _print_setup_summary(config, hermes_home)

-    _offer_launch_chat()
+    # if the user hasn't set up the gateway, assume they want to launch chat.
+    force_launch_chat = gateway_choice == 0
+    _offer_launch_chat(force_launch_chat)


 def _run_quick_setup(config: dict, hermes_home):
--- a/hermes_cli/voice.py
+++ b/hermes_cli/voice.py
@@ -0,0 +1,548 @@
+"""Process-wide voice recording + TTS API for the TUI gateway.
+
+Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
+(text-to-speech) behind idempotent, stateful entry points that the gateway's
+``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
+call from a dedicated thread. The gateway imports this module lazily so that
+missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
+an ``ImportError`` at call time, not at startup.
+
+Two usage modes are exposed:
+
+* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
+  manually-bounded capture used when the caller drives the start/stop pair
+  explicitly.
+* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
+  the classic CLI voice mode: recording auto-stops on silence, transcribes,
+  hands the result to a callback, and then auto-restarts for the next turn.
+  Three consecutive no-speech cycles stop the loop and fire
+  ``on_silent_limit`` so the UI can turn the mode off.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+import threading
+from typing import Any, Callable, Optional
+
+from tools.voice_mode import (
+    create_audio_recorder,
+    is_whisper_hallucination,
+    play_audio_file,
+    transcribe_recording,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _debug(msg: str) -> None:
+    """Write a ``[voice]``-prefixed breadcrumb to stderr when debugging is on.
+
+    Output is produced only when the ``HERMES_VOICE_DEBUG`` environment
+    variable, stripped of surrounding whitespace, equals ``"1"``.
+
+    The message goes to stderr so the TUI gateway can surface it as a
+    ``gateway.stderr`` event, which is enough to diagnose voice-loop
+    problems in a real terminal without a dedicated debug RPC.
+
+    A failed write (``BrokenPipeError`` / ``OSError``) is ignored: this
+    helper is called from background threads (silence callback, TTS,
+    beep) and a broken stderr pipe must not take the gateway down.
+    """
+    debug_flag = os.environ.get("HERMES_VOICE_DEBUG", "").strip()
+    if debug_flag == "1":
+        try:
+            print(f"[voice] {msg}", file=sys.stderr, flush=True)
+        except (BrokenPipeError, OSError):
+            # Diagnostics are best-effort only.
+            pass
+
+
+def _beeps_enabled() -> bool:
+    """Report whether audible record/stop cues are enabled.
+
+    Reads ``voice.beep_enabled`` from the loaded config (CLI parity).
+    The answer is ``True`` when the key is absent, when the ``voice``
+    section is not a mapping, or when importing/loading the config fails
+    for any reason.
+    """
+    enabled = True
+    try:
+        from hermes_cli.config import load_config
+
+        section = load_config().get("voice", {})
+        if isinstance(section, dict):
+            enabled = bool(section.get("beep_enabled", True))
+    except Exception:
+        # A config problem falls back to the default instead of raising.
+        enabled = True
+    return enabled
+
+
+def _play_beep(frequency: int, count: int = 1) -> None:
+    """Play a best-effort audible cue of ``count`` beeps at ``frequency`` Hz.
+
+    Does nothing when beeps are disabled in config.  Within this module
+    the start cue is a single 880 Hz beep and the stop cue is a double
+    660 Hz beep, matching the classic CLI.  Any failure while importing
+    or calling the underlying ``play_beep`` helper is reported through
+    ``_debug`` and otherwise ignored, so an unavailable speaker never
+    breaks the voice loop.
+    """
+    if _beeps_enabled():
+        try:
+            from tools.voice_mode import play_beep
+
+            play_beep(frequency=frequency, count=count)
+        except Exception as exc:
+            _debug(f"beep {frequency}Hz failed: {exc}")
+
+# ── Push-to-talk state ───────────────────────────────────────────────
+_recorder = None
+_recorder_lock = threading.Lock()
+
+# ── Continuous (VAD) state ───────────────────────────────────────────
+_continuous_lock = threading.Lock()
+_continuous_active = False
+_continuous_recorder: Any = None
+
+# ── TTS-vs-STT feedback guard ────────────────────────────────────────
+# When TTS plays the agent reply over the speakers, the live microphone
+# picks it up and transcribes the agent's own voice as user input — an
+# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
+# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
+# playing, set while silent. _continuous_on_silence waits on it before
+# re-arming the recorder, and speak_text itself cancels any live capture
+# before starting playback so the tail of the previous utterance doesn't
+# leak into the mic.
+_tts_playing = threading.Event()
+_tts_playing.set()  # initially "not playing"
+_continuous_on_transcript: Optional[Callable[[str], None]] = None
+_continuous_on_status: Optional[Callable[[str], None]] = None
+_continuous_on_silent_limit: Optional[Callable[[], None]] = None
+_continuous_no_speech_count = 0
+_CONTINUOUS_NO_SPEECH_LIMIT = 3
+
+
+# ── Push-to-talk API ─────────────────────────────────────────────────
+
+
+def start_recording() -> None:
+    """Open a push-to-talk capture on the default input device.
+
+    Safe to call repeatedly: if the current recorder reports
+    ``is_recording`` the call returns without touching it.  Otherwise a
+    fresh recorder is created, started, and stored as the module's
+    active push-to-talk recorder.
+    """
+    global _recorder
+
+    with _recorder_lock:
+        current = _recorder
+        already_recording = current is not None and getattr(
+            current, "is_recording", False
+        )
+        if not already_recording:
+            fresh = create_audio_recorder()
+            fresh.start()
+            # Published only after start() returned, so a failed start
+            # leaves the previous module state untouched.
+            _recorder = fresh
+
+
+def stop_and_transcribe() -> Optional[str]:
+    """Finish the push-to-talk capture and return its transcript.
+
+    The active recorder is detached under the lock, stopped, and the
+    resulting WAV file is transcribed and then deleted (deletion is
+    best-effort).
+
+    Returns:
+        The stripped transcript, or ``None`` when there was no active
+        recording, the recorder produced no file, transcription raised
+        or reported failure, the text was empty, or the text is a known
+        Whisper hallucination.
+    """
+    global _recorder
+
+    # Detach first so a concurrent start_recording() sees a clean slate.
+    with _recorder_lock:
+        active, _recorder = _recorder, None
+
+    wav_path = active.stop() if active is not None else None
+    if not wav_path:
+        return None
+
+    try:
+        outcome = transcribe_recording(wav_path)
+    except Exception as exc:
+        logger.warning("voice transcription failed: %s", exc)
+        return None
+    finally:
+        # The capture file is temporary regardless of the outcome.
+        try:
+            if os.path.isfile(wav_path):
+                os.unlink(wav_path)
+        except Exception:
+            pass
+
+    # transcribe_recording returns {"success": bool, "transcript": str, ...}
+    # — the same shape cli.py reads via result.get("transcript").
+    if outcome.get("success"):
+        spoken = (outcome.get("transcript") or "").strip()
+        if spoken and not is_whisper_hallucination(spoken):
+            return spoken
+    return None
+
+
+# ── Continuous (VAD) API ─────────────────────────────────────────────
+
+
+def start_continuous(
+    on_transcript: Callable[[str], None],
+    on_status: Optional[Callable[[str], None]] = None,
+    on_silent_limit: Optional[Callable[[], None]] = None,
+    silence_threshold: int = 200,
+    silence_duration: float = 3.0,
+) -> None:
+    """Start a VAD-driven continuous recording loop.
+
+    The loop calls ``on_transcript(text)`` each time speech is detected and
+    transcribed successfully, then auto-restarts. After
+    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
+    picked up at all) the loop stops itself and calls ``on_silent_limit``
+    so the UI can reflect "voice off". Idempotent — calling while already
+    active is a no-op.
+
+    Args:
+        on_transcript: Receives each accepted transcript.
+        on_status: Optional; called with ``"listening"`` /
+            ``"transcribing"`` / ``"idle"`` so the UI can show a live
+            indicator.  Exceptions it raises are ignored.
+        on_silent_limit: Optional; called when the loop halts itself
+            after too many silent cycles.
+        silence_threshold: Stored on the recorder as
+            ``_silence_threshold``.
+        silence_duration: Stored on the recorder as
+            ``_silence_duration``.
+
+    Raises:
+        Exception: Whatever ``create_audio_recorder()`` or the recorder's
+            ``start()`` raises.  In both cases the loop is left inactive
+            so a later call can retry.
+    """
+    global _continuous_active, _continuous_recorder
+    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_no_speech_count
+
+    with _continuous_lock:
+        if _continuous_active:
+            _debug("start_continuous: already active — no-op")
+            return
+
+        # Create and configure the recorder BEFORE flagging the loop as
+        # active: if recorder creation raises, the module must not be
+        # left permanently "active" (which would turn every later
+        # start_continuous() into a silent no-op).
+        if _continuous_recorder is None:
+            _continuous_recorder = create_audio_recorder()
+        rec = _continuous_recorder
+        rec._silence_threshold = silence_threshold
+        rec._silence_duration = silence_duration
+
+        _continuous_active = True
+        _continuous_on_transcript = on_transcript
+        _continuous_on_status = on_status
+        _continuous_on_silent_limit = on_silent_limit
+        _continuous_no_speech_count = 0
+
+    _debug(
+        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
+    )
+
+    # CLI parity: single 880 Hz beep *before* opening the stream — placing
+    # the beep after stream.start() on macOS triggers a CoreAudio conflict
+    # (cli.py:7528 comment).
+    _play_beep(frequency=880, count=1)
+
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to start continuous recording: %s", e)
+        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            # Roll back everything published above so the module state
+            # matches "never started".
+            _continuous_active = False
+            _continuous_on_transcript = None
+            _continuous_on_status = None
+            _continuous_on_silent_limit = None
+        raise
+
+    if on_status:
+        try:
+            on_status("listening")
+        except Exception:
+            # Status reporting is best-effort; the loop is already running.
+            pass
+
+
+def stop_continuous() -> None:
+    """Shut down the continuous loop and release the microphone.
+
+    A no-op when the loop is not active.  Otherwise the loop is flagged
+    inactive, its callbacks and silent-cycle counter are reset, the
+    recorder is cancelled, the stop cue is played, and the status
+    callback (if any) receives ``"idle"``.  A transcription already in
+    flight finishes, but its result is discarded because the silence
+    callback re-checks ``_continuous_active`` before delivering it.
+    """
+    global _continuous_active, _continuous_on_transcript
+    global _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_recorder, _continuous_no_speech_count
+
+    with _continuous_lock:
+        if not _continuous_active:
+            return
+        # Snapshot what is needed after the lock is released ...
+        recorder = _continuous_recorder
+        status_cb = _continuous_on_status
+        # ... then reset the shared loop state in one go.
+        _continuous_active = False
+        _continuous_on_transcript = _continuous_on_status = None
+        _continuous_on_silent_limit = None
+        _continuous_no_speech_count = 0
+
+    if recorder is not None:
+        try:
+            # cancel() rather than stop(): buffered frames are dropped
+            # because a half-captured turn must not be transcribed.
+            recorder.cancel()
+        except Exception as exc:
+            logger.warning("failed to cancel recorder: %s", exc)
+
+    # Same 660 Hz double beep the silence-auto-stop path plays.
+    _play_beep(frequency=660, count=2)
+
+    if not status_cb:
+        return
+    try:
+        status_cb("idle")
+    except Exception:
+        pass
+
+
+def is_continuous_active() -> bool:
+    """Return the continuous-loop flag, read under ``_continuous_lock``."""
+    with _continuous_lock:
+        active = _continuous_active
+    return active
+
+
+def _continuous_on_silence() -> None:
+    """AudioRecorder silence callback — runs in a daemon thread.
+
+    Stops the current capture, transcribes it, delivers accepted text via
+    ``on_transcript``, and — if the loop is still active — starts the
+    next capture.  ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive cycles
+    without an accepted transcript end the loop and fire
+    ``on_silent_limit``.
+
+    Failure handling:
+
+    * A recorder ``stop()`` that raises is logged and treated as a cycle
+      with no audio, so the loop state never gets stuck "active" behind
+      an unhandled exception.
+    * The loop is re-checked for activity immediately before the mic is
+      re-armed, so a ``stop_continuous()`` issued while ``on_transcript``
+      was running (or while waiting for TTS) is never followed by a
+      restart.
+    * If re-arming fails the loop is marked inactive and the status
+      callback receives ``"idle"`` so the UI does not stay on
+      ``"transcribing"``.
+    """
+    global _continuous_active, _continuous_no_speech_count
+
+    _debug("_continuous_on_silence: fired")
+
+    with _continuous_lock:
+        if not _continuous_active:
+            _debug("_continuous_on_silence: loop inactive — abort")
+            return
+        rec = _continuous_recorder
+        on_transcript = _continuous_on_transcript
+        on_status = _continuous_on_status
+        on_silent_limit = _continuous_on_silent_limit
+
+    if rec is None:
+        _debug("_continuous_on_silence: no recorder — abort")
+        return
+
+    def _notify(state: str) -> None:
+        # Status updates are best-effort: a failing UI callback must not
+        # break the recording loop.
+        if on_status:
+            try:
+                on_status(state)
+            except Exception:
+                pass
+
+    _notify("transcribing")
+
+    try:
+        wav_path = rec.stop()
+    except Exception as e:
+        logger.warning("failed to stop continuous recorder: %s", e)
+        _debug(f"_continuous_on_silence: rec.stop raised {type(e).__name__}: {e}")
+        wav_path = None
+    # Peak RMS is the critical diagnostic when stop() returns None despite
+    # the VAD firing — tells us at a glance whether the mic was too quiet
+    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
+    peak_rms = getattr(rec, "_peak_rms", -1)
+    _debug(
+        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
+    )
+
+    # CLI parity: double 660 Hz beep after the stream stops (safe from the
+    # CoreAudio conflict that blocks pre-start beeps).
+    _play_beep(frequency=660, count=2)
+
+    transcript: Optional[str] = None
+
+    if wav_path:
+        try:
+            result = transcribe_recording(wav_path)
+            # transcribe_recording returns {"success": bool, "transcript": str,
+            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
+            # produced empty transcripts even when Groq/local STT returned fine,
+            # which masqueraded as "not hearing the user" to the caller.
+            success = bool(result.get("success"))
+            text = (result.get("transcript") or "").strip()
+            err = result.get("error")
+            _debug(
+                f"_continuous_on_silence: transcribe -> success={success} "
+                f"text={text!r} err={err!r}"
+            )
+            if success and text and not is_whisper_hallucination(text):
+                transcript = text
+        except Exception as e:
+            logger.warning("continuous transcription failed: %s", e)
+            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
+        finally:
+            # The capture file is temporary regardless of the outcome.
+            try:
+                if os.path.isfile(wav_path):
+                    os.unlink(wav_path)
+            except Exception:
+                pass
+
+    with _continuous_lock:
+        if not _continuous_active:
+            # User stopped us while we were transcribing — discard.
+            _debug("_continuous_on_silence: stopped during transcribe — no restart")
+            return
+        if transcript:
+            _continuous_no_speech_count = 0
+        else:
+            _continuous_no_speech_count += 1
+        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
+        no_speech = _continuous_no_speech_count
+
+    if transcript and on_transcript:
+        try:
+            on_transcript(transcript)
+        except Exception as e:
+            logger.warning("on_transcript callback raised: %s", e)
+
+    if should_halt:
+        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
+        with _continuous_lock:
+            _continuous_active = False
+            _continuous_no_speech_count = 0
+        if on_silent_limit:
+            try:
+                on_silent_limit()
+            except Exception:
+                pass
+        try:
+            rec.cancel()
+        except Exception:
+            pass
+        _notify("idle")
+        return
+
+    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
+    # finish before re-arming the mic, then leave a small gap to avoid
+    # catching the tail of the speaker output.  Without this the voice
+    # loop becomes a feedback loop — the agent's spoken reply lands
+    # back in the mic and gets re-submitted.
+    if not _tts_playing.is_set():
+        _debug("_continuous_on_silence: waiting for TTS to finish")
+        _tts_playing.wait(timeout=60)
+        import time as _time
+        _time.sleep(0.3)
+
+    # The user may have stopped the loop while on_transcript ran or while
+    # we waited for TTS.  Always re-check right before re-arming so the
+    # mic is never reopened after stop_continuous().
+    with _continuous_lock:
+        if not _continuous_active:
+            _debug("_continuous_on_silence: stopped before restart — no restart")
+            return
+
+    # Restart for the next turn.
+    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
+    _play_beep(frequency=880, count=1)
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to restart continuous recording: %s", e)
+        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            _continuous_active = False
+        # The loop is dead; tell the UI so it does not stay on
+        # "transcribing" forever.
+        _notify("idle")
+        return
+
+    _notify("listening")
+
+
+# ── TTS API ──────────────────────────────────────────────────────────
+
+
+def speak_text(text: str) -> None:
+    """Synthesize ``text`` with the configured TTS provider and play it.
+
+    Mirrors cli.py:_voice_speak_response — same markdown strip pipeline,
+    same 4000-char cap, same explicit mp3 output path, same MP3-over-OGG
+    playback choice (afplay misbehaves on OGG).  Keeping these in sync
+    means a voice-mode TTS session in the TUI sounds identical to one in
+    the classic CLI.
+
+    While playback is in flight the module-level ``_tts_playing`` Event is
+    cleared so the continuous-recording loop knows to wait before
+    re-arming the mic (otherwise the agent's spoken reply feedback-loops
+    through the microphone and the agent ends up replying to itself).
+
+    Empty or whitespace-only ``text`` returns immediately.  Synthesis or
+    playback errors are logged, not raised.  The temporary MP3 (and any
+    sibling OGG) is removed whether or not playback succeeded, so failed
+    or empty syntheses do not accumulate files in the temp directory.
+    """
+    if not text or not text.strip():
+        return
+
+    import re
+    import tempfile
+    import time
+
+    # Cancel any live capture before we open the speakers — otherwise the
+    # last ~200ms of the user's turn tail + the first syllables of our TTS
+    # both end up in the next recording window.  The continuous loop will
+    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
+    paused_recording = False
+    with _continuous_lock:
+        if (
+            _continuous_active
+            and _continuous_recorder is not None
+            and getattr(_continuous_recorder, "is_recording", False)
+        ):
+            try:
+                _continuous_recorder.cancel()
+                paused_recording = True
+            except Exception as e:
+                logger.warning("failed to pause recorder for TTS: %s", e)
+
+    _tts_playing.clear()
+    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
+
+    # Assigned once the output path is chosen, so the cleanup in
+    # ``finally`` knows whether there is anything to remove.
+    mp3_path: Optional[str] = None
+
+    try:
+        from tools.tts_tool import text_to_speech_tool
+
+        tts_text = text[:4000] if len(text) > 4000 else text
+        tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text)             # fenced code blocks
+        tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text)    # [text](url) → text
+        tts_text = re.sub(r'https?://\S+', '', tts_text)                # bare URLs
+        tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text)            # bold
+        tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text)                # italic
+        tts_text = re.sub(r'`(.+?)`', r'\1', tts_text)                  # inline code
+        tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE)  # headers
+        tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE)  # list bullets
+        tts_text = re.sub(r'---+', '', tts_text)                        # horizontal rules
+        tts_text = re.sub(r'\n{3,}', '\n\n', tts_text)                  # excess newlines
+        tts_text = tts_text.strip()
+        if not tts_text:
+            return
+
+        # MP3 output path, pre-chosen so we can play the MP3 directly even
+        # when text_to_speech_tool auto-converts to OGG for messaging
+        # platforms.  afplay's OGG support is flaky, MP3 always works.
+        voice_dir = os.path.join(tempfile.gettempdir(), "hermes_voice")
+        os.makedirs(voice_dir, exist_ok=True)
+        mp3_path = os.path.join(
+            voice_dir,
+            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
+        )
+
+        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
+        text_to_speech_tool(text=tts_text, output_path=mp3_path)
+
+        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
+            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
+            play_audio_file(mp3_path)
+        else:
+            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
+    except Exception as e:
+        logger.warning("Voice TTS playback failed: %s", e)
+        _debug(f"speak_text raised {type(e).__name__}: {e}")
+    finally:
+        # Remove the temp audio on every exit path (success, empty output,
+        # or an exception during synthesis/playback).  Each file is
+        # handled independently so one failed unlink does not strand the
+        # other.
+        if mp3_path is not None:
+            ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
+            for leftover in (mp3_path, ogg_path):
+                try:
+                    if os.path.isfile(leftover):
+                        os.unlink(leftover)
+                except OSError:
+                    pass
+
+        _tts_playing.set()
+        _debug("speak_text: TTS done")
+
+        # Re-arm the mic so the user can answer without pressing Ctrl+B.
+        # Small delay lets the OS flush speaker output and afplay fully
+        # release the audio device before sounddevice re-opens the input.
+        if paused_recording:
+            time.sleep(0.3)
+            with _continuous_lock:
+                if _continuous_active and _continuous_recorder is not None:
+                    try:
+                        _continuous_recorder.start(
+                            on_silence_stop=_continuous_on_silence
+                        )
+                        _debug("speak_text: recording resumed after TTS")
+                    except Exception as e:
+                        logger.warning(
+                            "failed to resume recorder after TTS: %s", e
+                        )
--- a/model_tools.py
+++ b/model_tools.py
@@ -418,6 +418,31 @@ def _coerce_value(value: str, expected_type):
        return _coerce_number(value, integer_only=(expected_type == "integer"))
    if expected_type == "boolean":
        return _coerce_boolean(value)
+    if expected_type == "array":
+        return _coerce_json(value, list)
+    if expected_type == "object":
+        return _coerce_json(value, dict)
+    return value
+
+
+def _coerce_json(value: str, expected_python_type: type):
+    """Parse *value* as JSON when the schema expects an array or object.
+
+    Handles model output drift where a complex oneOf/discriminated-union schema
+    causes the LLM to emit the array/object as a JSON string instead of a native
+    structure.  Returns the original string if parsing fails or yields the wrong
+    Python type.
+    """
+    # json.loads raises ValueError for malformed JSON and TypeError for
+    # non-string input; either way the value is passed through unchanged.
+    try:
+        parsed = json.loads(value)
+    except (ValueError, TypeError):
+        return value
+    # Accept the parse only when it produced the container type the caller
+    # asked for (callers in this file pass ``list`` or ``dict``).
+    if isinstance(parsed, expected_python_type):
+        logger.debug(
+            "coerce_tool_args: coerced string to %s via json.loads",
+            expected_python_type.__name__,
+        )
+        return parsed
    return value


--- a/scripts/release.py
+++ b/scripts/release.py
@@ -167,6 +167,7 @@ AUTHOR_MAP = {
    "socrates1024@gmail.com": "socrates1024",
    "seanalt555@gmail.com": "Salt-555",
    "satelerd@gmail.com": "satelerd",
+    "dan@danlynn.com": "danklynn",
    "numman.ali@gmail.com": "nummanali",
    "rohithsaimidigudla@gmail.com": "whitehatjr1001",
    "0xNyk@users.noreply.github.com": "0xNyk",
--- a/tests/agent/test_moonshot_schema.py
+++ b/tests/agent/test_moonshot_schema.py
@@ -0,0 +1,254 @@
+"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
+
+Moonshot's tool-parameter validator rejects several shapes that the rest of
+the JSON Schema ecosystem accepts:
+
+1. Properties without ``type`` — Moonshot requires ``type`` on every node.
+2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
+   ``anyOf`` children.
+
+These tests cover the repairs applied by ``agent/moonshot_schema.py``.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agent.moonshot_schema import (
+    is_moonshot_model,
+    sanitize_moonshot_tool_parameters,
+    sanitize_moonshot_tools,
+)
+
+
+class TestMoonshotModelDetection:
+    """is_moonshot_model() must match across aggregator prefixes."""
+
+    # Bare Kimi names, vendor-prefixed names, aggregator-prefixed names,
+    # and an all-uppercase variant must all be detected.
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "kimi-k2.6",
+            "kimi-k2-thinking",
+            "moonshotai/Kimi-K2.6",
+            "moonshotai/kimi-k2.6",
+            "nous/moonshotai/kimi-k2.6",
+            "openrouter/moonshotai/kimi-k2-thinking",
+            "MOONSHOTAI/KIMI-K2.6",
+        ],
+    )
+    def test_positive_matches(self, model):
+        assert is_moonshot_model(model) is True
+
+    # Empty string, None, and other vendors' model names must not match.
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "",
+            None,
+            "anthropic/claude-sonnet-4.6",
+            "openai/gpt-5.4",
+            "google/gemini-3-flash-preview",
+            "deepseek-chat",
+        ],
+    )
+    def test_negative_matches(self, model):
+        assert is_moonshot_model(model) is False
+
+
+class TestMissingTypeFilled:
+    """Rule 1: every property must carry a type."""
+
+    def test_property_without_type_gets_string(self):
+        # A property with only a description is expected to default to "string".
+        params = {
+            "type": "object",
+            "properties": {"query": {"description": "a bare property"}},
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["query"]["type"] == "string"
+
+    def test_property_with_enum_infers_type_from_first_value(self):
+        # The enum's first value is a bool, so the expected type is "boolean".
+        params = {
+            "type": "object",
+            "properties": {"flag": {"enum": [True, False]}},
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["flag"]["type"] == "boolean"
+
+    def test_nested_properties_are_repaired(self):
+        # The repair must reach properties nested inside an object property.
+        params = {
+            "type": "object",
+            "properties": {
+                "filter": {
+                    "type": "object",
+                    "properties": {
+                        "field": {"description": "no type"},
+                    },
+                },
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["filter"]["properties"]["field"]["type"] == "string"
+
+    def test_array_items_without_type_get_repaired(self):
+        # The repair must also reach the ``items`` schema of an array.
+        params = {
+            "type": "object",
+            "properties": {
+                "tags": {
+                    "type": "array",
+                    "items": {"description": "tag entry"},
+                },
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["properties"]["tags"]["items"]["type"] == "string"
+
+    def test_ref_node_is_not_given_synthetic_type(self):
+        """$ref nodes should NOT get a synthetic type — the referenced
+        definition supplies it, and Moonshot would reject the conflict."""
+        params = {
+            "type": "object",
+            "properties": {"payload": {"$ref": "#/$defs/Payload"}},
+            "$defs": {"Payload": {"type": "object", "properties": {}}},
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        assert "type" not in out["properties"]["payload"]
+        assert out["properties"]["payload"]["$ref"] == "#/$defs/Payload"
+
+
+class TestAnyOfParentType:
+    """Rule 2: type must not appear at the anyOf parent level."""
+
+    def test_parent_type_stripped_when_anyof_present(self):
+        # A node carrying both ``type`` and ``anyOf`` must lose the parent
+        # ``type`` while keeping its ``anyOf`` list.
+        params = {
+            "type": "object",
+            "properties": {
+                "from_format": {
+                    "type": "string",
+                    "anyOf": [
+                        {"type": "string"},
+                        {"type": "null"},
+                    ],
+                },
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        from_format = out["properties"]["from_format"]
+        assert "type" not in from_format
+        assert "anyOf" in from_format
+
+    def test_anyof_children_missing_type_get_filled(self):
+        # Rule 1 applies to anyOf children too: the typeless child must end
+        # up with some ``type`` (the exact value is not pinned here).
+        params = {
+            "type": "object",
+            "properties": {
+                "value": {
+                    "anyOf": [
+                        {"type": "string"},
+                        {"description": "A typeless option"},
+                    ],
+                },
+            },
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        children = out["properties"]["value"]["anyOf"]
+        assert children[0]["type"] == "string"
+        assert "type" in children[1]
+
+
+class TestTopLevelGuarantees:
+    """The returned top-level schema is always a well-formed object."""
+
+    def test_non_dict_input_returns_empty_object(self):
+        # None, a string, and a list all collapse to the empty object schema.
+        assert sanitize_moonshot_tool_parameters(None) == {"type": "object", "properties": {}}
+        assert sanitize_moonshot_tool_parameters("garbage") == {"type": "object", "properties": {}}
+        assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}}
+
+    def test_non_object_top_level_coerced(self):
+        # A dict whose top-level type is not "object" is coerced to one.
+        params = {"type": "string"}
+        out = sanitize_moonshot_tool_parameters(params)
+        assert out["type"] == "object"
+        assert "properties" in out
+
+    def test_does_not_mutate_input(self):
+        # Hand-built copy of ``params`` taken before the call; nested dicts
+        # are copied so they do not alias the input.
+        params = {
+            "type": "object",
+            "properties": {"q": {"description": "no type"}},
+        }
+        snapshot = {
+            "type": params["type"],
+            "properties": {"q": dict(params["properties"]["q"])},
+        }
+        sanitize_moonshot_tool_parameters(params)
+        assert params["type"] == snapshot["type"]
+        assert "type" not in params["properties"]["q"]
+        # Compare the whole structure, not just the two spots above, so any
+        # added, removed, or rewritten key in the input is caught.
+        assert params == snapshot
+
+
+class TestToolListSanitizer:
+    """sanitize_moonshot_tools() walks an OpenAI-format tool list."""
+
+    def test_applies_per_tool(self):
+        # First tool needs a repair (typeless property); second is already clean.
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "search",
+                    "description": "Search",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {"q": {"description": "query"}},
+                    },
+                },
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "noop",
+                    "description": "Does nothing",
+                    "parameters": {"type": "object", "properties": {}},
+                },
+            },
+        ]
+        out = sanitize_moonshot_tools(tools)
+        assert out[0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
+        # Second tool already clean — should be structurally equivalent
+        assert out[1]["function"]["parameters"] == {"type": "object", "properties": {}}
+
+    def test_empty_list_is_passthrough(self):
+        # An empty list stays an empty list; None is returned as None.
+        assert sanitize_moonshot_tools([]) == []
+        assert sanitize_moonshot_tools(None) is None
+
+    def test_skips_malformed_entries(self):
+        """Entries without a function dict are passed through untouched."""
+        tools = [{"type": "function"}, {"not": "a tool"}]
+        out = sanitize_moonshot_tools(tools)
+        assert out == tools
+
+
+class TestRealWorldMCPShape:
+    """End-to-end: a realistic MCP-style schema that used to 400 on Moonshot."""
+
+    def test_combined_rewrites(self):
+        # Shape: missing type on a property, anyOf with parent type, array
+        # items without type — all in one tool.
+        params = {
+            "type": "object",
+            "properties": {
+                "query": {"description": "search text"},
+                "filter": {
+                    "type": "string",
+                    "anyOf": [
+                        {"type": "string"},
+                        {"type": "null"},
+                    ],
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {"description": "tag"},
+                },
+            },
+            "required": ["query"],
+        }
+        out = sanitize_moonshot_tool_parameters(params)
+        # Each repair is checked independently, and the unrelated
+        # ``required`` list must survive unchanged.
+        assert out["properties"]["query"]["type"] == "string"
+        assert "type" not in out["properties"]["filter"]
+        assert out["properties"]["filter"]["anyOf"][0]["type"] == "string"
+        assert out["properties"]["tags"]["items"]["type"] == "string"
+        assert out["required"] == ["query"]
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -238,6 +238,56 @@ class TestChatCompletionsKimi:
        )
        assert kw["extra_body"]["thinking"] == {"type": "disabled"}

+    def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport):
+        """Aggregator routes (Nous, OpenRouter) hit Moonshot by model name, not base URL."""
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "search",
+                    "description": "Search",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "q": {"description": "query"},  # missing type
+                        },
+                    },
+                },
+            },
+        ]
+        kw = transport.build_kwargs(
+            model="moonshotai/kimi-k2.6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=tools,
+            max_tokens_param_fn=lambda n: {"max_tokens": n},
+        )
+        assert kw["tools"][0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
+
+    def test_non_moonshot_tools_are_not_mutated(self, transport):
+        """Other models don't go through the Moonshot sanitizer."""
+        original_params = {
+            "type": "object",
+            "properties": {"q": {"description": "query"}},  # missing type
+        }
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "search",
+                    "description": "Search",
+                    "parameters": original_params,
+                },
+            },
+        ]
+        kw = transport.build_kwargs(
+            model="anthropic/claude-sonnet-4.6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=tools,
+            max_tokens_param_fn=lambda n: {"max_tokens": n},
+        )
+        # The parameters dict is passed through untouched (no synthetic type)
+        assert "type" not in kw["tools"][0]["function"]["parameters"]["properties"]["q"]
+

 class TestChatCompletionsValidate:

--- a/tests/cli/test_cli_provider_resolution.py
+++ b/tests/cli/test_cli_provider_resolution.py
@@ -571,7 +571,7 @@ def test_cmd_model_forwards_nous_login_tls_options(monkeypatch):
        captured["ca_bundle"] = login_args.ca_bundle
        captured["insecure"] = login_args.insecure

-    monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login)
+    monkeypatch.setattr("hermes_cli.auth.login_nous", _fake_login)

    hermes_main.cmd_model(
        SimpleNamespace(
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -710,7 +710,15 @@ class TestRunJobSessionPersistence:
        kwargs = mock_agent_cls.call_args.kwargs
        assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"]

-    def test_run_job_enabled_toolsets_none_when_not_set(self, tmp_path):
+    def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path):
+        """When a job has no explicit enabled_toolsets, the scheduler now
+        resolves them from ``hermes tools`` platform config for ``cron``
+        (PR #14xxx — blanket fix for Norbert's surprise ``moa`` run).
+
+        The legacy "pass None → AIAgent loads full default" path is still
+        reachable, but only when ``_get_platform_tools`` raises (safety net
+        for any unexpected config shape).
+        """
        job = {
            "id": "no-toolset-job",
            "name": "test",
@@ -725,7 +733,39 @@ class TestRunJobSessionPersistence:
            run_job(job)

        kwargs = mock_agent_cls.call_args.kwargs
-        assert kwargs["enabled_toolsets"] is None
+        # Resolution happened — not None, is a list.
+        assert isinstance(kwargs["enabled_toolsets"], list)
+        # The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS
+        # (``moa``, ``homeassistant``, ``rl``) removed. The most important
+        # invariant: ``moa`` is NOT in the default cron toolset, so a cron
+        # run cannot accidentally spin up frontier models.
+        assert "moa" not in kwargs["enabled_toolsets"]
+
+    def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path):
+        """Per-job enabled_toolsets (via cronjob tool) always take precedence
+        over the platform-level ``hermes tools`` config."""
+        job = {
+            "id": "override-job",
+            "name": "test",
+            "prompt": "hello",
+            "enabled_toolsets": ["terminal"],
+        }
+        fake_db, patches = self._make_run_job_patches(tmp_path)
+        # Even if the user has ``hermes tools`` configured to enable web+file
+        # for cron, the per-job override wins.
+        with patches[0], patches[1], patches[2], patches[3], patches[4], \
+             patch("run_agent.AIAgent") as mock_agent_cls, \
+             patch(
+                 "hermes_cli.tools_config._get_platform_tools",
+                 return_value={"web", "file"},
+             ):
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+            run_job(job)
+
+        kwargs = mock_agent_cls.call_args.kwargs
+        assert kwargs["enabled_toolsets"] == ["terminal"]

    def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
        """Empty final_response should stay empty for delivery logic (issue #2234).
--- a/tests/gateway/test_complete_path_at_filter.py
+++ b/tests/gateway/test_complete_path_at_filter.py
@@ -1,22 +1,28 @@
 """Regression tests for the TUI gateway's `complete.path` handler.

-Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder`
-with no colon yet) still surfaced files alongside directories in the
-TUI composer, because the gateway-side completion lives in
-`tui_gateway/server.py` and was never touched by the earlier fix to
-`hermes_cli/commands.py`.
+Reported during the TUI v2 blitz retest:
+  - typing `@folder:` (and `@folder` with no colon yet) surfaced files
+    alongside directories — the gateway-side completion lives in
+    `tui_gateway/server.py` and was never touched by the earlier fix to
+    `hermes_cli/commands.py`.
+  - typing `@appChrome` required the full `@ui-tui/src/components/app…`
+    path to find the file — users expect Cmd-P-style fuzzy basename
+    matching across the repo, not a strict directory prefix filter.

 Covers:
  - `@folder:` only yields directories
  - `@file:` only yields regular files
  - Bare `@folder` / `@file` (no colon) lists cwd directly
  - Explicit prefix is preserved in the completion text
+  - `@<name>` with no slash fuzzy-matches basenames anywhere in the tree
 """

 from __future__ import annotations

 from pathlib import Path

+import pytest
+
 from tui_gateway import server


@@ -33,6 +39,15 @@ def _items(word: str):
    return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]]


+@pytest.fixture(autouse=True)
+def _reset_fuzzy_cache(monkeypatch):
+    # Each test walks a fresh tmp dir; clear the cached listing so prior
+    # roots can't leak through the TTL window.
+    server._fuzzy_cache.clear()
+    yield
+    server._fuzzy_cache.clear()
+
+
 def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
    monkeypatch.chdir(tmp_path)
    _fixture(tmp_path)
@@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):

    for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"):
        assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
+
+
+# ── Fuzzy basename matching ──────────────────────────────────────────────
+# Users shouldn't have to know the full path — typing `@appChrome` should
+# find `ui-tui/src/components/appChrome.tsx`.
+
+
+def _nested_fixture(tmp_path: Path):
+    (tmp_path / "readme.md").write_text("x")
+    (tmp_path / ".env").write_text("x")
+    (tmp_path / "ui-tui/src/components").mkdir(parents=True)
+    (tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x")
+    (tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x")
+    (tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x")
+    (tmp_path / "ui-tui/src/hooks").mkdir(parents=True)
+    (tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x")
+    (tmp_path / "tui_gateway").mkdir()
+    (tmp_path / "tui_gateway/server.py").write_text("x")
+
+
+def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch):
+    """`@appChrome` — with no slash — should surface the nested file."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    entries = _items("@appChrome")
+    texts = [t for t, _, _ in entries]
+
+    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
+
+    # Display is the basename, meta is the containing directory, so the
+    # picker can show `appChrome.tsx  ui-tui/src/components` on one row.
+    row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx")
+    assert row[1] == "appChrome.tsx"
+    assert row[2] == "ui-tui/src/components"
+
+
+def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch):
+    """Better matches sort before weaker matches regardless of path depth."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+    (tmp_path / "server.py").write_text("x")  # exact basename match at root
+
+    texts = [t for t, _, _ in _items("@server")]
+
+    # Root `server.py` sorts ahead of `tui_gateway/server.py`: both
+    # basenames are `server.py`, so they match `server` equally well and
+    # the shorter rel path breaks the tie.
+    assert texts[0] == "@file:server.py", texts
+    assert "@file:tui_gateway/server.py" in texts
+
+
+def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch):
+    """Mid-basename camelCase pieces match without substring scanning."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@Chrome")]
+
+    # `Chrome` starts a camelCase word inside `appChrome.tsx`.
+    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
+
+
+def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch):
+    """`@uCo` → `useCompletion.ts` via subsequence, last-resort tier."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@uCo")]
+
+    assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts
+
+
+def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch):
+    """Explicit `@file:` prefix still wins the completion tag."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@file:appChrome")]
+
+    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
+
+
+def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch):
+    """Any `/` in the query = user is navigating; keep directory listing."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@ui-tui/src/components/app")]
+
+    # Directory-listing mode prefixes with `@file:` / `@folder:` per entry.
+    # It should only surface direct children of the named dir — not the
+    # nested `useCompletion.ts`.
+    assert any("appChrome.tsx" in t for t in texts), texts
+    assert not any("useCompletion.ts" in t for t in texts), texts
+
+
+def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch):
+    """`@folder:<name>` still lists directories — fuzzy scanner only walks
+    files (git-tracked + untracked), so defer to the dir-listing path."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@folder:ui")]
+
+    # Root has `ui-tui/` as a directory; the listing branch should surface it.
+    assert any(t.startswith("@folder:ui-tui") for t in texts), texts
+
+
+def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch):
+    """`.env` doesn't leak into `@env` but does show for `@.env`."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    assert not any(".env" in t for t, _, _ in _items("@env"))
+    assert any(t.endswith(".env") for t, _, _ in _items("@.env"))
+
+
+def test_fuzzy_caps_results(tmp_path, monkeypatch):
+    """The 30-item cap survives a big tree."""
+    monkeypatch.chdir(tmp_path)
+    for i in range(60):
+        (tmp_path / f"mod_{i:03d}.py").write_text("x")
+
+    items = _items("@mod")
+
+    assert len(items) == 30
+
+
+def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch):
+    """When the gateway runs from a subdirectory of a git repo, fuzzy
+    completion paths must resolve under that cwd — not under the repo root.
+
+    Without this, `@appChrome` from inside `apps/web/` would suggest
+    `@file:apps/web/src/appChrome.tsx` but the agent (resolving from cwd) would
+    look for `apps/web/apps/web/src/appChrome.tsx` and fail. We translate every
+    `git ls-files` result back to a `relpath(root)` and drop anything
+    outside `root` so the completion contract stays "paths are cwd-relative".
+    """
+    import subprocess
+
+    subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True)
+    subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True)
+    subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True)
+
+    (tmp_path / "apps" / "web" / "src").mkdir(parents=True)
+    (tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x")
+    (tmp_path / "apps" / "api" / "src").mkdir(parents=True)
+    (tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x")
+    (tmp_path / "README.md").write_text("x")
+
+    subprocess.run(["git", "add", "."], cwd=tmp_path, check=True)
+    subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True)
+
+    # Run from `apps/web/` — completions should be relative to here, and
+    # files outside this subtree (apps/api, README.md at root) shouldn't
+    # appear at all.
+    monkeypatch.chdir(tmp_path / "apps" / "web")
+
+    texts = [t for t, _, _ in _items("@appChrome")]
+
+    assert "@file:src/appChrome.tsx" in texts, texts
+    assert not any("apps/web/" in t for t in texts), texts
+
+    server._fuzzy_cache.clear()
+    other_texts = [t for t, _, _ in _items("@server")]
+
+    assert not any("server.ts" in t for t in other_texts), other_texts
+
+    server._fuzzy_cache.clear()
+    readme_texts = [t for t, _, _ in _items("@README")]
+
+    assert not any("README.md" in t for t in readme_texts), readme_texts
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -463,7 +463,7 @@ class TestPlatformToolsetConsistency:

        gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"])
        # Exclude non-messaging platforms from the check
-        non_messaging = {"cli", "api_server"}
+        non_messaging = {"cli", "api_server", "cron"}
        for platform, meta in PLATFORMS.items():
            if platform in non_messaging:
                continue
--- a/tests/hermes_cli/test_voice_wrapper.py
+++ b/tests/hermes_cli/test_voice_wrapper.py
@@ -0,0 +1,255 @@
+"""Tests for ``hermes_cli.voice`` — the TUI gateway's voice wrapper.
+
+The module is imported *lazily* by ``tui_gateway/server.py`` so that a
+box with missing audio deps fails at call time (returning a clean RPC
+error) rather than at gateway startup. These tests assert the public
+contract the gateway depends on: the gateway-facing symbols exist,
+``stop_and_transcribe`` is a no-op when nothing is recording,
+``speak_text`` tolerates empty input without touching the provider
+stack, and the continuous (VAD) loop restarts and halts as expected.
+"""
+
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+
+class TestPublicAPI:
+    def test_gateway_symbols_importable(self):
+        """Match the exact import shape tui_gateway/server.py uses."""
+        from hermes_cli.voice import (
+            speak_text,
+            start_recording,
+            stop_and_transcribe,
+        )
+
+        assert callable(start_recording)
+        assert callable(stop_and_transcribe)
+        assert callable(speak_text)
+
+
+class TestStopWithoutStart:
+    def test_returns_none_when_no_recording_active(self, monkeypatch):
+        """Idempotent no-op: stop before start must not raise or touch state."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_recorder", None)
+
+        assert voice.stop_and_transcribe() is None
+
+
+class TestSpeakTextGuards:
+    @pytest.mark.parametrize("text", ["", "   ", "\n\t  "])
+    def test_empty_text_is_noop(self, text):
+        """Empty / whitespace-only text must return without importing tts_tool
+        (the gateway spawns a thread per call, so a no-op on empty input
+        keeps the thread pool from churning on trivial inputs)."""
+        from hermes_cli.voice import speak_text
+
+        # Should simply return None without raising.
+        assert speak_text(text) is None
+
+
+class TestContinuousAPI:
+    """Continuous (VAD) mode API — CLI-parity loop entry points."""
+
+    def test_continuous_exports(self):
+        from hermes_cli.voice import (
+            is_continuous_active,
+            start_continuous,
+            stop_continuous,
+        )
+
+        assert callable(start_continuous)
+        assert callable(stop_continuous)
+        assert callable(is_continuous_active)
+
+    def test_not_active_by_default(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Isolate from any state left behind by other tests in the session.
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+
+        assert voice.is_continuous_active() is False
+
+    def test_stop_continuous_idempotent_when_inactive(self, monkeypatch):
+        """stop_continuous must not raise when no loop is active — the
+        gateway's voice.toggle off path calls it unconditionally."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+
+        # Should return cleanly without exceptions
+        assert voice.stop_continuous() is None
+        assert voice.is_continuous_active() is False
+
+    def test_double_start_is_idempotent(self, monkeypatch):
+        """A second start_continuous while already active is a no-op — prevents
+        two overlapping capture threads fighting over the microphone when the
+        UI double-fires (e.g. both /voice on and Ctrl+B within the same tick)."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", True)
+        called = {"n": 0}
+
+        class FakeRecorder:
+            def start(self, on_silence_stop=None):
+                called["n"] += 1
+
+            def cancel(self):
+                pass
+
+        monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())
+
+        voice.start_continuous(on_transcript=lambda _t: None)
+
+        # The guard inside start_continuous short-circuits before rec.start()
+        assert called["n"] == 0
+
+
+class TestContinuousLoopSimulation:
+    """End-to-end simulation of the VAD loop with a fake recorder.
+
+    Proves auto-restart works: the silence callback must trigger transcribe →
+    on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers
+    the 3-strikes no-speech halt.
+    """
+
+    @pytest.fixture
+    def fake_recorder(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Reset module state between tests.
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+        monkeypatch.setattr(voice, "_continuous_no_speech_count", 0)
+        monkeypatch.setattr(voice, "_continuous_on_transcript", None)
+        monkeypatch.setattr(voice, "_continuous_on_status", None)
+        monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
+
+        class FakeRecorder:
+            _silence_threshold = 200
+            _silence_duration = 3.0
+            is_recording = False
+
+            def __init__(self):
+                self.start_calls = 0
+                self.last_callback = None
+                self.stopped = 0
+                self.cancelled = 0
+                # Preset WAV path returned by stop()
+                self.next_stop_wav = "/tmp/fake.wav"
+
+            def start(self, on_silence_stop=None):
+                self.start_calls += 1
+                self.last_callback = on_silence_stop
+                self.is_recording = True
+
+            def stop(self):
+                self.stopped += 1
+                self.is_recording = False
+                return self.next_stop_wav
+
+            def cancel(self):
+                self.cancelled += 1
+                self.is_recording = False
+
+        rec = FakeRecorder()
+        monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec)
+        # Skip real file ops in the silence callback.
+        monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False)
+        return rec
+
+    def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "hello world"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        statuses = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_status=lambda s: statuses.append(s),
+        )
+
+        assert fake_recorder.start_calls == 1
+        assert statuses == ["listening"]
+
+        # Simulate AudioRecorder's silence detector firing.
+        fake_recorder.last_callback()
+
+        assert transcripts == ["hello world"]
+        assert fake_recorder.start_calls == 2  # auto-restarted
+        assert statuses == ["listening", "transcribing", "listening"]
+        assert voice.is_continuous_active() is True
+
+        voice.stop_continuous()
+
+    def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Transcription returns no speech — fake_recorder.stop() returns the
+        # path, but transcribe returns empty text, counting as silence.
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        silent_limit_fired = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_silent_limit=lambda: silent_limit_fired.append(True),
+        )
+
+        # Fire silence callback 3 times
+        for _ in range(3):
+            fake_recorder.last_callback()
+
+        assert transcripts == []
+        assert silent_limit_fired == [True]
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.cancelled >= 1
+
+    def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch):
+        """User hits Ctrl+B mid-transcription: the in-flight transcript must
+        not fire (the stop suppresses it), and the loop must NOT restart."""
+        import hermes_cli.voice as voice
+
+        stop_triggered = {"flag": False}
+
+        def late_transcribe(_p):
+            # Simulate stop_continuous arriving while we're inside transcribe
+            voice.stop_continuous()
+            stop_triggered["flag"] = True
+            return {"success": True, "transcript": "final word"}
+
+        monkeypatch.setattr(voice, "transcribe_recording", late_transcribe)
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        voice.start_continuous(on_transcript=lambda t: transcripts.append(t))
+
+        initial_starts = fake_recorder.start_calls  # 1
+        fake_recorder.last_callback()
+
+        assert stop_triggered["flag"] is True
+        # Loop is stopped — no auto-restart
+        assert fake_recorder.start_calls == initial_starts
+        # The in-flight transcript was suppressed because we stopped mid-flight
+        assert transcripts == []
+        assert voice.is_continuous_active() is False
--- a/tests/run_agent/test_tool_arg_coercion.py
+++ b/tests/run_agent/test_tool_arg_coercion.py
@@ -134,6 +134,31 @@ class TestCoerceValue:
        """A non-numeric string in [number, string] should stay a string."""
        assert _coerce_value("hello", ["number", "string"]) == "hello"

+    def test_array_type_parsed_from_json_string(self):
+        """Stringified JSON arrays are parsed into native lists."""
+        assert _coerce_value('["a", "b"]', "array") == ["a", "b"]
+        assert _coerce_value("[1, 2, 3]", "array") == [1, 2, 3]
+
+    def test_object_type_parsed_from_json_string(self):
+        """Stringified JSON objects are parsed into native dicts."""
+        assert _coerce_value('{"k": "v"}', "object") == {"k": "v"}
+        assert _coerce_value('{"n": 1}', "object") == {"n": 1}
+
+    def test_array_invalid_json_preserved(self):
+        """Unparseable strings are returned unchanged."""
+        assert _coerce_value("not-json", "array") == "not-json"
+
+    def test_object_invalid_json_preserved(self):
+        assert _coerce_value("not-json", "object") == "not-json"
+
+    def test_array_type_wrong_shape_preserved(self):
+        """A JSON object passed for an 'array' slot is preserved as a string."""
+        assert _coerce_value('{"k": "v"}', "array") == '{"k": "v"}'
+
+    def test_object_type_wrong_shape_preserved(self):
+        """A JSON array passed for an 'object' slot is preserved as a string."""
+        assert _coerce_value('["a"]', "object") == '["a"]'
+

 # ── Full coerce_tool_args with registry ───────────────────────────────────

@@ -212,6 +237,32 @@ class TestCoerceToolArgs:
            assert result["items"] == [1, 2, 3]
            assert result["config"] == {"key": "val"}

+    def test_coerces_stringified_array_arg(self):
+        """Regression for #3947 — MCP servers using z.array() expect lists, not strings."""
+        schema = self._mock_schema({
+            "messageIds": {"type": "array", "items": {"type": "string"}},
+        })
+        with patch("model_tools.registry.get_schema", return_value=schema):
+            args = {"messageIds": '["abc", "def"]'}
+            result = coerce_tool_args("test_tool", args)
+            assert result["messageIds"] == ["abc", "def"]
+
+    def test_coerces_stringified_object_arg(self):
+        """Stringified JSON objects get parsed into dicts."""
+        schema = self._mock_schema({"config": {"type": "object"}})
+        with patch("model_tools.registry.get_schema", return_value=schema):
+            args = {"config": '{"max": 50}'}
+            result = coerce_tool_args("test_tool", args)
+            assert result["config"] == {"max": 50}
+
+    def test_invalid_json_array_preserved_as_string(self):
+        """If the string isn't valid JSON, pass it through — let the tool decide."""
+        schema = self._mock_schema({"items": {"type": "array"}})
+        with patch("model_tools.registry.get_schema", return_value=schema):
+            args = {"items": "not-json"}
+            result = coerce_tool_args("test_tool", args)
+            assert result["items"] == "not-json"
+
    def test_extra_args_without_schema_left_alone(self):
        """Args not in the schema properties are not touched."""
        schema = self._mock_schema({"limit": {"type": "integer"}})
--- a/tests/tools/test_mcp_tool.py
+++ b/tests/tools/test_mcp_tool.py
@@ -120,6 +120,177 @@ class TestSchemaConversion:

        assert schema["parameters"] == {"type": "object", "properties": {}}

+    def test_definitions_refs_are_rewritten_to_defs(self):
+        from tools.mcp_tool import _convert_mcp_schema
+
+        mcp_tool = _make_mcp_tool(
+            name="submit",
+            description="Submit a payload",
+            input_schema={
+                "type": "object",
+                "properties": {
+                    "input": {"$ref": "#/definitions/Payload"},
+                },
+                "required": ["input"],
+                "definitions": {
+                    "Payload": {
+                        "type": "object",
+                        "properties": {
+                            "query": {"type": "string"},
+                        },
+                        "required": ["query"],
+                    }
+                },
+            },
+        )
+
+        schema = _convert_mcp_schema("forms", mcp_tool)
+
+        assert schema["parameters"]["properties"]["input"]["$ref"] == "#/$defs/Payload"
+        assert "$defs" in schema["parameters"]
+        assert "definitions" not in schema["parameters"]
+
+    def test_nested_definition_refs_are_rewritten_recursively(self):
+        from tools.mcp_tool import _convert_mcp_schema
+
+        mcp_tool = _make_mcp_tool(
+            name="nested",
+            description="Nested schema",
+            input_schema={
+                "type": "object",
+                "properties": {
+                    "items": {
+                        "type": "array",
+                        "items": {"$ref": "#/definitions/Entry"},
+                    },
+                },
+                "definitions": {
+                    "Entry": {
+                        "type": "object",
+                        "properties": {
+                            "child": {"$ref": "#/definitions/Child"},
+                        },
+                    },
+                    "Child": {
+                        "type": "object",
+                        "properties": {
+                            "value": {"type": "string"},
+                        },
+                    },
+                },
+            },
+        )
+
+        schema = _convert_mcp_schema("forms", mcp_tool)
+
+        assert schema["parameters"]["properties"]["items"]["items"]["$ref"] == "#/$defs/Entry"
+        assert schema["parameters"]["$defs"]["Entry"]["properties"]["child"]["$ref"] == "#/$defs/Child"
+
+    def test_missing_type_on_object_is_coerced(self):
+        """Schemas that describe an object but omit ``type`` get type='object'."""
+        from tools.mcp_tool import _normalize_mcp_input_schema
+
+        schema = _normalize_mcp_input_schema({
+            "properties": {"q": {"type": "string"}},
+            "required": ["q"],
+        })
+
+        assert schema["type"] == "object"
+        assert schema["properties"]["q"]["type"] == "string"
+        assert schema["required"] == ["q"]
+
+    def test_null_type_on_object_is_coerced(self):
+        """type: None should be treated like missing type (common MCP server bug)."""
+        from tools.mcp_tool import _normalize_mcp_input_schema
+
+        schema = _normalize_mcp_input_schema({
+            "type": None,
+            "properties": {"x": {"type": "integer"}},
+        })
+
+        assert schema["type"] == "object"
+
+    def test_required_pruned_when_property_missing(self):
+        """Gemini 400s on required names that don't exist in properties."""
+        from tools.mcp_tool import _normalize_mcp_input_schema
+
+        schema = _normalize_mcp_input_schema({
+            "type": "object",
+            "properties": {"a": {"type": "string"}},
+            "required": ["a", "ghost", "phantom"],
+        })
+
+        assert schema["required"] == ["a"]
+
+    def test_required_removed_when_all_names_dangle(self):
+        from tools.mcp_tool import _normalize_mcp_input_schema
+
+        schema = _normalize_mcp_input_schema({
+            "type": "object",
+            "properties": {},
+            "required": ["ghost"],
+        })
+
+        assert "required" not in schema
+
+    def test_required_pruning_applies_recursively_inside_nested_objects(self):
+        """Nested object schemas also get required pruning."""
+        from tools.mcp_tool import _normalize_mcp_input_schema
+
+        schema = _normalize_mcp_input_schema({
+            "type": "object",
+            "properties": {
+                "filter": {
+                    "type": "object",
+                    "properties": {"field": {"type": "string"}},
+                    "required": ["field", "missing"],
+                },
+            },
+        })
+
+        assert schema["properties"]["filter"]["required"] == ["field"]
+
+    def test_object_in_array_items_gets_properties_filled(self):
+        """Array-item object schemas without properties get an empty dict."""
+        from tools.mcp_tool import _normalize_mcp_input_schema
+
+        schema = _normalize_mcp_input_schema({
+            "type": "object",
+            "properties": {
+                "items": {
+                    "type": "array",
+                    "items": {"type": "object"},
+                },
+            },
+        })
+
+        assert schema["properties"]["items"]["items"]["properties"] == {}
+
+    def test_convert_mcp_schema_survives_missing_inputschema_attribute(self):
+        """A Tool object without .inputSchema must not crash registration."""
+        import types
+
+        from tools.mcp_tool import _convert_mcp_schema
+
+        bare_tool = types.SimpleNamespace(name="probe", description="Probe")
+        schema = _convert_mcp_schema("srv", bare_tool)
+
+        assert schema["name"] == "mcp_srv_probe"
+        assert schema["parameters"] == {"type": "object", "properties": {}}
+
+    def test_convert_mcp_schema_with_none_inputschema(self):
+        """Tool with inputSchema=None produces a valid empty object schema."""
+        import types
+
+        from tools.mcp_tool import _convert_mcp_schema
+
+        # Note: _make_mcp_tool(input_schema=None) falls back to a default —
+        # build the namespace directly so .inputSchema really is None.
+        mcp_tool = types.SimpleNamespace(name="probe", description="Probe", inputSchema=None)
+        schema = _convert_mcp_schema("srv", mcp_tool)
+
+        assert schema["parameters"] == {"type": "object", "properties": {}}
+
    def test_tool_name_prefix_format(self):
        from tools.mcp_tool import _convert_mcp_schema

--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -298,7 +298,7 @@ def _get_child_timeout() -> float:
    """Read delegation.child_timeout_seconds from config.

    Returns the number of seconds a single child agent is allowed to run
-    before being considered stuck.  Default: 300 s (5 minutes).
+    before being considered stuck.  Default: 600 s (10 minutes).
    """
    cfg = _load_config()
    val = cfg.get("child_timeout_seconds")
@@ -409,7 +409,7 @@ def _preserve_parent_mcp_toolsets(


 DEFAULT_MAX_ITERATIONS = 50
-DEFAULT_CHILD_TIMEOUT = 300  # seconds before a child agent is considered stuck
+DEFAULT_CHILD_TIMEOUT = 600  # seconds before a child agent is considered stuck
 _HEARTBEAT_INTERVAL = 30  # seconds between parent activity heartbeats during delegation
 _HEARTBEAT_STALE_CYCLES = (
    5  # mark child stale after this many heartbeats with no iteration progress
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -2019,14 +2019,92 @@ def _make_check_fn(server_name: str):
 # ---------------------------------------------------------------------------

 def _normalize_mcp_input_schema(schema: dict | None) -> dict:
-    """Normalize MCP input schemas for LLM tool-calling compatibility."""
+    """Normalize MCP input schemas for LLM tool-calling compatibility.
+
+    MCP servers can emit plain JSON Schema with ``definitions`` /
+    ``#/definitions/...`` references.  Kimi / Moonshot rejects that form and
+    requires local refs to point into ``#/$defs/...`` instead.  Normalize the
+    common draft-07 shape here so MCP tool schemas remain portable across
+    OpenAI-compatible providers.
+
+    Additional MCP-server robustness repairs applied recursively:
+
+    * Missing or ``null`` ``type`` on an object-shaped node is coerced to
+      ``"object"`` (some servers omit it).  See PR #4897.
+    * When an ``object`` node lacks ``properties``, an empty ``properties``
+      dict is added so ``required`` entries don't dangle.
+    * ``required`` arrays are pruned to only names that exist in
+      ``properties``; otherwise Google AI Studio / Gemini 400s with
+      ``property is not defined``.  See PR #4651.
+
+    All repairs are provider-agnostic and ideally produce a schema valid on
+    OpenAI, Anthropic, Gemini, and Moonshot in one pass.
+    """
    if not schema:
        return {"type": "object", "properties": {}}

-    if schema.get("type") == "object" and "properties" not in schema:
-        return {**schema, "properties": {}}
+    def _rewrite_local_refs(node):
+        if isinstance(node, dict):
+            normalized = {}
+            for key, value in node.items():
+                out_key = "$defs" if key == "definitions" else key
+                normalized[out_key] = _rewrite_local_refs(value)
+            ref = normalized.get("$ref")
+            if isinstance(ref, str) and ref.startswith("#/definitions/"):
+                normalized["$ref"] = "#/$defs/" + ref[len("#/definitions/"):]
+            return normalized
+        if isinstance(node, list):
+            return [_rewrite_local_refs(item) for item in node]
+        return node

-    return schema
+    def _repair_object_shape(node, is_map=False):
+        """Recursively repair object-shaped nodes: fill type, prune required.
+
+        Name -> schema maps (``is_map``) are walked but never repaired, so a
+        property literally named ``required`` / ``properties`` stays intact.
+        """
+        if isinstance(node, list):
+            return [_repair_object_shape(item) for item in node]
+        if not isinstance(node, dict):
+            return node
+        if is_map:
+            return {k: _repair_object_shape(v) for k, v in node.items()}
+
+        map_keys = ("properties", "patternProperties", "$defs", "definitions")
+        repaired = {k: _repair_object_shape(v, k in map_keys) for k, v in node.items()}
+
+        # Coerce missing / null type when the shape is clearly an object.
+        if not repaired.get("type") and ("properties" in repaired or "required" in repaired):
+            repaired["type"] = "object"
+
+        if repaired.get("type") == "object":
+            # Ensure properties is a dict so required can reference it safely
+            if not isinstance(repaired.get("properties"), dict):
+                repaired["properties"] = {}
+
+            # Prune required to only include names that exist in properties
+            required = repaired.get("required")
+            if isinstance(required, list):
+                props = repaired["properties"]
+                valid = [r for r in required if isinstance(r, str) and r in props]
+                if len(valid) != len(required):
+                    if valid:
+                        repaired["required"] = valid
+                    else:
+                        repaired.pop("required", None)
+
+        return repaired
+
+    normalized = _rewrite_local_refs(schema)
+    normalized = _repair_object_shape(normalized)
+
+    # Ensure top-level is a well-formed object schema
+    if not isinstance(normalized, dict):
+        return {"type": "object", "properties": {}}
+    if normalized.get("type") == "object" and "properties" not in normalized:
+        normalized = {**normalized, "properties": {}}
+
+    return normalized


 def sanitize_mcp_name_component(value: str) -> str:
@@ -2057,7 +2135,7 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict:
    return {
        "name": prefixed_name,
        "description": mcp_tool.description or f"MCP tool {mcp_tool.name} from {server_name}",
-        "parameters": _normalize_mcp_input_schema(mcp_tool.inputSchema),
+        "parameters": _normalize_mcp_input_schema(getattr(mcp_tool, "inputSchema", None)),
    }


--- a/toolsets.py
+++ b/toolsets.py
@@ -295,7 +295,18 @@ TOOLSETS = {
        "tools": _HERMES_CORE_TOOLS,
        "includes": []
    },
-    
+
+    "hermes-cron": {
+        # Mirrors hermes-cli so cron's "default" toolset is the same set of
+        # core tools users see interactively — then `hermes tools` filters
+        # them down per the platform config. _DEFAULT_OFF_TOOLSETS (moa,
+        # homeassistant, rl) are excluded by _get_platform_tools() unless
+        # the user explicitly enables them.
+        "description": "Default cron toolset - same core tools as hermes-cli; gated by `hermes tools`",
+        "tools": _HERMES_CORE_TOOLS,
+        "includes": []
+    },
+
    "hermes-telegram": {
        "description": "Telegram bot toolset - full access for personal use (terminal has safety checks)",
        "tools": _HERMES_CORE_TOOLS,
--- a/tui_gateway/entry.py
+++ b/tui_gateway/entry.py
@@ -1,19 +1,93 @@
 import json
+import os
 import signal
 import sys
+import time
+import traceback

-from tui_gateway.server import dispatch, resolve_skin, write_json
+from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json

-signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+def _log_signal(signum: int, frame) -> None:
+    """Capture WHICH thread and WHERE a termination signal hit us.
+
+    SIG_DFL for SIGPIPE kills the process silently the instant any
+    background thread (TTS playback, beep, voice status emitter, etc.)
+    writes to a stdout the TUI has stopped reading.  Without this
+    handler the gateway-exited banner in the TUI has no trace — the
+    crash log never sees a Python exception because the kernel reaps
+    the process before the interpreter runs anything.
+    """
+    name = {
+        signal.SIGPIPE: "SIGPIPE",
+        signal.SIGTERM: "SIGTERM",
+        signal.SIGHUP: "SIGHUP",
+    }.get(signum, f"signal {signum}")
+    try:
+        os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
+        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
+            f.write(
+                f"\n=== {name} received · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n"
+            )
+            if frame is not None:
+                f.write("main-thread stack at signal delivery:\n")
+                traceback.print_stack(frame, file=f)
+            # All live threads — signal may have been triggered by a
+            # background thread (write to broken stdout from TTS, etc.).
+            import threading as _threading
+            for tid, th in _threading._active.items():
+                f.write(f"\n--- thread {th.name} (id={tid}) ---\n")
+                f.write("".join(traceback.format_stack(sys._current_frames().get(tid))))
+    except Exception:
+        pass
+    print(f"[gateway-signal] {name}", file=sys.stderr, flush=True)
+    sys.exit(0)
+
+
+# SIGPIPE: ignore, don't exit. The old SIG_DFL killed the process
+# silently whenever a *background* thread (TTS playback chain, voice
+# debug stderr emitter, beep thread) wrote to a pipe the TUI had gone
+# quiet on — even though the main thread was perfectly fine waiting on
+# stdin.  Ignoring the signal lets Python raise BrokenPipeError on the
+# offending write (write_json already handles that with a clean
+# sys.exit(0) + _log_exit), which keeps the gateway alive as long as
+# the main command pipe is still readable.  Terminal signals still
+# route through _log_signal so kills and hangups are diagnosable.
+signal.signal(signal.SIGPIPE, signal.SIG_IGN)
+signal.signal(signal.SIGTERM, _log_signal)
+signal.signal(signal.SIGHUP, _log_signal)
 signal.signal(signal.SIGINT, signal.SIG_IGN)


+def _log_exit(reason: str) -> None:
+    """Record why the gateway subprocess is shutting down.
+
+    Four exit paths (startup write fail, parse-error-response write fail,
+    dispatch-response write fail, stdin EOF) all collapse into a silent
+    sys.exit(0) here.  Without this trail the TUI shows "gateway exited"
+    with no actionable clue about WHICH broken pipe or WHICH message
+    triggered it — the main reason voice-mode turns look like phantom
+    crashes when the real story is "TUI read pipe closed on this event".
+    """
+    try:
+        os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
+        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
+            f.write(
+                f"\n=== gateway exit · {time.strftime('%Y-%m-%d %H:%M:%S')} "
+                f"· reason={reason} ===\n"
+            )
+    except Exception:
+        pass
+    print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True)
+
+
 def main():
    if not write_json({
        "jsonrpc": "2.0",
        "method": "event",
        "params": {"type": "gateway.ready", "payload": {"skin": resolve_skin()}},
    }):
+        _log_exit("startup write failed (broken stdout pipe before first event)")
        sys.exit(0)

    for raw in sys.stdin:
@@ -25,14 +99,19 @@ def main():
            req = json.loads(line)
        except json.JSONDecodeError:
            if not write_json({"jsonrpc": "2.0", "error": {"code": -32700, "message": "parse error"}, "id": None}):
+                _log_exit("parse-error-response write failed (broken stdout pipe)")
                sys.exit(0)
            continue

+        method = req.get("method") if isinstance(req, dict) else None
        resp = dispatch(req)
        if resp is not None:
            if not write_json(resp):
+                _log_exit(f"response write failed for method={method!r} (broken stdout pipe)")
                sys.exit(0)

+    _log_exit("stdin EOF (TUI closed the command pipe)")
+

 if __name__ == "__main__":
    main()
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -23,6 +23,75 @@ load_hermes_dotenv(
    hermes_home=_hermes_home, project_env=Path(__file__).parent.parent / ".env"
 )

+
+# ── Panic logger ─────────────────────────────────────────────────────
+# Gateway crashes in a TUI session leave no forensics: stdout is the
+# JSON-RPC pipe (TUI side parses it, doesn't log raw), the root logger
+# only catches handled warnings, and the subprocess exits before stderr
+# flushes through the stderr->gateway.stderr event pump. This hook
+# appends every unhandled exception to ~/.hermes/logs/tui_gateway_crash.log
+# AND re-emits a one-line summary to stderr so the TUI can surface it in
+# Activity — exactly what was missing when the voice-mode turns started
+# exiting the gateway mid-TTS.
+_CRASH_LOG = os.path.join(_hermes_home, "logs", "tui_gateway_crash.log")
+
+
+def _panic_hook(exc_type, exc_value, exc_tb):
+    import traceback
+
+    trace = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))
+    try:
+        os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
+        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
+            f.write(
+                f"\n=== unhandled exception · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n"
+            )
+            f.write(trace)
+    except Exception:
+        pass
+    # Stderr goes through to the TUI as a gateway.stderr Activity line —
+    # the first line here is what the user will see without opening any
+    # log files.  Rest of the stack is still in the log for full context.
+    first = str(exc_value).strip().splitlines()[0] if str(exc_value).strip() else exc_type.__name__
+    print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True)
+    # Chain to the default hook so the process still terminates normally.
+    sys.__excepthook__(exc_type, exc_value, exc_tb)
+
+
+sys.excepthook = _panic_hook
+
+
+def _thread_panic_hook(args):
+    # threading.excepthook passes a threading.ExceptHookArgs carrying
+    # exc_type, exc_value, exc_traceback and thread.  The thread attribute
+    # may be None, so resolve the name defensively before formatting.
+    import traceback
+
+    thread_name = getattr(args.thread, "name", "<unknown>")
+    trace = "".join(
+        traceback.format_exception(args.exc_type, args.exc_value, args.exc_traceback)
+    )
+    try:
+        os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
+        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
+            f.write(
+                f"\n=== thread exception · {time.strftime('%Y-%m-%d %H:%M:%S')} "
+                f"· thread={thread_name} ===\n"
+            )
+            f.write(trace)
+    except Exception:
+        pass
+    message = str(args.exc_value).strip()
+    first_line = message.splitlines()[0] if message else args.exc_type.__name__
+    print(
+        f"[gateway-crash] thread {thread_name} raised {args.exc_type.__name__}: {first_line}",
+        file=sys.stderr,
+        flush=True,
+    )
+
+
+threading.excepthook = _thread_panic_hook
+
 try:
    from hermes_cli.banner import prefetch_update_check

@@ -2126,7 +2195,43 @@ def _(rid, params: dict) -> dict:
            if rendered:
                payload["rendered"] = rendered
            _emit("message.complete", sid, payload)
+
+            # CLI parity: when voice-mode TTS is on, speak the agent reply
+            # (cli.py:_voice_speak_response).  Only the final text — tool
+            # calls / reasoning already stream separately and would be
+            # noisy to read aloud.
+            if (
+                status == "complete"
+                and isinstance(raw, str)
+                and raw.strip()
+                and _voice_tts_enabled()
+            ):
+                try:
+                    from hermes_cli.voice import speak_text
+
+                    spoken = raw
+                    threading.Thread(
+                        target=speak_text, args=(spoken,), daemon=True
+                    ).start()
+                except ImportError:
+                    logger.warning("voice TTS skipped: hermes_cli.voice unavailable")
+                except Exception as e:
+                    logger.warning("voice TTS dispatch failed: %s", e)
        except Exception as e:
+            import traceback
+
+            trace = traceback.format_exc()
+            try:
+                os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
+                with open(_CRASH_LOG, "a", encoding="utf-8") as f:
+                    f.write(
+                        f"\n=== turn-dispatcher exception · "
+                        f"{time.strftime('%Y-%m-%d %H:%M:%S')} · sid={sid} ===\n"
+                    )
+                    f.write(trace)
+            except Exception:
+                pass
+            print(f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True)
            _emit("error", sid, {"message": str(e)})
        finally:
            try:
@@ -3151,6 +3256,162 @@ def _(rid, params: dict) -> dict:

 # ── Methods: complete ─────────────────────────────────────────────────

+_FUZZY_CACHE_TTL_S = 5.0
+_FUZZY_CACHE_MAX_FILES = 20000
+_FUZZY_FALLBACK_EXCLUDES = frozenset(
+    {
+        ".git",
+        ".hg",
+        ".svn",
+        ".next",
+        ".cache",
+        ".venv",
+        "venv",
+        "node_modules",
+        "__pycache__",
+        "dist",
+        "build",
+        "target",
+        ".mypy_cache",
+        ".pytest_cache",
+        ".ruff_cache",
+    }
+)
+_fuzzy_cache_lock = threading.Lock()
+_fuzzy_cache: dict[str, tuple[float, list[str]]] = {}
+
+
+def _list_repo_files(root: str) -> list[str]:
+    """Return file paths relative to ``root``.
+
+    Uses ``git ls-files`` from the repo top (resolved via
+    ``rev-parse --show-toplevel``) so the listing covers tracked + untracked
+    files anywhere in the repo, then converts each path back to be relative
+    to ``root``. Files outside ``root`` (parent directories of cwd, sibling
+    subtrees) are excluded so the picker stays scoped to what's reachable
+    from the gateway's cwd. Falls back to a bounded ``os.walk(root)`` when
+    ``root`` isn't inside a git repo. Result cached per-root for
+    ``_FUZZY_CACHE_TTL_S`` so rapid keystrokes don't respawn git processes.
+    """
+    now = time.monotonic()
+    with _fuzzy_cache_lock:
+        cached = _fuzzy_cache.get(root)
+        if cached and now - cached[0] < _FUZZY_CACHE_TTL_S:
+            return cached[1]
+
+    files: list[str] = []
+    try:
+        top_result = subprocess.run(
+            ["git", "-C", root, "rev-parse", "--show-toplevel"],
+            capture_output=True,
+            timeout=2.0,
+            check=False,
+        )
+        if top_result.returncode == 0:
+            top = top_result.stdout.decode("utf-8", "replace").strip()
+            list_result = subprocess.run(
+                ["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
+                capture_output=True,
+                timeout=2.0,
+                check=False,
+            )
+            if list_result.returncode == 0:
+                for p in list_result.stdout.decode("utf-8", "replace").split("\0"):
+                    if not p:
+                        continue
+                    rel = os.path.relpath(os.path.join(top, p), root).replace(os.sep, "/")
+                    # Skip parents/siblings of cwd — keep the picker scoped
+                    # to root-and-below, matching Cmd-P workspace semantics.
+                    if rel.startswith("../"):
+                        continue
+                    files.append(rel)
+                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
+                        break
+    except (OSError, subprocess.TimeoutExpired):
+        pass
+
+    if not files:
+        # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays
+        # tractable. Dotfiles themselves survive — the ranker decides based
+        # on whether the query starts with `.`.
+        try:
+            for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
+                dirnames[:] = [
+                    d
+                    for d in dirnames
+                    if d not in _FUZZY_FALLBACK_EXCLUDES and not d.startswith(".")
+                ]
+                rel_dir = os.path.relpath(dirpath, root)
+                for f in filenames:
+                    rel = f if rel_dir == "." else f"{rel_dir}/{f}"
+                    files.append(rel.replace(os.sep, "/"))
+                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
+                        break
+                if len(files) >= _FUZZY_CACHE_MAX_FILES:
+                    break
+        except OSError:
+            pass
+
+    with _fuzzy_cache_lock:
+        _fuzzy_cache[root] = (now, files)
+
+    return files
+
+
+def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None:
+    """Rank ``name`` against ``query``; lower is better. Returns None to reject.
+
+    Tiers (kind):
+      0 — exact basename
+      1 — basename prefix (e.g. `app` → `appChrome.tsx`)
+      2 — word-boundary / camelCase hit (e.g. `chrome` → `appChrome.tsx`)
+      3 — substring anywhere in basename
+      4 — subsequence match (every query char appears in order)
+
+    Secondary key is `len(name)` so shorter names win ties.
+    """
+    if not query:
+        return (3, len(name))
+
+    nl = name.lower()
+    ql = query.lower()
+
+    if nl == ql:
+        return (0, len(name))
+
+    if nl.startswith(ql):
+        return (1, len(name))
+
+    # Word-boundary split: `foo-bar_baz.qux` → ["foo","bar","baz","qux"].
+    # camelCase split: `appChrome` → ["app","Chrome"]. Cheap approximation;
+    # falls through to substring/subsequence if it misses.
+    parts: list[str] = []
+    buf = ""
+    for ch in name:
+        if ch in "-_." or (ch.isupper() and buf and not buf[-1].isupper()):
+            if buf:
+                parts.append(buf)
+            buf = ch if ch not in "-_." else ""
+        else:
+            buf += ch
+    if buf:
+        parts.append(buf)
+    for p in parts:
+        if p.lower().startswith(ql):
+            return (2, len(name))
+
+    if ql in nl:
+        return (3, len(name))
+
+    i = 0
+    for ch in nl:
+        if ch == ql[i]:
+            i += 1
+            if i == len(ql):
+                return (4, len(name))
+
+    return None
+

@method("complete.path")
 def _(rid, params: dict) -> dict:
@@ -3186,6 +3447,42 @@ def _(rid, params: dict) -> dict:
            prefix_tag = ""
            path_part = query if is_context else query

+        # Fuzzy basename search across the repo when the user types a bare
+        # name with no path separator — `@appChrome` surfaces every file
+        # whose basename matches, regardless of directory depth. Matches what
+        # editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with
+        # `/`, `./`, `~/`, `/abs`) fall through to the directory-listing
+        # path so explicit navigation intent is preserved.
+        if (
+            is_context
+            and path_part
+            and "/" not in path_part
+            and prefix_tag != "folder"
+        ):
+            root = os.getcwd()
+            ranked: list[tuple[tuple[int, int], str, str]] = []
+            for rel in _list_repo_files(root):
+                basename = os.path.basename(rel)
+                if basename.startswith(".") and not path_part.startswith("."):
+                    continue
+                rank = _fuzzy_basename_rank(basename, path_part)
+                if rank is None:
+                    continue
+                ranked.append((rank, rel, basename))
+
+            ranked.sort(key=lambda r: (r[0], len(r[1]), r[1]))
+            tag = prefix_tag or "file"
+            for _, rel, basename in ranked[:30]:
+                items.append(
+                    {
+                        "text": f"@{tag}:{rel}",
+                        "display": basename,
+                        "meta": os.path.dirname(rel),
+                    }
+                )
+
+            return _ok(rid, {"items": items})
+
        expanded = _normalize_completion_path(path_part) if path_part else "."
        if expanded == "." or not expanded:
            search_dir, match = ".", ""
@@ -3455,43 +3752,155 @@ def _(rid, params: dict) -> dict:
 # ── Methods: voice ───────────────────────────────────────────────────


+_voice_sid_lock = threading.Lock()
+_voice_event_sid: str = ""
+
+
+def _voice_emit(event: str, payload: dict | None = None) -> None:
+    """Emit a voice event toward the session that most recently started
+    recording. Voice is process-global (one microphone), so there's only ever
+    one sid to target; the TUI handler treats an empty sid as "active
+    session". Kept separate from _emit to make the lack of per-call sid
+    argument explicit."""
+    with _voice_sid_lock:
+        sid = _voice_event_sid
+    _emit(event, sid, payload)
+
+
+def _voice_mode_enabled() -> bool:
+    """Current voice-mode flag (runtime-only, CLI parity).
+
+    cli.py initialises ``_voice_mode = False`` at startup and only flips
+    it via ``/voice on``; it never reads a persisted enable bit from
+    config.yaml.  We match that: no config lookup, env var only.  This
+    avoids the TUI auto-starting in REC the next time the user opens it
+    just because they happened to enable voice in a prior session.
+    """
+    return os.environ.get("HERMES_VOICE", "").strip() == "1"
+
+
+def _voice_tts_enabled() -> bool:
+    """Whether agent replies should be spoken back via TTS (runtime only)."""
+    return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1"
+
+
@method("voice.toggle")
 def _(rid, params: dict) -> dict:
+    """CLI parity for the ``/voice`` slash command.
+
+    Subcommands:
+
+    * ``status`` — report mode + TTS flags (default when no action is given).
+    * ``on`` / ``off`` — flip voice *mode* (the umbrella bit). Turning it
+      off also tears down any active continuous recording loop. Does NOT
+      start recording on its own; recording is driven by ``voice.record``
+      (Ctrl+B) after mode is on, matching cli.py's enable/Ctrl+B split.
+    * ``tts`` — toggle speech-output of agent replies. Requires mode on
+      (mirrors CLI's _toggle_voice_tts guard).
+    """
    action = params.get("action", "status")
+
    if action == "status":
-        env = os.environ.get("HERMES_VOICE", "").strip()
-        if env in {"0", "1"}:
-            return _ok(rid, {"enabled": env == "1"})
-        return _ok(
-            rid,
-            {
-                "enabled": bool(
-                    _load_cfg().get("display", {}).get("voice_enabled", False)
-                )
-            },
-        )
+        # Mirror CLI's _show_voice_status: include STT/TTS provider
+        # availability so the user can tell at a glance *why* voice mode
+        # isn't working ("STT provider: MISSING ..." is the common case).
+        payload: dict = {
+            "enabled": _voice_mode_enabled(),
+            "tts": _voice_tts_enabled(),
+        }
+        try:
+            from tools.voice_mode import check_voice_requirements
+
+            reqs = check_voice_requirements()
+            payload["available"] = bool(reqs.get("available"))
+            payload["audio_available"] = bool(reqs.get("audio_available"))
+            payload["stt_available"] = bool(reqs.get("stt_available"))
+            payload["details"] = reqs.get("details") or ""
+        except Exception as e:
+            # check_voice_requirements pulls optional transcription deps —
+            # swallow so /voice status always returns something useful.
+            logger.warning("voice.toggle status: requirements probe failed: %s", e)
+
+        return _ok(rid, payload)
+
    if action in ("on", "off"):
        enabled = action == "on"
+        # Runtime-only flag (CLI parity) — no _write_config_key, so the
+        # next TUI launch starts with voice OFF instead of auto-REC from a
+        # persisted stale toggle.
        os.environ["HERMES_VOICE"] = "1" if enabled else "0"
-        _write_config_key("display.voice_enabled", enabled)
-        return _ok(rid, {"enabled": action == "on"})
+
+        if not enabled:
+            # Disabling the mode must tear the continuous loop down; the
+            # loop holds the microphone and would otherwise keep running.
+            try:
+                from hermes_cli.voice import stop_continuous
+
+                stop_continuous()
+            except ImportError:
+                pass
+            except Exception as e:
+                logger.warning("voice: stop_continuous failed during toggle off: %s", e)
+
+        return _ok(rid, {"enabled": enabled, "tts": _voice_tts_enabled()})
+
+    if action == "tts":
+        if not _voice_mode_enabled():
+            return _err(rid, 4014, "enable voice mode first: /voice on")
+        new_value = not _voice_tts_enabled()
+        # Runtime-only flag (CLI parity) — see voice.toggle on/off above.
+        os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0"
+        return _ok(rid, {"enabled": True, "tts": new_value})
+
    return _err(rid, 4013, f"unknown voice action: {action}")


@method("voice.record")
 def _(rid, params: dict) -> dict:
+    """VAD-driven continuous record loop, CLI-parity.
+
+    ``start`` turns on a VAD loop that emits ``voice.transcript`` events
+    for each detected utterance and auto-restarts for the next turn.
+    ``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while-
+    recording branch clearing ``_voice_continuous``). Three consecutive
+    silent cycles stop the loop automatically and emit a
+    ``voice.transcript`` with ``no_speech_limit=True``.
+    """
    action = params.get("action", "start")
+
+    if action not in {"start", "stop"}:
+        return _err(rid, 4019, f"unknown voice action: {action}")
+
    try:
        if action == "start":
-            from hermes_cli.voice import start_recording
+            if not _voice_mode_enabled():
+                return _err(rid, 4015, "voice mode is off — enable with /voice on")

-            start_recording()
+            with _voice_sid_lock:
+                global _voice_event_sid
+                _voice_event_sid = params.get("session_id") or _voice_event_sid
+
+            from hermes_cli.voice import start_continuous
+
+            voice_cfg = _load_cfg().get("voice", {})
+            start_continuous(
+                on_transcript=lambda t: _voice_emit(
+                    "voice.transcript", {"text": t}
+                ),
+                on_status=lambda s: _voice_emit("voice.status", {"state": s}),
+                on_silent_limit=lambda: _voice_emit(
+                    "voice.transcript", {"no_speech_limit": True}
+                ),
+                silence_threshold=voice_cfg.get("silence_threshold", 200),
+                silence_duration=voice_cfg.get("silence_duration", 3.0),
+            )
            return _ok(rid, {"status": "recording"})
-        if action == "stop":
-            from hermes_cli.voice import stop_and_transcribe

-            return _ok(rid, {"text": stop_and_transcribe() or ""})
-        return _err(rid, 4019, f"unknown voice action: {action}")
+        # action == "stop"
+        from hermes_cli.voice import stop_continuous
+
+        stop_continuous()
+        return _ok(rid, {"status": "stopped"})
    except ImportError:
        return _err(
            rid, 5025, "voice module not available — install audio dependencies"
--- a/ui-tui/packages/hermes-ink/src/ink/dom.ts
+++ b/ui-tui/packages/hermes-ink/src/ink/dom.ts
@@ -83,6 +83,10 @@ export type DOMElement = {
  // Only set on ink-root. The document owns focus — any node can
  // reach it by walking parentNode, like browser getRootNode().
  focusManager?: FocusManager
+  // Measurement cache for ink-text nodes: avoids re-squashing and re-wrapping
+  // text when yoga calls measureFunc multiple times per frame with different
+  // widths during flex re-pass. Keyed by `${width}|${widthMode}`.
+  _textMeasureCache?: { gen: number; entries: Map<string, { _gen: number; result: { width: number; height: number } }> }
 } & InkNode

 export type TextNode = {
@@ -311,10 +315,42 @@ export const createTextNode = (text: string): TextNode => {
  return node
 }

+const MEASURE_CACHE_CAP = 16
+
+// Memoizes computeTextMeasure per ink-text element, keyed by width and
+// width mode. An entry is served only while its _gen equals cache.gen.
 const measureTextNode = function (
  node: DOMNode,
  width: number,
  widthMode: LayoutMeasureMode
+): { width: number; height: number } {
+  const elem = node.nodeName !== '#text' ? (node as DOMElement) : node.parentNode
+  if (!elem || elem.nodeName !== 'ink-text') {
+    return computeTextMeasure(node, width, widthMode)
+  }
+  const cache = elem._textMeasureCache ?? { gen: 0, entries: new Map() }
+  elem._textMeasureCache = cache
+  const key = `${width}|${widthMode}`
+  const hit = cache.entries.get(key)
+  if (hit && hit._gen === cache.gen) {
+    return hit.result
+  }
+  const result = computeTextMeasure(node, width, widthMode)
+  // Re-insert refreshed (stale) keys at the Map's tail so stale entries stay
+  // at the head; FIFO eviction at the cap then drops stale before live ones.
+  if (hit) {
+    cache.entries.delete(key)
+  } else if (cache.entries.size >= MEASURE_CACHE_CAP) {
+    cache.entries.delete(cache.entries.keys().next().value)
+  }
+  cache.entries.set(key, { _gen: cache.gen, result })
+  return result
+}
+
+const computeTextMeasure = function (
+  node: DOMNode,
+  width: number,
+  widthMode: LayoutMeasureMode
 ): { width: number; height: number } {
  const rawText = node.nodeName === '#text' ? node.nodeValue : squashTextNodes(node)

@@ -378,13 +414,19 @@ export const markDirty = (node?: DOMNode): void => {

  while (current) {
    if (current.nodeName !== '#text') {
-      ;(current as DOMElement).dirty = true
+      const elem = current as DOMElement
+      elem.dirty = true

      // Only mark yoga dirty on leaf nodes that have measure functions
-      if (!markedYoga && (current.nodeName === 'ink-text' || current.nodeName === 'ink-raw-ansi') && current.yogaNode) {
-        current.yogaNode.markDirty()
+      if (!markedYoga && (elem.nodeName === 'ink-text' || elem.nodeName === 'ink-raw-ansi') && elem.yogaNode) {
+        elem.yogaNode.markDirty()
        markedYoga = true
      }
+
+      // Invalidate text measurement cache — child text or style changed.
+      if (elem._textMeasureCache) {
+        elem._textMeasureCache.gen++
+      }
    }

    current = current.parentNode
@@ -433,6 +475,7 @@ export const clearYogaNodeReferences = (node: DOMElement | TextNode): void => {
    for (const child of node.childNodes) {
      clearYogaNodeReferences(child)
    }
+    node._textMeasureCache = undefined
  }

  node.yogaNode = undefined
--- a/ui-tui/src/tests/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/tests/createGatewayEventHandler.test.ts
@@ -15,7 +15,8 @@ const buildCtx = (appended: Msg[]) =>
    composer: {
      dequeue: () => undefined,
      queueEditRef: ref<null | number>(null),
-      sendQueued: vi.fn()
+      sendQueued: vi.fn(),
+      setInput: vi.fn()
    },
    gateway: {
      gw: { request: vi.fn() },
@@ -29,6 +30,9 @@ const buildCtx = (appended: Msg[]) =>
      resumeById: vi.fn(),
      setCatalog: vi.fn()
    },
+    submission: {
+      submitRef: { current: vi.fn() }
+    },
    system: {
      bellOnComplete: false,
      sys: vi.fn()
@@ -38,6 +42,11 @@ const buildCtx = (appended: Msg[]) =>
      panel: (title: string, sections: any[]) =>
        appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }),
      setHistoryItems: vi.fn()
+    },
+    voice: {
+      setProcessing: vi.fn(),
+      setRecording: vi.fn(),
+      setVoiceEnabled: vi.fn()
    }
  }) as any

@@ -143,91 +152,79 @@ describe('createGatewayEventHandler', () => {
    expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer))
  })

-  it('attaches inline_diff to the assistant completion body', () => {
+  it('anchors inline_diff as its own segment where the edit happened', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new'
    const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
+    const block = `\`\`\`diff\n${cleaned}\n\`\`\``

-    onEvent({
-      payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
-      type: 'tool.start'
-    } as any)
-    onEvent({
-      payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
+    // Narration → tool → tool-complete → more narration → message-complete.
+    // The diff MUST land between the two narration segments, not tacked
+    // onto the final one.
+    onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any)
+    onEvent({ payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, type: 'tool.start' } as any)
+    onEvent({ payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)

-    // Diff is buffered for message.complete and sanitized (ANSI stripped).
+    // Diff is already committed to segmentMessages as its own segment.
    expect(appended).toHaveLength(0)
-    expect(turnController.pendingInlineDiffs).toEqual([cleaned])
+    expect(turnController.segmentMessages).toEqual([
+      { role: 'assistant', text: 'Editing the file' },
+      { kind: 'diff', role: 'assistant', text: block }
+    ])

-    onEvent({
-      payload: { text: 'patch applied' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)

-    // Diff is rendered in the same assistant message body as the completion.
-    expect(appended).toHaveLength(1)
-    expect(appended[0]).toMatchObject({ role: 'assistant' })
-    expect(appended[0]?.text).toContain('patch applied')
-    expect(appended[0]?.text).toContain('```diff')
-    expect(appended[0]?.text).toContain(cleaned)
+    // Three transcript messages: pre-tool narration → diff (kind='diff',
+    // so MessageLine gives it blank-line breathing room) → post-tool
+    // narration. The final message does NOT contain a diff.
+    expect(appended).toHaveLength(3)
+    expect(appended[0]?.text).toBe('Editing the file')
+    expect(appended[1]).toMatchObject({ kind: 'diff', text: block })
+    expect(appended[2]?.text).toBe('patch applied')
+    expect(appended[2]?.text).not.toContain('```diff')
  })

-  it('does not append inline_diff twice when assistant text already contains it', () => {
+  it('drops the diff segment when the final assistant text narrates the same diff', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
    const assistantText = `Done. Here's the inline diff:\n\n\`\`\`diff\n${cleaned}\n\`\`\``

-    onEvent({
-      payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

+    // Only the final message — diff-only segment dropped so we don't
+    // render two stacked copies of the same patch.
    expect(appended).toHaveLength(1)
    expect(appended[0]?.text).toBe(assistantText)
    expect((appended[0]?.text.match(/```diff/g) ?? []).length).toBe(1)
  })

-  it('strips the CLI "┊ review diff" header from queued inline diffs', () => {
+  it('strips the CLI "┊ review diff" header from inline diff segments', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const raw = '  \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'

-    onEvent({
-      payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    expect(appended).toHaveLength(1)
+    // diff segment first (kind='diff'), final narration second
+    expect(appended).toHaveLength(2)
+    expect(appended[0]?.kind).toBe('diff')
    expect(appended[0]?.text).not.toContain('┊ review diff')
    expect(appended[0]?.text).toContain('--- a/foo.ts')
+    expect(appended[1]?.text).toBe('done')
  })

-  it('suppresses inline_diff when assistant already wrote a diff fence', () => {
+  it('drops the diff segment when assistant writes its own ```diff fence', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
    const assistantText = 'Done. Clean swap:\n\n```diff\n-old\n+new\n```'

-    onEvent({
-      payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

    expect(appended).toHaveLength(1)
    expect(appended[0]?.text).toBe(assistantText)
@@ -243,15 +240,18 @@ describe('createGatewayEventHandler', () => {
      payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' },
      type: 'tool.complete'
    } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    expect(appended).toHaveLength(1)
-    expect(appended[0]?.tools?.[0]).toContain('Review Diff')
-    expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
+    // Two segments: the diff block (kind='diff', no tool row) and the final
+    // narration (tool row belongs here since pendingSegmentTools carries
+    // across the flushStreamingSegment call).
+    expect(appended).toHaveLength(2)
+    expect(appended[0]?.kind).toBe('diff')
    expect(appended[0]?.text).toContain('```diff')
+    expect(appended[0]?.tools ?? []).toEqual([])
+    expect(appended[1]?.text).toBe('done')
+    expect(appended[1]?.tools?.[0]).toContain('Review Diff')
+    expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
  })

  it('shows setup panel for missing provider startup error', () => {
--- a/ui-tui/src/tests/platform.test.ts
+++ b/ui-tui/src/tests/platform.test.ts
@@ -31,6 +31,36 @@ describe('platform action modifier', () => {
  })
 })

+describe('isVoiceToggleKey', () => {
+  it('matches raw Ctrl+B on macOS (doc-default across platforms)', async () => {
+    const { isVoiceToggleKey } = await importPlatform('darwin')
+
+    expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'b')).toBe(true)
+    expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'B')).toBe(true)
+  })
+
+  it('matches Cmd+B on macOS (preserve platform muscle memory)', async () => {
+    const { isVoiceToggleKey } = await importPlatform('darwin')
+
+    expect(isVoiceToggleKey({ ctrl: false, meta: true, super: false }, 'b')).toBe(true)
+    expect(isVoiceToggleKey({ ctrl: false, meta: false, super: true }, 'b')).toBe(true)
+  })
+
+  it('matches Ctrl+B on non-macOS platforms', async () => {
+    const { isVoiceToggleKey } = await importPlatform('linux')
+
+    expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'b')).toBe(true)
+  })
+
+  it('does not match unmodified b or other Ctrl combos', async () => {
+    const { isVoiceToggleKey } = await importPlatform('darwin')
+
+    expect(isVoiceToggleKey({ ctrl: false, meta: false, super: false }, 'b')).toBe(false)
+    expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'a')).toBe(false)
+    expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'c')).toBe(false)
+  })
+})
+
 describe('isMacActionFallback', () => {
  it('routes raw Ctrl+K and Ctrl+W to readline kill-to-end / delete-word on macOS', async () => {
    const { isMacActionFallback } = await importPlatform('darwin')
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -51,6 +51,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
  const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
  const { bellOnComplete, stdout, sys } = ctx.system
  const { appendMessage, panel, setHistoryItems } = ctx.transcript
+  const { setInput } = ctx.composer
+  const { submitRef } = ctx.submission
+  const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice

  let pendingThinkingStatus = ''
  let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null
@@ -261,6 +264,57 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
        return
      }

+      case 'voice.status': {
+        // Continuous VAD loop reports its internal state so the status bar
+        // can show listening / transcribing / idle without polling.
+        const state = String(ev.payload?.state ?? '')
+
+        if (state === 'listening') {
+          setVoiceRecording(true)
+          setVoiceProcessing(false)
+        } else if (state === 'transcribing') {
+          setVoiceRecording(false)
+          setVoiceProcessing(true)
+        } else {
+          setVoiceRecording(false)
+          setVoiceProcessing(false)
+        }
+
+        return
+      }
+
+      case 'voice.transcript': {
+        // CLI parity: the gateway's 3-strikes silence detector has already
+        // stopped continuous mode. Mirror that in the UI and tell the user why.
+        if (ev.payload?.no_speech_limit) {
+          setVoiceEnabled(false)
+          setVoiceRecording(false)
+          setVoiceProcessing(false)
+          sys('voice: no speech detected 3 times, continuous mode stopped')
+
+          return
+        }
+
+        const text = String(ev.payload?.text ?? '').trim()
+
+        if (!text) {
+          return
+        }
+
+        // CLI parity: _pending_input.put(transcript) unconditionally feeds
+        // the transcript to the agent as its next turn — draft handling
+        // doesn't apply because voice-mode users are speaking, not typing.
+        //
+        // We can't branch on composer input from inside a setInput updater
+        // (React strict mode double-invokes it, duplicating the submit).
+        // Just clear + defer submit so the cleared input is committed before
+        // submit reads it.
+        setInput('')
+        setTimeout(() => submitRef.current(text), 0)
+
+        return
+      }
+
      case 'gateway.start_timeout': {
        const { cwd, python } = ev.payload ?? {}
        const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : ''
@@ -331,10 +385,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
          return
        }

-        // Keep inline diffs attached to the assistant completion body so
-        // they render in the same message flow, not as a standalone system
-        // artifact that can look out-of-place around tool rows.
-        turnController.queueInlineDiff(inlineDiffText)
+        // Anchor the diff to where the edit happened in the turn — between
+        // the narration that preceded the tool call and whatever the agent
+        // streams afterwards. The previous end-merge put the diff at the
+        // bottom of the final message even when the edit fired mid-turn,
+        // which read as "the agent wrote this after saying that".
+        turnController.pushInlineDiffSegment(inlineDiffText)

        return
      }
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -189,9 +189,11 @@ export interface InputHandlerContext {
    stdout?: NodeJS.WriteStream
  }
  voice: {
+    enabled: boolean
    recording: boolean
    setProcessing: StateSetter<boolean>
    setRecording: StateSetter<boolean>
+    setVoiceEnabled: StateSetter<boolean>
  }
  wheelStep: number
 }
@@ -201,6 +203,9 @@ export interface InputHandlerResult {
 }

 export interface GatewayEventHandlerContext {
+  composer: {
+    setInput: StateSetter<string>
+  }
  gateway: GatewayServices
  session: {
    STARTUP_RESUME_ID: string
@@ -210,6 +215,9 @@ export interface GatewayEventHandlerContext {
    resumeById: (id: string) => void
    setCatalog: StateSetter<null | SlashCatalog>
  }
+  submission: {
+    submitRef: MutableRefObject<(value: string) => void>
+  }
  system: {
    bellOnComplete: boolean
    stdout?: NodeJS.WriteStream
@@ -220,6 +228,11 @@ export interface GatewayEventHandlerContext {
    panel: (title: string, sections: PanelSection[]) => void
    setHistoryItems: StateSetter<Msg[]>
  }
+  voice: {
+    setProcessing: StateSetter<boolean>
+    setRecording: StateSetter<boolean>
+    setVoiceEnabled: StateSetter<boolean>
+  }
 }

 export interface SlashHandlerContext {
--- a/ui-tui/src/app/slash/commands/session.ts
+++ b/ui-tui/src/app/slash/commands/session.ts
@@ -184,15 +184,64 @@ export const sessionCommands: SlashCommand[] = [
  },

  {
-    help: 'toggle voice input',
+    help: 'voice mode: [on|off|tts|status]',
    name: 'voice',
    run: (arg, ctx) => {
-      const action = arg === 'on' || arg === 'off' ? arg : 'status'
+      const normalized = (arg ?? '').trim().toLowerCase()
+
+      const action =
+        normalized === 'on' || normalized === 'off' || normalized === 'tts' || normalized === 'status'
+          ? normalized
+          : 'status'

      ctx.gateway.rpc<VoiceToggleResponse>('voice.toggle', { action }).then(
        ctx.guarded<VoiceToggleResponse>(r => {
          ctx.voice.setVoiceEnabled(!!r.enabled)
-          ctx.transcript.sys(`voice: ${r.enabled ? 'on' : 'off'}`)
+
+          // Match CLI's _show_voice_status / _enable_voice_mode /
+          // _toggle_voice_tts output shape so users don't have to learn
+          // two vocabularies.
+          if (action === 'status') {
+            const mode = r.enabled ? 'ON' : 'OFF'
+            const tts = r.tts ? 'ON' : 'OFF'
+            ctx.transcript.sys('Voice Mode Status')
+            ctx.transcript.sys(`  Mode:       ${mode}`)
+            ctx.transcript.sys(`  TTS:        ${tts}`)
+            ctx.transcript.sys('  Record key: Ctrl+B')
+
+            // CLI's "Requirements:" block — surfaces STT/audio setup issues
+            // so the user sees "STT provider: MISSING ..." instead of
+            // silently failing on every Ctrl+B press.
+            if (r.details) {
+              ctx.transcript.sys('')
+              ctx.transcript.sys('  Requirements:')
+
+              for (const line of r.details.split('\n')) {
+                if (line.trim()) {
+                  ctx.transcript.sys(`    ${line}`)
+                }
+              }
+            }
+
+            return
+          }
+
+          if (action === 'tts') {
+            ctx.transcript.sys(`Voice TTS ${r.tts ? 'enabled' : 'disabled'}.`)
+
+            return
+          }
+
+          // on/off — mirror cli.py:_enable_voice_mode's 3-line output
+          if (r.enabled) {
+            const tts = r.tts ? ' (TTS enabled)' : ''
+            ctx.transcript.sys(`Voice mode enabled${tts}`)
+            ctx.transcript.sys('  Ctrl+B to start/stop recording')
+            ctx.transcript.sys('  /voice tts  to toggle speech output')
+            ctx.transcript.sys('  /voice off  to disable voice mode')
+          } else {
+            ctx.transcript.sys('Voice mode disabled.')
+          }
        })
      )
    }
--- a/ui-tui/src/app/turnController.ts
+++ b/ui-tui/src/app/turnController.ts
@@ -19,6 +19,20 @@ const INTERRUPT_COOLDOWN_MS = 1500
 const ACTIVITY_LIMIT = 8
 const TRAIL_LIMIT = 8

+// Recovers the raw patch from a diff-only segment, i.e. a message with
+// kind 'diff' whose text is a single ```diff fence as emitted by
+// pushInlineDiffSegment. message.complete uses it to dedupe against final
+// assistant text; every other message yields null and is left untouched.
+const diffSegmentBody = (msg: Msg): null | string => {
+  if (msg.kind === 'diff') {
+    const fenced = /^```diff\n([\s\S]*?)\n```$/.exec(msg.text)
+
+    return fenced ? fenced[1]! : null
+  }
+
+  return null
+}
+
 export interface InterruptDeps {
  appendMessage: (msg: Msg) => void
  gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
@@ -40,7 +54,6 @@ class TurnController {
  bufRef = ''
  interrupted = false
  lastStatusNote = ''
-  pendingInlineDiffs: string[] = []
  persistedToolLabels = new Set<string>()
  persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
  protocolWarned = false
@@ -79,7 +92,6 @@ class TurnController {
    this.activeTools = []
    this.streamTimer = clear(this.streamTimer)
    this.bufRef = ''
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.segmentMessages = []

@@ -186,18 +198,35 @@ class TurnController {
    }, REASONING_PULSE_MS)
  }

-  queueInlineDiff(diffText: string) {
+  pushInlineDiffSegment(diffText: string) {
    // Strip CLI chrome the gateway emits before the unified diff (e.g. a
    // leading "┊ review diff" header written by `_emit_inline_diff` for the
    // terminal printer). That header only makes sense as stdout dressing,
    // not inside a markdown ```diff block.
-    const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
+    const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()

-    if (!text || this.pendingInlineDiffs.includes(text)) {
+    if (!stripped) {
      return
    }

-    this.pendingInlineDiffs = [...this.pendingInlineDiffs, text]
+    // Flush any in-progress streaming text as its own segment first, so the
+    // diff lands BETWEEN the assistant narration that preceded the edit and
+    // whatever the agent streams afterwards — not glued onto the final
+    // message. This is the whole point of segment-anchored diffs: the diff
+    // renders where the edit actually happened.
+    this.flushStreamingSegment()
+
+    const block = `\`\`\`diff\n${stripped}\n\`\`\``
+
+    // Skip consecutive duplicates (same tool firing tool.complete twice, or
+    // two edits producing the same patch). Keeping this cheap — deeper
+    // dedupe against the final assistant text happens at message.complete.
+    if (this.segmentMessages.at(-1)?.text === block) {
+      return
+    }
+
+    this.segmentMessages = [...this.segmentMessages, { kind: 'diff', role: 'assistant', text: block }]
+    patchTurnState({ streamSegments: this.segmentMessages })
  }

  pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) {
@@ -234,7 +263,6 @@ class TurnController {
    this.idle()
    this.clearReasoning()
    this.clearStatusTimer()
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.segmentMessages = []
    this.turnTools = []
@@ -245,31 +273,31 @@ class TurnController {
    const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
    const split = splitReasoning(rawText)
    const finalText = split.text
-    // Skip appending if the assistant already narrated the diff inside a
-    // markdown fence of its own — otherwise we render two stacked diff
-    // blocks for the same edit.
-    const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
-
-    const remainingInlineDiffs = assistantAlreadyHasDiff
-      ? []
-      : this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
-
-    const inlineDiffBlock = remainingInlineDiffs.length
-      ? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
-      : ''
-
-    const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
    const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
    const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
    const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
    const savedToolTokens = this.toolTokenAcc
    const tools = this.pendingSegmentTools
-    const finalMessages = [...this.segmentMessages]

-    if (mergedText) {
+    // Drop diff-only segments the agent is about to narrate in the final
+    // reply. Without this, a closing "here's the diff …" message would
+    // render two stacked copies of the same patch. Only touches segments
+    // with `kind: 'diff'` emitted by pushInlineDiffSegment — real
+    // assistant narration stays put.
+    const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText)
+
+    const segments = this.segmentMessages.filter(msg => {
+      const body = diffSegmentBody(msg)
+
+      return body === null || (!finalHasOwnDiffFence && !finalText.includes(body))
+    })
+
+    const finalMessages = [...segments]
+
+    if (finalText) {
      finalMessages.push({
        role: 'assistant',
-        text: mergedText,
+        text: finalText,
        thinking: savedReasoning || undefined,
        thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
        toolTokens: savedToolTokens || undefined,
@@ -300,7 +328,7 @@ class TurnController {
    this.bufRef = ''
    patchTurnState({ activity: [], outcome: '' })

-    return { finalMessages, finalText: mergedText, wasInterrupted }
+    return { finalMessages, finalText, wasInterrupted }
  }

  recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) {
@@ -406,7 +434,6 @@ class TurnController {
    this.bufRef = ''
    this.interrupted = false
    this.lastStatusNote = ''
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.protocolWarned = false
    this.segmentMessages = []
@@ -452,7 +479,6 @@ class TurnController {
    this.endReasoningPhase()
    this.clearReasoning()
    this.activeTools = []
-    this.pendingInlineDiffs = []
    this.turnTools = []
    this.toolTokenAcc = 0
    this.persistedToolLabels.clear()
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -8,7 +8,7 @@ import type {
  SudoRespondResponse,
  VoiceRecordResponse
 } from '../gatewayTypes.js'
-import { isAction, isMac } from '../lib/platform.js'
+import { isAction, isMac, isVoiceToggleKey } from '../lib/platform.js'

 import { getInputSelection } from './inputSelectionStore.js'
 import type { InputHandlerContext, InputHandlerResult } from './interfaces.js'
@@ -134,45 +134,43 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
    }
  }

-  const voiceStop = () => {
-    voice.setRecording(false)
-    voice.setProcessing(true)
+  // CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop
+  // (NOT the voice-mode umbrella bit). The mode is enabled via /voice on;
+  // Ctrl+B while the mode is off only prints a hint to enable it. While the
+  // mode is on, the first press starts a continuous loop (gateway →
+  // start_continuous: VAD auto-stop → transcribe → auto-restart) and the next
+  // press stops it. The gateway publishes voice.status + voice.transcript
+  // events; createGatewayEventHandler maps them to badges and submitted turns.
+  const voiceRecordToggle = () => {
+    if (!voice.enabled) {
+      return actions.sys('voice: mode is off — enable with /voice on')
+    }
+
+    const starting = !voice.recording
+    const action = starting ? 'start' : 'stop'
+
+    // Optimistic UI — flip the REC badge immediately so the user gets
+    // feedback while the RPC round-trips; the voice.status event is the
+    // authoritative source and may correct us.
+    if (starting) {
+      voice.setRecording(true)
+    } else {
+      voice.setRecording(false)
+      voice.setProcessing(false)
+    }

    gateway
-      .rpc<VoiceRecordResponse>('voice.record', { action: 'stop' })
-      .then(r => {
-        if (!r) {
-          return
+      .rpc<VoiceRecordResponse>('voice.record', { action })
+      .catch((e: Error) => {
+        // Revert optimistic UI on failure.
+        if (starting) {
+          voice.setRecording(false)
        }

-        const transcript = String(r.text || '').trim()
-
-        if (!transcript) {
-          return actions.sys('voice: no speech detected')
-        }
-
-        cActions.setInput(prev => (prev ? `${prev}${/\s$/.test(prev) ? '' : ' '}${transcript}` : transcript))
-      })
-      .catch((e: Error) => actions.sys(`voice error: ${e.message}`))
-      .finally(() => {
-        voice.setProcessing(false)
-        patchUiState({ status: 'ready' })
+        actions.sys(`voice error: ${e.message}`)
      })
  }

-  const voiceStart = () =>
-    gateway
-      .rpc<VoiceRecordResponse>('voice.record', { action: 'start' })
-      .then(r => {
-        if (!r) {
-          return
-        }
-
-        voice.setRecording(true)
-        patchUiState({ status: 'recording…' })
-      })
-      .catch((e: Error) => actions.sys(`voice error: ${e.message}`))
-
  useInput((ch, key) => {
    const live = getUiState()

@@ -370,8 +368,8 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
      return actions.newSession()
    }

-    if (isAction(key, ch, 'b')) {
-      return voice.recording ? voiceStop() : voiceStart()
+    if (isVoiceToggleKey(key, ch)) {
+      return voiceRecordToggle()
    }

    if (isAction(key, ch, 'g')) {
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -454,13 +454,20 @@ export function useMainApp(gw: GatewayClient) {
    composer: { actions: composerActions, refs: composerRefs, state: composerState },
    gateway,
    terminal: { hasSelection, scrollRef, scrollWithSelection, selection, stdout },
-    voice: { recording: voiceRecording, setProcessing: setVoiceProcessing, setRecording: setVoiceRecording },
+    voice: {
+      enabled: voiceEnabled,
+      recording: voiceRecording,
+      setProcessing: setVoiceProcessing,
+      setRecording: setVoiceRecording,
+      setVoiceEnabled
+    },
    wheelStep: WHEEL_SCROLL_STEP
  })

  const onEvent = useMemo(
    () =>
      createGatewayEventHandler({
+        composer: { setInput: composerActions.setInput },
        gateway,
        session: {
          STARTUP_RESUME_ID,
@@ -470,18 +477,29 @@ export function useMainApp(gw: GatewayClient) {
          resumeById: session.resumeById,
          setCatalog
        },
+        submission: { submitRef },
        system: { bellOnComplete, stdout, sys },
-        transcript: { appendMessage, panel, setHistoryItems }
+        transcript: { appendMessage, panel, setHistoryItems },
+        voice: {
+          setProcessing: setVoiceProcessing,
+          setRecording: setVoiceRecording,
+          setVoiceEnabled
+        }
      }),
    [
      appendMessage,
      bellOnComplete,
+      composerActions.setInput,
      gateway,
      panel,
      session.newSession,
      session.resetSession,
      session.resumeById,
+      setVoiceEnabled,
+      setVoiceProcessing,
+      setVoiceRecording,
      stdout,
+      submitRef,
      sys
    ]
  )
@@ -698,7 +716,9 @@ export function useMainApp(gw: GatewayClient) {
      statusColor: statusColorOf(ui.status, ui.theme.color),
      stickyPrompt,
      turnStartedAt: ui.sid ? turnStartedAt : null,
-      voiceLabel: voiceRecording ? 'REC' : voiceProcessing ? 'STT' : `voice ${voiceEnabled ? 'on' : 'off'}`
+      // CLI parity: the classic prompt_toolkit status bar shows a red dot
+      // on REC (cli.py:_get_voice_status_fragments line 2344).
+      voiceLabel: voiceRecording ? '● REC' : voiceProcessing ? '◉ STT' : `voice ${voiceEnabled ? 'on' : 'off'}`
    }),
    [
      cwd,
--- a/ui-tui/src/components/appChrome.tsx
+++ b/ui-tui/src/components/appChrome.tsx
@@ -215,7 +215,20 @@ export function StatusRule({
            </Text>
          ) : null}
          <SpawnHud t={t} />
-          {voiceLabel ? <Text color={t.color.dim}> │ {voiceLabel}</Text> : null}
+          {voiceLabel ? (
+            <Text
+              color={
+                voiceLabel.startsWith('●')
+                  ? t.color.error
+                  : voiceLabel.startsWith('◉')
+                    ? t.color.warn
+                    : t.color.dim
+              }
+            >
+              {' │ '}
+              {voiceLabel}
+            </Text>
+          ) : null}
          {bgCount > 0 ? <Text color={t.color.dim}> │ {bgCount} bg</Text> : null}
          {showCost && typeof usage.cost_usd === 'number' ? (
            <Text color={t.color.dim}> │ ${usage.cost_usd.toFixed(4)}</Text>
--- a/ui-tui/src/components/messageLine.tsx
+++ b/ui-tui/src/components/messageLine.tsx
@@ -81,11 +81,16 @@ export const MessageLine = memo(function MessageLine({
    return <Text {...(body ? { color: body } : {})}>{msg.text}</Text>
  })()

+  // Diff segments (emitted by pushInlineDiffSegment between narration
+  // segments) need a blank line on both sides so the patch doesn't butt up
+  // against the prose around it.
+  const isDiffSegment = msg.kind === 'diff'
+
  return (
    <Box
      flexDirection="column"
-      marginBottom={msg.role === 'user' ? 1 : 0}
-      marginTop={msg.role === 'user' || msg.kind === 'slash' ? 1 : 0}
+      marginBottom={msg.role === 'user' || isDiffSegment ? 1 : 0}
+      marginTop={msg.role === 'user' || msg.kind === 'slash' || isDiffSegment ? 1 : 0}
    >
      {showDetails && (
        <Box flexDirection="column" marginBottom={1}>
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -623,7 +623,19 @@ export function TextInput({
        return
      }

-      if ((k.ctrl && inp === 'c') || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) {
+      // Ctrl+B is the documented voice-recording toggle (see platform.ts →
+      // isVoiceToggleKey). Pass it through so the app-level handler in
+      // useInputHandlers receives it instead of being swallowed here as
+      // either backward-word nav (line below) or a literal 'b' insertion.
+      if (
+        (k.ctrl && inp === 'c') ||
+        (k.ctrl && inp === 'b') ||
+        k.tab ||
+        (k.shift && k.tab) ||
+        k.pageUp ||
+        k.pageDown ||
+        k.escape
+      ) {
        return
      }

--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -236,10 +236,16 @@ export interface ImageAttachResponse {
 // ── Voice ────────────────────────────────────────────────────────────

 export interface VoiceToggleResponse {
+  audio_available?: boolean
+  available?: boolean
+  details?: string
  enabled?: boolean
+  stt_available?: boolean
+  tts?: boolean
 }

 export interface VoiceRecordResponse {
+  status?: string
  text?: string
 }

@@ -368,6 +374,8 @@ export type GatewayEvent =
  | { payload?: { text?: string }; session_id?: string; type: 'thinking.delta' }
  | { payload?: undefined; session_id?: string; type: 'message.start' }
  | { payload?: { kind?: string; text?: string }; session_id?: string; type: 'status.update' }
+  | { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' }
+  | { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' }
  | { payload: { line: string }; session_id?: string; type: 'gateway.stderr' }
  | { payload?: { cwd?: string; python?: string }; session_id?: string; type: 'gateway.start_timeout' }
  | { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' }
--- a/ui-tui/src/lib/platform.ts
+++ b/ui-tui/src/lib/platform.ts
@@ -33,3 +33,17 @@ export const isMacActionFallback = (
 /** Match action-modifier + a single character (case-insensitive). */
 export const isAction = (key: { ctrl: boolean; meta: boolean; super?: boolean }, ch: string, target: string): boolean =>
  isActionMod(key) && ch.toLowerCase() === target
+
+/**
+ * Voice recording toggle key (Ctrl+B); the character match is case-insensitive.
+ *
+ * Documented as "Ctrl+B" everywhere: tips.py, config.yaml's voice.record_key
+ * default, and the Python CLI prompt_toolkit handler. We accept raw Ctrl+B on
+ * every platform so the TUI matches those docs. On macOS we additionally
+ * accept Cmd+B (the platform action modifier) so existing macOS muscle memory
+ * keeps working.
+ */
+export const isVoiceToggleKey = (
+  key: { ctrl: boolean; meta: boolean; super?: boolean },
+  ch: string
+): boolean => (key.ctrl || isActionMod(key)) && ch.toLowerCase() === 'b'
--- a/ui-tui/src/types.ts
+++ b/ui-tui/src/types.ts
@@ -102,7 +102,7 @@ export interface ClarifyReq {

 export interface Msg {
  info?: SessionInfo
-  kind?: 'intro' | 'panel' | 'slash' | 'trail'
+  kind?: 'diff' | 'intro' | 'panel' | 'slash' | 'trail'
  panelData?: PanelData
  role: Role
  text: string