chore: remove vendor-specific references from app_tools

fix: use 'is not None' checks for session/session_id, remove dead _EXECUTE_STRIP_KEYS
- 'if session:' drops an empty dict {}, which is schema-valid
- 'if session_id:' drops an empty string, which shouldn't be silently eaten
- _EXECUTE_STRIP_KEYS frozenset was defined but never referenced (the handler uses an allowlist approach instead)
2026-05-23 22:59:01 +05:30 · 2026-05-23 22:19:24 +05:30 · 2026-05-23 22:13:08 +05:30 · 2026-05-23 21:16:42 +05:30 · 2026-05-23 21:08:04 +05:30 · 2026-05-23 20:52:08 +05:30
91 changed files with 5200 additions and 577 deletions
@@ -1125,7 +1125,18 @@ def init_agent(
    # through _ra().get_tool_definitions()).  Duplicate function names cause
    # 400 errors on providers that enforce unique names (e.g. Xiaomi
    # MiMo via Nous Portal).
-    if agent._memory_manager and agent.tools is not None:
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    #   enabled_toolsets is None        → no filter, inject (backward compat)
+    #   "memory" in enabled_toolsets    → user opted in, inject
+    #   otherwise (incl. [])            → user excluded memory, skip injection
+    #
+    # Without this gate, `platform_toolsets: telegram: []` still leaks memory
+    # provider tools (fact_store, etc.) into the tool surface — a 10x latency
+    # penalty on local models and a frequent trigger of tool-call loops.
+    if agent._memory_manager and agent.tools is not None and (
+        agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
+    ):
        _existing_tool_names = {
            t.get("function", {}).get("name")
            for t in agent.tools
@@ -1435,8 +1446,22 @@ def init_agent(
    # errors. Even with the cache fix, dedup is the right defense
    # against plugin paths that may register the same schemas via
    # ctx.register_tool(). Mirrors the memory tools dedup above.
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    # context engine tools follow the same gating pattern as memory
+    # provider tools — without the gate, `platform_toolsets: telegram: []`
+    # would still leak lcm_* tools into the tool surface and incur the
+    # same local-model latency penalty.
    agent._context_engine_tool_names: set = set()
-    if hasattr(agent, "context_compressor") and agent.context_compressor and agent.tools is not None:
+    if (
+        hasattr(agent, "context_compressor")
+        and agent.context_compressor
+        and agent.tools is not None
+        and (
+            agent.enabled_toolsets is None
+            or "context_engine" in agent.enabled_toolsets
+        )
+    ):
        _existing_tool_names = {
            t.get("function", {}).get("name")
            for t in agent.tools
@@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
    return out


-def convert_messages_to_anthropic(
-    messages: List[Dict],
-    base_url: str | None = None,
-    model: str | None = None,
-) -> Tuple[Optional[Any], List[Dict]]:
-    """Convert OpenAI-format messages to Anthropic format.
+def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert an assistant message to Anthropic content blocks.

-    Returns (system_prompt, anthropic_messages).
-    System messages are extracted since Anthropic takes them as a separate param.
-    system_prompt is a string or list of content blocks (when cache_control present).
-
-    When *base_url* is provided and points to a third-party Anthropic-compatible
-    endpoint, all thinking block signatures are stripped.  Signatures are
-    Anthropic-proprietary — third-party endpoints cannot validate them and will
-    reject them with HTTP 400 "Invalid signature in thinking block".
-
-    When *model* is provided and matches the Kimi / Moonshot family (or
-    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
-    synthesised from ``reasoning_content`` are preserved on replayed
-    assistant tool-call messages — Kimi requires the field to exist, even
-    if empty.
+    Handles thinking blocks, regular content, tool calls, and
+    reasoning_content injection for Kimi/DeepSeek endpoints.
    """
-    system = None
-    result = []
-
-    for m in messages:
-        role = m.get("role", "user")
-        content = m.get("content", "")
-
-        if role == "system":
-            if isinstance(content, list):
-                # Preserve cache_control markers on content blocks
-                has_cache = any(
-                    p.get("cache_control") for p in content if isinstance(p, dict)
-                )
-                if has_cache:
-                    system = [p for p in content if isinstance(p, dict)]
-                else:
-                    system = "\n".join(
-                        p["text"] for p in content if p.get("type") == "text"
-                    )
-            else:
-                system = content
-            continue
-
-        if role == "assistant":
-            blocks = _extract_preserved_thinking_blocks(m)
-            if content:
-                if isinstance(content, list):
-                    converted_content = _convert_content_to_anthropic(content)
-                    if isinstance(converted_content, list):
-                        blocks.extend(converted_content)
-                else:
-                    blocks.append({"type": "text", "text": str(content)})
-            for tc in m.get("tool_calls", []):
-                if not tc or not isinstance(tc, dict):
-                    continue
-                fn = tc.get("function", {})
-                args = fn.get("arguments", "{}")
-                try:
-                    parsed_args = json.loads(args) if isinstance(args, str) else args
-                except (json.JSONDecodeError, ValueError):
-                    parsed_args = {}
-                blocks.append({
-                    "type": "tool_use",
-                    "id": _sanitize_tool_id(tc.get("id", "")),
-                    "name": fn.get("name", ""),
-                    "input": parsed_args,
-                })
-            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
-            # tool-call messages to carry reasoning_content when thinking is
-            # enabled server-side.  Preserve it as a thinking block so Kimi
-            # can validate the message history.  See hermes-agent#13848.
-            #
-            # Accept empty string "" — _copy_reasoning_content_for_api()
-            # injects "" as a tier-3 fallback for Kimi tool-call messages
-            # that had no reasoning.  Kimi requires the field to exist, even
-            # if empty.
-            #
-            # Prepend (not append): Anthropic protocol requires thinking
-            # blocks before text and tool_use blocks.
-            #
-            # Guard: only add when reasoning_details didn't already contribute
-            # thinking blocks.  On native Anthropic, reasoning_details produces
-            # signed thinking blocks — adding another unsigned one from
-            # reasoning_content would create a duplicate (same text) that gets
-            # downgraded to a spurious text block on the last assistant message.
-            reasoning_content = m.get("reasoning_content")
-            _already_has_thinking = any(
-                isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
-                for b in blocks
-            )
-            if isinstance(reasoning_content, str) and not _already_has_thinking:
-                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
-            # Anthropic rejects empty assistant content
-            effective = blocks or content
-            if not effective or effective == "":
-                effective = [{"type": "text", "text": "(empty)"}]
-            result.append({"role": "assistant", "content": effective})
-            continue
-
-        if role == "tool":
-            # Sanitize tool_use_id and ensure non-empty content.
-            # Computer-use (and other multimodal) tool results arrive as
-            # either a list of OpenAI-style content parts, or a dict
-            # marked `_multimodal` with an embedded `content` list. Convert
-            # both into Anthropic `tool_result` inner blocks (text + image).
-            multimodal_blocks: Optional[List[Dict[str, Any]]] = None
-            if isinstance(content, dict) and content.get("_multimodal"):
-                multimodal_blocks = _content_parts_to_anthropic_blocks(
-                    content.get("content") or []
-                )
-                # Fallback text if the conversion produced nothing usable.
-                if not multimodal_blocks and content.get("text_summary"):
-                    multimodal_blocks = [
-                        {"type": "text", "text": str(content["text_summary"])}
-                    ]
-            elif isinstance(content, list):
-                converted = _content_parts_to_anthropic_blocks(content)
-                if any(b.get("type") == "image" for b in converted):
-                    multimodal_blocks = converted
-            # Back-compat: some callers stash blocks under a private key.
-            if multimodal_blocks is None:
-                stashed = m.get("_anthropic_content_blocks")
-                if isinstance(stashed, list) and stashed:
-                    text_content = content if isinstance(content, str) and content.strip() else None
-                    multimodal_blocks = (
-                        [{"type": "text", "text": text_content}] + stashed
-                        if text_content else list(stashed)
-                    )
-
-            if multimodal_blocks:
-                result_content: Any = multimodal_blocks
-            elif isinstance(content, str):
-                result_content = content
-            else:
-                result_content = json.dumps(content) if content else "(no output)"
-            if not result_content:
-                result_content = "(no output)"
-            tool_result = {
-                "type": "tool_result",
-                "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
-                "content": result_content,
-            }
-            if isinstance(m.get("cache_control"), dict):
-                tool_result["cache_control"] = dict(m["cache_control"])
-            # Merge consecutive tool results into one user message
-            if (
-                result
-                and result[-1]["role"] == "user"
-                and isinstance(result[-1]["content"], list)
-                and result[-1]["content"]
-                and result[-1]["content"][0].get("type") == "tool_result"
-            ):
-                result[-1]["content"].append(tool_result)
-            else:
-                result.append({"role": "user", "content": [tool_result]})
-            continue
-
-        # Regular user message — validate non-empty content (Anthropic rejects empty)
+    content = m.get("content", "")
+    blocks = _extract_preserved_thinking_blocks(m)
+    if content:
        if isinstance(content, list):
-            converted_blocks = _convert_content_to_anthropic(content)
-            # Check if all text blocks are empty
-            if not converted_blocks or all(
-                b.get("text", "").strip() == ""
-                for b in converted_blocks
-                if isinstance(b, dict) and b.get("type") == "text"
-            ):
-                converted_blocks = [{"type": "text", "text": "(empty message)"}]
-            result.append({"role": "user", "content": converted_blocks})
+            converted_content = _convert_content_to_anthropic(content)
+            if isinstance(converted_content, list):
+                blocks.extend(converted_content)
        else:
-            # Validate string content is non-empty
-            if not content or (isinstance(content, str) and not content.strip()):
-                content = "(empty message)"
-            result.append({"role": "user", "content": content})
+            blocks.append({"type": "text", "text": str(content)})
+    for tc in m.get("tool_calls", []):
+        if not tc or not isinstance(tc, dict):
+            continue
+        fn = tc.get("function", {})
+        args = fn.get("arguments", "{}")
+        try:
+            parsed_args = json.loads(args) if isinstance(args, str) else args
+        except (json.JSONDecodeError, ValueError):
+            parsed_args = {}
+        blocks.append({
+            "type": "tool_use",
+            "id": _sanitize_tool_id(tc.get("id", "")),
+            "name": fn.get("name", ""),
+            "input": parsed_args,
+        })
+    # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+    # tool-call messages to carry reasoning_content when thinking is
+    # enabled server-side.  Preserve it as a thinking block so Kimi
+    # can validate the message history.  See hermes-agent#13848.
+    #
+    # Accept empty string "" — _copy_reasoning_content_for_api()
+    # injects "" as a tier-3 fallback for Kimi tool-call messages
+    # that had no reasoning.  Kimi requires the field to exist, even
+    # if empty.
+    #
+    # Prepend (not append): Anthropic protocol requires thinking
+    # blocks before text and tool_use blocks.
+    #
+    # Guard: only add when reasoning_details didn't already contribute
+    # thinking blocks.  On native Anthropic, reasoning_details produces
+    # signed thinking blocks — adding another unsigned one from
+    # reasoning_content would create a duplicate (same text) that gets
+    # downgraded to a spurious text block on the last assistant message.
+    reasoning_content = m.get("reasoning_content")
+    _already_has_thinking = any(
+        isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
+        for b in blocks
+    )
+    if isinstance(reasoning_content, str) and not _already_has_thinking:
+        blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
+    # Anthropic rejects empty assistant content
+    effective = blocks or content
+    if not effective or effective == "":
+        effective = [{"type": "text", "text": "(empty)"}]
+    return {"role": "assistant", "content": effective}

+
+def _convert_tool_message_to_result(
+    result: List[Dict[str, Any]], m: Dict[str, Any]
+) -> None:
+    """Convert a tool message to an Anthropic tool_result, merging consecutive
+    results into one user message.
+
+    Mutates ``result`` in place — either appends a new user message or extends
+    the trailing user message's tool_result list.
+    """
+    content = m.get("content", "")
+    multimodal_blocks: Optional[List[Dict[str, Any]]] = None
+    if isinstance(content, dict) and content.get("_multimodal"):
+        multimodal_blocks = _content_parts_to_anthropic_blocks(
+            content.get("content") or []
+        )
+        # Fallback text if the conversion produced nothing usable.
+        if not multimodal_blocks and content.get("text_summary"):
+            multimodal_blocks = [
+                {"type": "text", "text": str(content["text_summary"])}
+            ]
+    elif isinstance(content, list):
+        converted = _content_parts_to_anthropic_blocks(content)
+        if any(b.get("type") == "image" for b in converted):
+            multimodal_blocks = converted
+    # Back-compat: some callers stash blocks under a private key.
+    if multimodal_blocks is None:
+        stashed = m.get("_anthropic_content_blocks")
+        if isinstance(stashed, list) and stashed:
+            text_content = content if isinstance(content, str) and content.strip() else None
+            multimodal_blocks = (
+                [{"type": "text", "text": text_content}] + stashed
+                if text_content else list(stashed)
+            )
+
+    if multimodal_blocks:
+        result_content: Any = multimodal_blocks
+    elif isinstance(content, str):
+        result_content = content
+    else:
+        result_content = json.dumps(content) if content else "(no output)"
+    if not result_content:
+        result_content = "(no output)"
+    tool_result = {
+        "type": "tool_result",
+        "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
+        "content": result_content,
+    }
+    if isinstance(m.get("cache_control"), dict):
+        tool_result["cache_control"] = dict(m["cache_control"])
+    # Merge consecutive tool results into one user message
+    if (
+        result
+        and result[-1]["role"] == "user"
+        and isinstance(result[-1]["content"], list)
+        and result[-1]["content"]
+        and result[-1]["content"][0].get("type") == "tool_result"
+    ):
+        result[-1]["content"].append(tool_result)
+    else:
+        result.append({"role": "user", "content": [tool_result]})
+
+
+def _convert_user_message(content: Any) -> Dict[str, Any]:
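+    """Convert a user message to Anthropic format, never emitting empty content.
+
+    Non-list content that is falsy or a whitespace-only string, and list
+    content that converts to no blocks or whose text blocks are all blank,
+    is replaced with the "(empty message)" placeholder.
+
+    NOTE(review): the list check only inspects text blocks, so a list that
+    converts to no text blocks at all (e.g. image-only) is also replaced by
+    the placeholder — confirm this is intended.
+    """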
+    if isinstance(content, list):
+        converted_blocks = _convert_content_to_anthropic(content)
+        if not converted_blocks or all(
+            b.get("text", "").strip() == ""
+            for b in converted_blocks
+            if isinstance(b, dict) and b.get("type") == "text"
+        ):
+            converted_blocks = [{"type": "text", "text": "(empty message)"}]
+        return {"role": "user", "content": converted_blocks}
+    else:
+        if not content or (isinstance(content, str) and not content.strip()):
+            content = "(empty message)"
+        return {"role": "user", "content": content}
+
+
+def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
+    """Strip tool_use blocks with no matching tool_result, and vice versa.
+
+    Context compression or session truncation can remove either side of a
+    tool-call pair.  Anthropic rejects both orphans with HTTP 400.
+
+    Mutates ``result`` in place.
+    """
    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
    for m in result:
@@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool call removed)"}]

-    # Strip orphaned tool_result blocks (no matching tool_use precedes them).
-    # This is the mirror of the above: context compression or session truncation
-    # can remove an assistant message containing a tool_use while leaving the
-    # subsequent tool_result intact.  Anthropic rejects these with a 400.
+    # Strip orphaned tool_result blocks (no matching tool_use precedes them)
    tool_use_ids = set()
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
@@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool result removed)"}]

-    # Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
+
+def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Merge consecutive same-role messages to enforce Anthropic alternation.
+
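+    Returns a new list (caller must rebind ``result``).  The message dicts
+    are shared with the input: a message that absorbs its successor has its
+    ``content`` reassigned in place.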
+    """
    fixed = []
    for m in result:
        if fixed and fixed[-1]["role"] == m["role"]:
            if m["role"] == "user":
-                # Merge consecutive user messages
                prev_content = fixed[-1]["content"]
                curr_content = m["content"]
                if isinstance(prev_content, str) and isinstance(curr_content, str):
@@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_content, list) and isinstance(curr_content, list):
                    fixed[-1]["content"] = prev_content + curr_content
                else:
-                    # Mixed types — wrap string in list
                    if isinstance(prev_content, str):
                        prev_content = [{"type": "text", "text": prev_content}]
                    if isinstance(curr_content, str):
@@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
                    fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
                else:
-                    # Mixed types — normalize both to list and merge
                    if isinstance(prev_blocks, str):
                        prev_blocks = [{"type": "text", "text": prev_blocks}]
                    if isinstance(curr_blocks, str):
@@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
                    fixed[-1]["content"] = prev_blocks + curr_blocks
        else:
            fixed.append(m)
-    result = fixed
+    return fixed

-    # ── Thinking block signature management ──────────────────────────
-    # Anthropic signs thinking blocks against the full turn content.
-    # Any upstream mutation (context compression, session truncation,
-    # orphan stripping, message merging) invalidates the signature,
-    # causing HTTP 400 "Invalid signature in thinking block".
-    #
-    # Signatures are Anthropic-proprietary.  Third-party endpoints
-    # (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
-    # them and will reject them outright.  When targeting a third-party
-    # endpoint, strip ALL thinking/redacted_thinking blocks from every
-    # assistant message — the third-party will generate its own
-    # thinking blocks if it supports extended thinking.
-    #
-    # For direct Anthropic (strategy following clawdbot/OpenClaw):
-    # 1. Strip thinking/redacted_thinking from all assistant messages
-    #    EXCEPT the last one — preserves reasoning continuity on the
-    #    current tool-use chain while avoiding stale signature errors.
-    # 2. Downgrade unsigned thinking blocks (no signature) to text —
-    #    Anthropic can't validate them and will reject them.
-    # 3. Strip cache_control from thinking/redacted_thinking blocks —
-    #    cache markers can interfere with signature validation.
+
+def _manage_thinking_signatures(
+    result: List[Dict[str, Any]], base_url: str | None, model: str | None
+) -> None:
+    """Strip or preserve thinking blocks based on endpoint type.
+
+    Anthropic signs thinking blocks against the full turn content.
+    Any upstream mutation (context compression, session truncation, orphan
+    stripping, message merging) invalidates the signature, causing HTTP 400
+    "Invalid signature in thinking block".
+
+    Signatures are Anthropic-proprietary.  Third-party endpoints (MiniMax,
+    Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
+    and will reject them outright.  Kimi's /coding and DeepSeek's /anthropic
+    endpoints speak the Anthropic protocol upstream but require unsigned
+    thinking blocks (synthesised from ``reasoning_content``) to round-trip on
+    replayed assistant tool-call messages.  See hermes-agent#13848 (Kimi) and
+    hermes-agent#16748 (DeepSeek).
+
+    Mutates ``result`` in place.
+    """
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
-    # Anthropic Messages protocol upstream but require that thinking blocks
-    # synthesised from reasoning_content round-trip on subsequent turns when
-    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
-    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
-    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
+    # Kimi / DeepSeek share a contract: strip signed Anthropic blocks
+    # (neither upstream can validate Anthropic signatures), preserve unsigned
+    # ones synthesised from reasoning_content.  See #13848, #16748.
    _preserve_unsigned_thinking = (
        _is_kimi_family_endpoint(base_url, model)
        or _is_deepseek_anthropic_endpoint(base_url)
@@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
            continue

        if _preserve_unsigned_thinking:
-            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
-            # thinking server-side and require unsigned thinking blocks on
-            # replayed assistant tool-call messages.  Strip signed Anthropic
-            # blocks (neither upstream can validate Anthropic signatures) but
-            # preserve the unsigned ones we synthesised from reasoning_content.
+            # Kimi / DeepSeek: strip signed, preserve unsigned.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — upstream can't validate, strip
+                    # Signed (or redacted-with-data) — upstream can't validate, strip.
                    continue
-                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
-            # Third-party endpoint: strip ALL thinking blocks from every
-            # assistant message — signatures are Anthropic-proprietary.
+            # Third-party: strip ALL thinking blocks (signatures are proprietary).
            # Direct Anthropic: strip from non-latest assistant messages only.
            stripped = [
                b for b in m["content"]
@@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
            ]
            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
        else:
-            # Latest assistant on direct Anthropic: keep signed thinking
-            # blocks for reasoning continuity; downgrade unsigned ones to
-            # plain text.
+            # Latest assistant on direct Anthropic: keep signed, downgrade unsigned
+            # to text so the reasoning isn't lost.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("type") == "redacted_thinking":
-                    # Redacted blocks use 'data' for the signature payload
+                    # Redacted blocks use 'data' for the signature payload —
+                    # drop the block when 'data' is missing (can't be validated).
                    if b.get("data"):
                        new_content.append(b)
-                    # else: drop — no data means it can't be validated
                elif b.get("signature"):
-                    # Signed thinking block — keep it
                    new_content.append(b)
                else:
-                    # Unsigned thinking — downgrade to text so it's not lost
                    thinking_text = b.get("thinking", "")
                    if thinking_text:
                        new_content.append({"type": "text", "text": thinking_text})
@@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
            if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
                b.pop("cache_control", None)

-    # ── Image eviction: keep only the most recent N screenshots ─────
-    # computer_use screenshots (base64 images) sit inside tool_result
-    # blocks: they accumulate and are sent with every API call. Each
-    # costs ~1,465 tokens; after 10+ the conversation becomes slow
-    # even for simple text queries. Walk backward, keep the most recent
-    # _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
+
+def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
+    """Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
+
+    Base64 images cost ~1,465 tokens each and accumulate across tool calls.
+    Walk backward, keep the most recent N, replace older ones with a placeholder.
+
+    Mutates ``result`` in place.
+    """
    _MAX_KEEP_IMAGES = 3
    _image_count = 0
    for msg in reversed(result):
@@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
                    for b in inner
                ]

+
+def convert_messages_to_anthropic(
+    messages: List[Dict],
+    base_url: str | None = None,
+    model: str | None = None,
+) -> Tuple[Optional[Any], List[Dict]]:
+    """Convert OpenAI-format messages to Anthropic format.
+
+    Returns (system_prompt, anthropic_messages).
+    System messages are extracted since Anthropic takes them as a separate param.
+    system_prompt is a string or list of content blocks (when cache_control present).
+
+    When *base_url* is provided and points to a third-party Anthropic-compatible
+    endpoint, all thinking block signatures are stripped.  Signatures are
+    Anthropic-proprietary — third-party endpoints cannot validate them and will
+    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When *model* matches the Kimi / Moonshot family, or *base_url* is a
+    Kimi / Moonshot host or DeepSeek's /anthropic endpoint, unsigned thinking
+    blocks synthesised from ``reasoning_content`` are preserved on replayed
+    assistant tool-call messages (signed blocks are still stripped) — Kimi
+    requires the field to exist, even if empty.
+    """
+    system = None
+    result: List[Dict[str, Any]] = []
+
+    for m in messages:
+        role = m.get("role", "user")
+        content = m.get("content", "")
+
+        if role == "system":
+            if isinstance(content, list):
+                # Preserve cache_control markers on content blocks
+                has_cache = any(
+                    p.get("cache_control") for p in content if isinstance(p, dict)
+                )
+                if has_cache:
+                    system = [p for p in content if isinstance(p, dict)]
+                else:
+                    system = "\n".join(
+                        p["text"] for p in content if p.get("type") == "text"
+                    )
+            else:
+                system = content
+            continue
+
+        if role == "assistant":
+            result.append(_convert_assistant_message(m))
+            continue
+
+        if role == "tool":
+            _convert_tool_message_to_result(result, m)
+            continue
+
+        # Regular user message
+        result.append(_convert_user_message(content))
+
+    _strip_orphaned_tool_blocks(result)
+    result = _merge_consecutive_roles(result)
+    _manage_thinking_signatures(result, base_url, model)
+    _evict_old_screenshots(result)
+
    return system, result


@@ -46,6 +46,7 @@ from agent.message_sanitization import (
    _strip_non_ascii,
 )
 from agent.model_metadata import (
+    MINIMUM_CONTEXT_LENGTH,
    estimate_messages_tokens_rough,
    estimate_request_tokens_rough,
    get_next_probe_tier,
@@ -73,6 +74,50 @@ from utils import base_url_host_matches, env_var_enabled
 logger = logging.getLogger(__name__)


+def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
+    """Build the "Ollama context too small" message for a tool-using agent.
+
+    Args:
+        agent: Object whose ``tools``, ``_ollama_num_ctx``, ``model``,
+            ``base_url``, ``provider`` and ``session_id`` attributes are
+            read via ``getattr``; every one of them may be absent.
+        request_tokens: Estimated size of the pending request. It is only
+            included in the log line, never compared against anything.
+
+    Returns:
+        ``None`` when the agent has no tools, when ``_ollama_num_ctx`` is
+        not a positive ``int``, or when it already reaches
+        ``MINIMUM_CONTEXT_LENGTH``.  Otherwise a warning is logged and a
+        user-facing remediation message is returned.
+    """
+    tools = getattr(agent, "tools", None)
+    if not tools:
+        return None
+
+    ctx = getattr(agent, "_ollama_num_ctx", None)
+    ctx_is_known = isinstance(ctx, int) and ctx > 0
+    if not ctx_is_known or ctx >= MINIMUM_CONTEXT_LENGTH:
+        return None
+
+    # Fall back to readable placeholders so the log line and the message
+    # never contain empty fields.
+    model_name = getattr(agent, "model", "") or "the selected model"
+    endpoint = getattr(agent, "base_url", "") or "unknown base URL"
+    provider_name = getattr(agent, "provider", "") or "unknown"
+    session_label = getattr(agent, "session_id", None) or "none"
+
+    logger.warning(
+        "Ollama runtime context too small for Hermes tool use: "
+        "model=%s provider=%s base_url=%s runtime_context=%d "
+        "minimum_context=%d estimated_request_tokens=%d tool_count=%d "
+        "session=%s",
+        model_name, provider_name, endpoint, ctx,
+        MINIMUM_CONTEXT_LENGTH, request_tokens, len(tools), session_label,
+    )
+
+    problem = (
+        f"Ollama loaded `{model_name}` with only {ctx:,} tokens of runtime "
+        f"context, but Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens "
+        "for reliable tool use."
+    )
+    remedy = (
+        "Increase the Ollama context for this model and restart/reload the "
+        "model before trying again. A known-good starting point is 65,536 "
+        "tokens. In Hermes config, set `model.ollama_num_ctx: 65536` "
+        "(and `model.context_length: 65536` if you also override the displayed "
+        "model context). If you manage the model through an Ollama Modelfile, "
+        "set `PARAMETER num_ctx 65536` there instead."
+    )
+    return f"{problem}\n\n{remedy}"
+
+
 def _ra():
    """Lazy reference to ``run_agent`` so callers can patch
    ``run_agent.handle_function_call`` / ``run_agent._set_interrupt`` /
@@ -527,6 +572,7 @@ def run_conversation(
    api_call_count = 0
    final_response = None
    interrupted = False
+    failed = False
    codex_ack_continuations = 0
    length_continue_retries = 0
    truncated_tool_call_retries = 0
@@ -883,6 +929,26 @@ def run_conversation(
        # Calculate approximate request size for logging
        total_chars = sum(len(str(msg)) for msg in api_messages)
        approx_tokens = estimate_messages_tokens_rough(api_messages)
+        approx_request_tokens = estimate_request_tokens_rough(
+            api_messages, tools=agent.tools or None
+        )
+
+        _runtime_context_error = _ollama_context_limit_error(
+            agent, approx_request_tokens
+        )
+        if _runtime_context_error:
+            final_response = _runtime_context_error
+            failed = True
+            _turn_exit_reason = "ollama_runtime_context_too_small"
+            messages.append({"role": "assistant", "content": final_response})
+            agent._emit_status("❌ Ollama runtime context is too small for Hermes tool use")
+            api_call_count -= 1
+            agent._api_call_count = api_call_count
+            try:
+                agent.iteration_budget.refund()
+            except Exception:
+                pass
+            break
        
        # Thinking spinner for quiet mode (animated during API call)
        thinking_spinner = None
@@ -923,6 +989,7 @@ def run_conversation(
        copilot_auth_retry_attempted=False
        thinking_sig_retry_attempted = False
        image_shrink_retry_attempted = False
+        multimodal_tool_content_retry_attempted = False
        oauth_1m_beta_retry_attempted = False
        llama_cpp_grammar_retry_attempted = False
        has_retried_429 = False
@@ -1994,6 +2061,31 @@ def run_conversation(
                            "or shrink didn't reduce size; surfacing original error."
                        )

+                # Multimodal-tool-content recovery: providers that follow
+                # the OpenAI spec strictly (tool message content must be a
+                # string) reject our list-type content with a 400.  Strip
+                # image parts from any list-type tool messages, mark the
+                # (provider, model) as no-list-tool-content for the rest
+                # of this session so future tool results preemptively
+                # downgrade, and retry once.  See issue #27344.
+                if (
+                    classified.reason == FailoverReason.multimodal_tool_content_unsupported
+                    and not multimodal_tool_content_retry_attempted
+                ):
+                    multimodal_tool_content_retry_attempted = True
+                    if agent._try_strip_image_parts_from_tool_messages(api_messages):
+                        agent._vprint(
+                            f"{agent.log_prefix}📐 Provider rejected list-type tool content — "
+                            f"downgraded screenshots to text and retrying...",
+                            force=True,
+                        )
+                        continue
+                    else:
+                        logger.info(
+                            "multimodal-tool-content recovery: no list-type tool "
+                            "messages with image parts found; surfacing original error."
+                        )
+
                # Anthropic OAuth subscription rejected the 1M-context beta
                # header ("long context beta is not yet available for this
                # subscription"). Disable the beta for the rest of this
@@ -3848,7 +3940,11 @@ def run_conversation(
                )

    # Determine if conversation completed successfully
-    completed = final_response is not None and api_call_count < agent.max_iterations
+    completed = (
+        final_response is not None
+        and api_call_count < agent.max_iterations
+        and not failed
+    )

    # Save trajectory if enabled.  ``user_message`` may be a multimodal
    # list of parts; the trajectory format wants a plain string.
@@ -3998,6 +4094,7 @@ def run_conversation(
        "api_calls": api_call_count,
        "completed": completed,
        "turn_exit_reason": _turn_exit_reason,
+        "failed": failed,
        "partial": False,  # True only when stopped due to invalid tool calls
        "interrupted": interrupted,
        "response_previewed": getattr(agent, "_response_was_previewed", False),
@@ -50,6 +50,7 @@ class FailoverReason(enum.Enum):

    # Request format
    format_error = "format_error"        # 400 bad request — abort or strip + retry
+    multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported"  # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry

    # Provider-specific
    thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
@@ -165,6 +166,32 @@ _IMAGE_TOO_LARGE_PATTERNS = [
    # the likely culprit; we still try the shrink path before giving up.
 ]

+# Providers that follow the OpenAI spec strictly require tool message
+# ``content`` to be a string.  Some (Anthropic native, Codex Responses,
+# Gemini native, first-party OpenAI) extend this to accept a content-parts
+# list (text + image_url) so screenshots from computer_use survive.  Others
+# (Xiaomi MiMo, some Alibaba endpoints, a long tail of OpenAI-compatible
+# providers) reject the list with a 400 — the patterns below are the most
+# common error shapes we see.  Recovery: strip image parts from tool
+# messages in-place, record the (provider, model) for the rest of the
+# session so we don't waste another call learning the same lesson, retry.
+#
+# See: https://github.com/NousResearch/hermes-agent/issues/27344
+_MULTIMODAL_TOOL_CONTENT_PATTERNS = [
+    # Each entry is tested with ``p in error_msg`` (plain substring match)
+    # by the classifiers, so keep new entries lowercase — presumably
+    # ``error_msg`` is lowercased before matching; confirm in the caller.
+    # Xiaomi MiMo: {"error":{"code":"400","message":"Param Incorrect","param":"text is not set"}}
+    "text is not set",
+    # Generic "tool message must be string" shapes
+    "tool message content must be a string",
+    "tool content must be a string",
+    "tool message must be a string",
+    # OpenAI-compat servers that reject list-type tool content with a
+    # schema-validation message
+    "expected string, got list",
+    "expected string, got array",
+    # Alibaba/DashScope variant
+    "tool_call.content must be string",
+]
+
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@@ -781,6 +808,19 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

+    # Multimodal tool content rejected from 400.  Must be checked BEFORE
+    # image_too_large because the recovery is different (strip image parts
+    # from tool messages, mark the model as no-list-tool-content for the
+    # rest of the session) and BEFORE context_overflow because some of the
+    # patterns ("text is not set") are ambiguous in isolation but become
+    # specific when combined with a 400 on a request known to contain
+    # multimodal tool content.  NOTE: this check matches on the error text
+    # alone; it does not itself verify that the request carried list-type
+    # tool content.
+    if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
+        return result_fn(
+            FailoverReason.multimodal_tool_content_unsupported,
+            retryable=True,
+        )
+
    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
    # Must be checked BEFORE context_overflow because messages can trip both
    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
@@ -922,6 +962,13 @@ def _classify_by_message(
            should_compress=True,
        )

+    # Multimodal tool content patterns (from message text when no status_code)
+    if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
+        return result_fn(
+            FailoverReason.multimodal_tool_content_unsupported,
+            retryable=True,
+        )
+
    # Image-too-large patterns (from message text when no status_code)
    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
        return result_fn(
@@ -1258,6 +1258,10 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
        "terminal",
        "process",
        "execute_code",
+        "app_search_tools",
+        "app_tool_schemas",
+        "app_execute_tools",
+        "app_manage_connections",
    }

    if valid_names and not (valid_names & relevant_tool_names):
@@ -1279,7 +1283,7 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -

    lines = [
        "# Nous Subscription",
-        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
+        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, browser automation (Browser Use), and app integrations (500+ apps) by default. Modal execution is optional.",
        "Current capability status:",
    ]
    lines.extend(_status_line(feature) for feature in features.items())
@@ -1294,6 +1298,39 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
    return "\n".join(lines)


+# =========================================================================
+# App tools (500+ external integrations) behavioural prompt
+# =========================================================================
+
+_APP_TOOLS_PROMPT = """\
+## App Tools (500+ External Integrations)
+
+You have app_search_tools, app_tool_schemas, app_execute_tools, and app_manage_connections available RIGHT NOW as callable tools. They are already configured and connected to the Nous tool gateway — no SDK installation, no API keys, no plugin setup needed. Just call them.
+
+**When to use:** When a user asks to interact with ANY external app or service — Gmail, Slack, GitHub, Jira, Notion, Google Sheets, Linear, HubSpot, Figma, Salesforce, or any of 500+ other apps. ALWAYS prefer these tools over loading skills about the same service (e.g. do NOT load the 'linear', 'airtable', 'google-workspace', 'notion', or any similar skill — use app_search_tools instead). Do NOT suggest installing SDKs, CLI tools, MCP servers, or API keys for external services — call app_search_tools directly.
+
+**Workflow:**
+1. Call app_search_tools with a clear use_case description to discover available tools
+2. Check the response for connection status — if no active connection, call app_manage_connections and share the auth link with the user
+3. Review the execution plan and pitfalls in the search response before executing
+4. If a tool has schemaRef instead of input_schema, call app_tool_schemas to get the full schema
+5. Execute tools via app_execute_tools with schema-compliant arguments
+
+**Session tracking:** Pass session: {generate_id: true} on your first app_search_tools call. Reuse the returned session.id in all subsequent calls. Generate a new session when the user pivots to a different task.
+
+**Important:** Never fabricate tool slugs or argument field names. Only use slugs and schemas returned by app_search_tools or app_tool_schemas."""
+
+
+def build_app_tools_prompt(valid_tool_names: "set[str] | None" = None) -> str:
+    """Return the app-tools guidance prompt, or ``""`` when it does not apply.
+
+    The prompt is emitted only when *valid_tool_names* is non-empty and
+    contains ``"app_search_tools"``.  An unknown (``None``) or empty tool
+    set is treated conservatively and yields the empty string.
+    """
+    if valid_tool_names and "app_search_tools" in valid_tool_names:
+        return _APP_TOOLS_PROMPT
+    return ""
+
+
 # =========================================================================
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================
@@ -12,7 +12,7 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple

-from hermes_constants import get_config_path, get_skills_dir
+from hermes_constants import get_config_path, get_skills_dir, is_termux

 logger = logging.getLogger(__name__)

@@ -136,6 +136,14 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:

    If the field is absent or empty the skill is compatible with **all**
    platforms (backward-compatible default).
+
+    Termux note: on Termux/Android, ``sys.platform`` is ``"linux"`` on
+    older Pythons but became ``"android"`` on Python 3.13+. Termux is a
+    Linux userland riding on the Android kernel, so skills tagged
+    ``linux`` are treated as compatible in Termux regardless of which
+    ``sys.platform`` value Python reports. Individual Linux commands
+    inside a skill may still misbehave (no systemd, BusyBox utils, no
+    apt/dnf, etc.) but that is on the skill, not on platform gating.
    """
    platforms = frontmatter.get("platforms")
    if not platforms:
@@ -143,11 +151,21 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
    if not isinstance(platforms, list):
        platforms = [platforms]
    current = sys.platform
+    running_in_termux = is_termux()
    for platform in platforms:
        normalized = str(platform).lower().strip()
        mapped = PLATFORM_MAP.get(normalized, normalized)
        if current.startswith(mapped):
            return True
+        # Termux runs a Linux userland on Android. Accept linux-tagged
+        # skills regardless of whether sys.platform is "linux" (pre-3.13
+        # Termux) or "android" (Python 3.13+ Termux, and any other
+        # Android runtime).
+        if running_in_termux and mapped == "linux":
+            return True
+        # Explicit termux/android tags match a Termux session too.
+        if running_in_termux and mapped in ("termux", "android"):
+            return True
    return False


@@ -130,6 +130,12 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
    if nous_subscription_prompt:
        stable_parts.append(nous_subscription_prompt)
+
+    # App tools (500+ external integrations) behavioural guidance
+    app_tools_prompt = _r.build_app_tools_prompt(agent.valid_tool_names)
+    if app_tools_prompt:
+        stable_parts.append(app_tools_prompt)
+
    # Tool-use enforcement: tells the model to actually call tools instead
    # of describing intended actions.  Controlled by config.yaml
    # agent.tool_use_enforcement:
@@ -18,6 +18,7 @@ Security features (based on OWASP + NIST SP 800-63-4 guidance):
 Storage: ~/.hermes/pairing/
 """

+import hashlib
 import json
 import os
 import secrets
@@ -148,6 +149,11 @@ class PairingStore:

    # ----- Pending codes -----

+    @staticmethod
+    def _hash_code(code: str, salt: bytes) -> str:
+        """Return the hex SHA-256 digest of ``salt`` followed by the code.
+
+        Args:
+            code: Pairing code; encoded as UTF-8 before hashing.
+            salt: Raw salt bytes, fed to the hash ahead of the code.
+
+        Returns:
+            The digest as a lowercase hexadecimal string.
+        """
+        hasher = hashlib.sha256()
+        hasher.update(salt)
+        hasher.update(code.encode("utf-8"))
+        return hasher.hexdigest()
+
    def generate_code(
        self, platform: str, user_id: str, user_name: str = ""
    ) -> Optional[str]:
@@ -158,6 +164,9 @@ class PairingStore:
          - User is rate-limited (too recent request)
          - Max pending codes reached for this platform
          - User/platform is in lockout due to failed attempts
+
+        The code is NOT stored in plaintext.  Only a salted SHA-256 hash is
+        persisted so that reading the pending file does not reveal codes.
        """
        with self._lock:
            self._cleanup_expired(platform)
@@ -178,8 +187,17 @@ class PairingStore:
            # Generate cryptographically random code
            code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))

-            # Store pending request
-            pending[code] = {
+            # Hash the code with a random salt before storing
+            salt = os.urandom(16)
+            code_hash = self._hash_code(code, salt)
+
+            # Use a unique entry id as the key (not the code itself)
+            entry_id = secrets.token_hex(8)
+
+            # Store pending request with hashed code
+            pending[entry_id] = {
+                "hash": code_hash,
+                "salt": salt.hex(),
                "user_id": user_id,
                "user_name": user_name,
                "created_at": time.time(),
@@ -195,10 +213,16 @@ class PairingStore:
        """
        Approve a pairing code. Adds the user to the approved list.

-        Returns {user_id, user_name} on success, None if code is
+        Returns ``{user_id, user_name}`` on success, ``None`` if the code is
        invalid/expired OR the platform is currently locked out after
        ``MAX_FAILED_ATTEMPTS`` failed approvals (#10195). Callers can
        disambiguate with ``_is_locked_out(platform)``.
+
+        Verification: the user-provided code is hashed with each stored
+        entry's salt and compared to the stored hash using constant-time
+        comparison. Pre-hash entries (legacy plaintext-key format from
+        pre-upgrade pending.json files) are silently ignored — they get
+        pruned at TTL by ``_cleanup_expired``.
        """
        with self._lock:
            self._cleanup_expired(platform)
@@ -213,34 +237,73 @@ class PairingStore:
                return None

            pending = self._load_json(self._pending_path(platform))
-            if code not in pending:
+
+            # Find the entry whose hash matches the provided code.
+            # Tolerate legacy plaintext-key entries (no salt/hash) and
+            # malformed entries — skip them rather than raising, so an
+            # in-place upgrade across an existing pending.json doesn't
+            # crash on the first approve call. Legacy entries get pruned
+            # at their TTL by _cleanup_expired.
+            matched_key = None
+            matched_entry = None
+            for entry_id, entry in pending.items():
+                if not isinstance(entry, dict):
+                    continue
+                stored_salt = entry.get("salt")
+                stored_hash = entry.get("hash")
+                # Both fields must be strings: bytes.fromhex() and
+                # secrets.compare_digest() raise TypeError on other types
+                # (e.g. a null or a number from a hand-edited file).
+                if not isinstance(stored_salt, str) or not isinstance(stored_hash, str):
+                    continue
+                try:
+                    salt = bytes.fromhex(stored_salt)
+                    candidate_hash = self._hash_code(code, salt)
+                    # compare_digest also raises TypeError when a str
+                    # operand contains non-ASCII characters, so a corrupted
+                    # stored hash is skipped instead of aborting the call.
+                    is_match = secrets.compare_digest(candidate_hash, stored_hash)
+                except (ValueError, TypeError):
+                    continue
+                if is_match:
+                    matched_key = entry_id
+                    matched_entry = entry
+                    break
+
+            if matched_key is None:
                self._record_failed_attempt(platform)
                return None

-            entry = pending.pop(code)
+            del pending[matched_key]
            self._save_json(self._pending_path(platform), pending)

            # Add to approved list
-            self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
+            self._approve_user(platform, matched_entry["user_id"],
+                               matched_entry.get("user_name", ""))

            return {
-                "user_id": entry["user_id"],
-                "user_name": entry.get("user_name", ""),
+                "user_id": matched_entry["user_id"],
+                "user_name": matched_entry.get("user_name", ""),
            }

    def list_pending(self, platform: str = None) -> list:
-        """List pending pairing requests, optionally filtered by platform."""
+        """List pending pairing requests, optionally filtered by platform.
+
+        Codes are stored hashed — the ``code`` field is replaced with the
+        first 8 hex characters of the hash so admins can distinguish entries
+        without revealing the original code. Legacy plaintext-key entries
+        (pre-hash format) are shown with a "legacy" placeholder so admins
+        can see them age out without crashing on a missing ``hash`` field.
+        """
        results = []
        platforms = [platform] if platform else self._all_platforms("pending")
        for p in platforms:
            self._cleanup_expired(p)
            pending = self._load_json(self._pending_path(p))
-            for code, info in pending.items():
-                age_min = int((time.time() - info["created_at"]) / 60)
+            for entry_id, info in pending.items():
+                if not isinstance(info, dict):
+                    continue
+                created_at = info.get("created_at")
+                if not isinstance(created_at, (int, float)):
+                    continue
+                age_min = int((time.time() - created_at) / 60)
+                hash_val = info.get("hash")
+                code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
                results.append({
                    "platform": p,
-                    "code": code,
-                    "user_id": info["user_id"],
+                    "code": code_display,
+                    "user_id": info.get("user_id", ""),
                    "user_name": info.get("user_name", ""),
                    "age_minutes": age_min,
                })
@@ -297,17 +360,29 @@ class PairingStore:
    # ----- Cleanup -----

    def _cleanup_expired(self, platform: str) -> None:
-        """Remove expired pending codes."""
+        """Remove expired pending codes.
+
+        Tolerant of malformed / legacy entries — anything without a numeric
+        ``created_at`` is treated as expired (it's effectively unusable
+        with the new hash-keyed schema anyway).
+        """
        path = self._pending_path(platform)
        pending = self._load_json(path)
        now = time.time()
-        expired = [
-            code for code, info in pending.items()
-            if (now - info["created_at"]) > CODE_TTL_SECONDS
-        ]
+        expired = []
+        for entry_id, info in pending.items():
+            if not isinstance(info, dict):
+                expired.append(entry_id)
+                continue
+            created_at = info.get("created_at")
+            if not isinstance(created_at, (int, float)):
+                expired.append(entry_id)
+                continue
+            if (now - created_at) > CODE_TTL_SECONDS:
+                expired.append(entry_id)
        if expired:
-            for code in expired:
-                del pending[code]
+            for entry_id in expired:
+                del pending[entry_id]
            self._save_json(path, pending)

    def _all_platforms(self, suffix: str) -> list:
@@ -308,11 +308,26 @@ class WebhookAdapter(BasePlatformAdapter):
            data = json.loads(subs_path.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                return
-            # Merge: static routes take precedence over dynamic ones
-            self._dynamic_routes = {
-                k: v for k, v in data.items()
-                if k not in self._static_routes
-            }
+            # Merge: static routes take precedence over dynamic ones.
+            # Reject any dynamic route whose effective secret is empty —
+            # an empty secret would cause _handle_webhook to skip HMAC
+            # validation entirely, letting unauthenticated callers in.
+            new_dynamic: Dict[str, dict] = {}
+            for k, v in data.items():
+                if k in self._static_routes:
+                    continue
+                # A route that is not a mapping has no usable settings (and
+                # no secret); skip it instead of letting ``v.get`` raise
+                # and abort the reload of every other route.
+                if not isinstance(v, dict):
+                    logger.warning(
+                        "[webhook] Dynamic route '%s' skipped: expected a "
+                        "mapping of route settings, got %s.",
+                        k,
+                        type(v).__name__,
+                    )
+                    continue
+                effective_secret = v.get("secret", self._global_secret)
+                if not effective_secret:
+                    logger.warning(
+                        "[webhook] Dynamic route '%s' skipped: 'secret' is "
+                        "missing or empty. Set a valid HMAC secret, or use "
+                        "'%s' to explicitly disable auth (testing only).",
+                        k,
+                        _INSECURE_NO_AUTH,
+                    )
+                    continue
+                new_dynamic[k] = v
+            self._dynamic_routes = new_dynamic
            self._routes = {**self._dynamic_routes, **self._static_routes}
            self._dynamic_routes_mtime = mtime
            logger.info(
@@ -1778,8 +1778,17 @@ DEFAULT_CONFIG = {
        },
    },

+    # ── Nous Portal feature flags ──────────────────────────────────────
+    "portal": {
+        # App tools: 500+ external app integrations (Gmail, Slack, GitHub,
+        # Notion, etc.) via the Nous tool gateway.  Requires an active Nous
+        # subscription.  Set to False to hide the app_tools toolset even
+        # when a subscription is present.
+        "app_tools": True,
+    },
+
    # Config schema version - bump this when adding new required fields
-    "_config_version": 23,
+    "_config_version": 24,
 }

 # =============================================================================
@@ -2267,6 +2276,22 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
        "advanced": True,
    },
+    "TOOLS_GATEWAY_URL": {
+        "description": "Explicit URL for the tools-gateway (app integrations). Overrides the auto-derived tools-gateway.nousresearch.com",
+        "prompt": "Tools-gateway URL",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+    "PORTAL_APP_TOOLS": {
+        "description": "Enable app integration tools (500+ apps via Nous tool gateway). Requires Nous subscription.",
+        "prompt": "Enable app tools (500+ apps)",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
    "TAVILY_API_KEY": {
        "description": "Tavily API key for AI-native web search, extract, and crawl",
        "prompt": "Tavily API key",
@@ -3301,7 +3326,7 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
-    "sessions",
+    "sessions", "portal",
 }

 # Valid fields inside a custom_providers list entry
@@ -3964,6 +3989,26 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                        f"{', '.join(added_aux)}"
                    )

+    # ── Version 23 → 24: inject app_tools into saved platform_toolsets ──
+    # The portal.app_tools config flag is handled by deep-merge (DEFAULT_CONFIG
+    # has it, so load_config() always includes it). But platform_toolsets are
+    # user-owned lists that deep-merge can't append to — existing users who
+    # ran `hermes tools` have a saved list that won't include app_tools.
+    if current_ver < 24:
+        config = read_raw_config()
+        pt = config.get("platform_toolsets")
+        if isinstance(pt, dict):
+            patched = False
+            for plat_key, ts_list in pt.items():
+                if isinstance(ts_list, list) and "app_tools" not in ts_list:
+                    ts_list.append("app_tools")
+                    patched = True
+            if patched:
+                save_config(config)
+                results["config_added"].append("app_tools added to platform_toolsets")
+                if not quiet:
+                    print("  ✓ Added app_tools to saved platform toolset lists")
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -71,7 +71,7 @@ def curses_checklist(
                curses.use_default_colors()
                curses.init_pair(1, curses.COLOR_GREEN, -1)
                curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, 8, -1)  # dim gray
+                curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
            cursor = 0
            scroll_offset = 0

@@ -21,6 +21,44 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
 # tests) don't spam the same warning multiple times.
 _WARNED_KEYS: set[str] = set()

+# Map of env-var name → source label ("bitwarden", etc.) for credentials
+# that were injected by an external secret source during load_hermes_dotenv().
+# Used by setup / `hermes model` flows to label detected credentials so
+# users understand WHERE a key came from when their .env doesn't contain it
+# directly (otherwise the "credentials detected ✓" line looks identical to
+# the .env case and they don't know Bitwarden is wired up).
+_SECRET_SOURCES: dict[str, str] = {}
+
+
+def get_secret_source(env_var: str) -> str | None:
+    """Look up which external secret source supplied ``env_var``.
+
+    Args:
+        env_var: Name of the environment variable to look up.
+
+    Returns:
+        The label recorded in ``_SECRET_SOURCES`` for that name (for
+        example ``"bitwarden"``), or ``None`` when nothing is recorded —
+        which covers values from ``.env``, the shell environment, and
+        names that are not tracked at all.
+    """
+    return _SECRET_SOURCES.get(env_var)
+
+
+def format_secret_source_suffix(env_var: str) -> str:
+    """Render the origin of ``env_var`` as a display suffix.
+
+    Returns:
+        ``" (from Bitwarden)"`` for the ``"bitwarden"`` label,
+        ``" (from <label>)"`` for any other recorded label, and ``""`` when
+        no source is recorded (the implicit ``.env`` / shell case).
+    """
+    label = get_secret_source(env_var)
+    if not label:
+        return ""
+    # Known sources get a properly capitalised display name; any other
+    # label is shown verbatim so new secret sources work without touching
+    # the call sites.
+    display_name = "Bitwarden" if label == "bitwarden" else label
+    return f" (from {display_name})"
+

 def _format_offending_chars(value: str, limit: int = 3) -> str:
    """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
@@ -213,6 +251,12 @@ def _apply_external_secret_sources(home_path: Path) -> None:
        # and might have the same copy-paste corruption as a manually
        # edited .env (see #6843).
        _sanitize_loaded_credentials()
+        # Remember where these came from so the setup / `hermes model`
+        # flows can label detected credentials with "(from Bitwarden)" —
+        # otherwise users see "credentials ✓" with no hint that the value
+        # came from BSM rather than .env.
+        for name in result.applied:
+            _SECRET_SOURCES[name] = "bitwarden"
        print(
            f"  Bitwarden Secrets Manager: applied {len(result.applied)} "
            f"secret{'s' if len(result.applied) != 1 else ''} "
@@ -591,7 +591,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
                curses.init_pair(1, curses.COLOR_GREEN, -1)  # selected
                curses.init_pair(2, curses.COLOR_YELLOW, -1)  # header
                curses.init_pair(3, curses.COLOR_CYAN, -1)  # search
-                curses.init_pair(4, 8, -1)  # dim
+                curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim

            cursor = 0
            scroll_offset = 0
@@ -2433,6 +2433,9 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
    ("mcp", "MCP", "MCP tool reasoning"),
    ("title_generation", "Title generation", "session titles"),
    ("skills_hub", "Skills hub", "skills search/install"),
+    ("triage_specifier", "Triage specifier", "kanban spec fleshing"),
+    ("kanban_decomposer", "Kanban decomposer", "task decomposition"),
+    ("profile_describer", "Profile describer", "auto profile descriptions"),
    ("curator", "Curator", "skill-usage review pass"),
 ]

@@ -4662,7 +4665,9 @@ def _model_flow_copilot(config, current_model=""):
        source = creds.get("source", "")
    else:
        if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
-            print(f"  GitHub token: {api_key[:8]}... ✓ ({source})")
+            from hermes_cli.env_loader import format_secret_source_suffix
+            bw_suffix = format_secret_source_suffix(source)
+            print(f"  GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
        elif source == "gh auth token":
            print("  GitHub token: ✓ (from `gh auth token`)")
        else:
@@ -4919,7 +4924,10 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
        return new_key, False

    # Already configured — offer K / R / C ────────────────────────────────
-    print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+    from hermes_cli.env_loader import format_secret_source_suffix
+
+    source_suffix = format_secret_source_suffix(key_env) if key_env else ""
+    print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓{source_suffix}")
    if not key_env:
        # Nothing we can rewrite; just acknowledge and move on.
        print()
@@ -5202,7 +5210,9 @@ def _model_flow_bedrock_api_key(config, region, current_model=""):
    # Prompt for API key
    existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
    if existing_key:
-        print(f"  Bedrock API Key: {existing_key[:12]}... ✓")
+        from hermes_cli.env_loader import format_secret_source_suffix
+        source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
+        print(f"  Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
    else:
        print(f"  Endpoint: {mantle_base_url}")
        print()
@@ -5873,7 +5883,22 @@ def _model_flow_anthropic(config, current_model=""):
    if has_creds:
        # Show what we found
        if existing_key:
-            print(f"  Anthropic credentials: {existing_key[:12]}... ✓")
+            from hermes_cli.env_loader import format_secret_source_suffix
+            from hermes_cli.auth import PROVIDER_REGISTRY
+
+            # Surface which env var supplied the key so users with
+            # Bitwarden see "(from Bitwarden)" — without this, a detected
+            # BSM key looks identical to a key in .env and users assume
+            # nothing is wired up.
+            source_suffix = ""
+            for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
+                if os.getenv(var, "").strip() == existing_key:
+                    source_suffix = format_secret_source_suffix(var)
+                    if source_suffix:
+                        break
+            print(
+                f"  Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
+            )
        elif cc_available:
            print("  Claude Code credentials: ✓ (auto-detected)")
        print()
@@ -74,8 +74,12 @@ class NousSubscriptionFeatures:
    def modal(self) -> NousFeatureState:
        return self.features["modal"]

+    @property
+    def app_tools(self) -> NousFeatureState:
+        return self.features["app_tools"]
+
    def items(self) -> Iterable[NousFeatureState]:
-        ordered = ("web", "image_gen", "tts", "browser", "modal")
+        ordered = ("web", "image_gen", "tts", "browser", "modal", "app_tools")
        for key in ordered:
            yield self.features[key]

@@ -225,6 +229,22 @@ def _resolve_browser_feature_state(
    return "local", available, active, False


+def _read_portal_app_tools_enabled(config: Optional[Dict[str, object]] = None) -> bool:
+    """Report whether the ``portal.app_tools`` flag is switched on.
+
+    With no ``config`` snapshot the decision is delegated to
+    ``tools.tool_backend_helpers.portal_app_tools_enabled``.  With a
+    snapshot, the ``PORTAL_APP_TOOLS`` environment variable wins when set;
+    otherwise ``config["portal"]["app_tools"]`` is used, defaulting to
+    ``True`` when the section or key is absent.
+    """
+    if config is None:
+        from tools.tool_backend_helpers import portal_app_tools_enabled
+        return portal_app_tools_enabled()
+
+    # Fast path: the caller already loaded the config, so avoid re-reading it.
+    import os
+    override = os.getenv("PORTAL_APP_TOOLS")
+    if override is not None:
+        return is_truthy_value(override)
+    portal_section = config.get("portal")
+    if not isinstance(portal_section, dict):
+        return True
+    return bool(portal_section.get("app_tools", True))
+
+
 def get_nous_subscription_features(
    config: Optional[Dict[str, object]] = None,
 ) -> NousSubscriptionFeatures:
@@ -313,6 +333,8 @@ def get_nous_subscription_features(
    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
    managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
+    app_gw_ready = bool(managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("tools"))
+    app_config_on = _read_portal_app_tools_enabled(config)
    modal_state = resolve_modal_backend_state(
        modal_mode,
        has_direct=direct_modal,
@@ -476,6 +498,17 @@ def get_nous_subscription_features(
            current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
            explicit_configured=terminal_backend == "modal",
        ),
+        "app_tools": NousFeatureState(
+            key="app_tools",
+            label="App tools (500+ apps)",
+            included_by_default=True,
+            available=app_gw_ready,
+            active=app_gw_ready and app_config_on,
+            managed_by_nous=app_gw_ready and app_config_on,
+            direct_override=False,
+            toolset_enabled=app_config_on,
+            current_provider="Nous Tool Gateway",
+        ),
    }

    return NousSubscriptionFeatures(
@@ -1051,7 +1051,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
            curses.init_pair(1, curses.COLOR_GREEN, -1)
            curses.init_pair(2, curses.COLOR_YELLOW, -1)
            curses.init_pair(3, curses.COLOR_CYAN, -1)
-            curses.init_pair(4, 8, -1)  # dim gray
+            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
        cursor = 0
        scroll_offset = 0

@@ -1196,7 +1196,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {curses.KEY_ENTER, 10, 13}:
                if cursor < n_plugins:
@@ -1228,7 +1228,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {27, ord("q")}:
                # Save plugin changes on exit
@@ -78,6 +78,7 @@ CONFIGURABLE_TOOLSETS = [
    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
    ("yuanbao",          "🤖 Yuanbao",                  "group info, member queries, DM"),
    ("computer_use",     "🖱️  Computer Use (macOS)",     "background desktop control via cua-driver"),
+    ("app_tools",        "🔌 App Integrations (500+)",   "Gmail, Slack, GitHub, Jira, Notion, etc. via Nous tool gateway"),
 ]

 # Toolsets that are OFF by default for new installs.
@@ -311,6 +312,16 @@ TOOL_CATEGORIES = {
    "image_gen": {
        "name": "Image Generation",
        "icon": "🎨",
+        # Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
+        # OpenAI Codex, and xAI are injected at runtime from each
+        # ``plugins.image_gen.<vendor>`` package via
+        # ``_plugin_image_gen_providers()`` in ``_visible_providers``.
+        # Only non-provider UX setup-flow rows remain here:
+        #   - "Nous Subscription" — managed FAL billed via the Nous
+        #     subscription (requires_nous_auth + override_env_vars).
+        #     Uses the fal plugin as the underlying backend but has a
+        #     distinct setup UX.
+        # Mirrors the shape browser/video_gen ship today.
        "providers": [
            {
                "name": "Nous Subscription",
@@ -322,15 +333,6 @@ TOOL_CATEGORIES = {
                "override_env_vars": ["FAL_KEY"],
                "imagegen_backend": "fal",
            },
-            {
-                "name": "FAL.ai",
-                "badge": "paid",
-                "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
-                "env_vars": [
-                    {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
-                ],
-                "imagegen_backend": "fal",
-            },
        ],
    },
    "video_gen": {
@@ -482,6 +484,11 @@ TOOLSET_ENV_REQUIREMENTS = {
 # ─── Post-Setup Hooks ─────────────────────────────────────────────────────────


+def _cua_driver_cmd() -> str:
+    """Resolve the cua-driver executable, preferring ``HERMES_CUA_DRIVER_CMD``.
+
+    A missing, empty, or whitespace-only override falls back to the plain
+    ``"cua-driver"`` command name.
+    """
+    override = os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip()
+    if override:
+        return override
+    return "cua-driver"
+
+
 def _pip_install(
    args: List[str],
    *,
@@ -550,6 +557,55 @@ def _pip_install(
    )


+
+def _check_cua_driver_asset_for_arch() -> bool:
+    """Check whether the latest CUA release ships an asset for this architecture.
+
+    Returns True if the asset likely exists (or if we cannot determine it).
+    Returns False and prints a warning when the asset is confirmed missing,
+    so callers can skip the install attempt and avoid a raw 404.
+    """
+    # ``json`` is imported locally alongside the other helpers: the blanket
+    # ``except`` below would otherwise swallow a NameError on ``_json`` and
+    # silently turn this probe into "always True".
+    import json as _json
+    import platform as _plat
+    import urllib.request
+
+    machine = _plat.machine()  # "x86_64" or "arm64"
+    if machine == "arm64":
+        # arm64 (Apple Silicon) assets are always published.
+        return True
+
+    # x86_64 / Intel — probe the latest release for an architecture-specific
+    # asset before falling through to the upstream installer.
+    api_url = (
+        "https://api.github.com/repos/trycua/cua/releases/latest"
+    )
+    try:
+        req = urllib.request.Request(api_url, headers={"Accept": "application/vnd.github+json"})
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            release = _json.loads(resp.read().decode())
+        tag = release.get("tag_name", "")
+        assets = release.get("assets", [])
+        arch_names = {"x86_64", "amd64"}
+        # An asset counts when its file name mentions any Intel arch alias.
+        has_asset = any(
+            any(arch in asset.get("name", "").lower() for arch in arch_names)
+            for asset in assets
+        )
+        if not has_asset:
+            _print_warning(
+                f"    Latest CUA release ({tag}) has no Intel (x86_64) asset."
+            )
+            _print_info(
+                "    CUA Driver currently only ships Apple Silicon builds."
+            )
+            _print_info(
+                "    See: https://github.com/trycua/cua/issues/1493"
+            )
+            return False
+    except Exception:
+        # Network / API failure — proceed and let the installer handle it.
+        pass
+    return True
+
+
 def install_cua_driver(upgrade: bool = False) -> bool:
    """Install or refresh the cua-driver binary used by Computer Use.

@@ -579,7 +635,8 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    Computer Use (cua-driver) is macOS-only; skipping.")
        return False

-    binary = shutil.which("cua-driver")
+    driver_cmd = _cua_driver_cmd()
+    binary = shutil.which(driver_cmd)

    # Not installed → fresh install path (only when caller asked for it).
    if not binary and not upgrade:
@@ -587,18 +644,20 @@ def install_cua_driver(upgrade: bool = False) -> bool:
            _print_warning("    curl not found — install manually:")
            _print_info("      https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
            return False
+        if not _check_cua_driver_asset_for_arch():
+            return False
        return _run_cua_driver_installer(label="Installing")

    # Already installed and caller didn't ask to upgrade → just confirm.
    if binary and not upgrade:
        try:
            version = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
-            _print_success(f"    cua-driver already installed: {version or 'unknown version'}")
+            _print_success(f"    {driver_cmd} already installed: {version or 'unknown version'}")
        except Exception:
-            _print_success("    cua-driver already installed.")
+            _print_success(f"    {driver_cmd} already installed.")
        _print_info("    Grant macOS permissions if not done yet:")
        _print_info("      System Settings > Privacy & Security > Accessibility")
        _print_info("      System Settings > Privacy & Security > Screen Recording")
@@ -609,11 +668,14 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    curl not found — cannot refresh cua-driver.")
        return bool(binary)

+    if not _check_cua_driver_asset_for_arch():
+        return bool(binary)
+
    if binary:
        # Show before/after version when we have a baseline. Best-effort.
        try:
            before = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
        except Exception:
@@ -625,13 +687,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
    if ok and before:
        try:
            after = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
            if after and after != before:
-                _print_success(f"    cua-driver upgraded: {before} → {after}")
+                _print_success(f"    {driver_cmd} upgraded: {before} → {after}")
            elif after:
-                _print_info(f"    cua-driver up to date: {after}")
+                _print_info(f"    {driver_cmd} up to date: {after}")
        except Exception:
            pass
    return ok
@@ -655,11 +717,12 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
        _print_info(f"    {label} cua-driver (macOS background computer-use)...")
    else:
        _print_info(f"    {label} cua-driver...")
+    driver_cmd = _cua_driver_cmd()
    try:
        result = subprocess.run(install_cmd, shell=True, timeout=300)
-        if result.returncode == 0 and shutil.which("cua-driver"):
+        if result.returncode == 0 and shutil.which(driver_cmd):
            if verbose:
-                _print_success("    cua-driver installed.")
+                _print_success(f"    {driver_cmd} installed.")
                _print_info("    IMPORTANT — grant macOS permissions now:")
                _print_info("      System Settings > Privacy & Security > Accessibility")
                _print_info("      System Settings > Privacy & Security > Screen Recording")
@@ -1506,12 +1569,9 @@ def _plugin_image_gen_providers() -> list[dict]:
    Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
    row but carries an ``image_gen_plugin_name`` marker so downstream
    code (config writing, model picker) knows to route through the
-    plugin registry instead of the in-tree FAL backend.
-
-    FAL is skipped — it's already exposed by the hardcoded
-    ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
-    a plugin in a follow-up PR, the hardcoded entries go away and this
-    function surfaces it alongside OpenAI automatically.
+    plugin registry. Every image-gen backend is a plugin now — the only
+    hardcoded ``TOOL_CATEGORIES["image_gen"]`` row is the managed "Nous
+    Subscription" setup flow, so nothing is deduped here (see issue #26241).
    """
    try:
        from agent.image_gen_registry import list_providers
@@ -1524,9 +1584,6 @@ def _plugin_image_gen_providers() -> list[dict]:

    rows: list[dict] = []
    for provider in providers:
-        if getattr(provider, "name", None) == "fal":
-            # FAL has its own hardcoded rows today.
-            continue
        try:
            schema = provider.get_setup_schema()
        except Exception:
@@ -1751,7 +1808,7 @@ _POST_SETUP_INSTALLED: dict = {
    # entry when (a) the post_setup is the ONLY install side-effect for
    # a no-key provider, and (b) an installed-state check is cheap and
    # doesn't trigger a heavy import.
-    "cua_driver": lambda: bool(shutil.which("cua-driver")),
+    "cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
 }


@@ -975,11 +975,13 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
    "vision",
    "web_extract",
    "compression",
-    "session_search",
    "skills_hub",
    "approval",
    "mcp",
    "title_generation",
+    "triage_specifier",
+    "kanban_decomposer",
+    "profile_describer",
    "curator",
 )

@@ -0,0 +1,85 @@
+Create a professional infographic following these specifications:
+
+## Image Specifications
+
+- **Type**: Infographic
+- **Layout**: bento-grid
+- **Style**: technical-schematic (engineering blueprint variant)
+- **Aspect Ratio**: 1:1 (square)
+- **Language**: English
+
+## Core Principles
+
+- Follow the bento-grid layout precisely with varied cell sizes
+- Apply technical-schematic aesthetics consistently throughout
+- Keep information concise, highlight keywords and core concepts
+- Use ample whitespace for visual clarity
+- Maintain clear visual hierarchy with a hero cell for the headline metric
+
+## Style Guidelines (technical-schematic blueprint)
+
+- Color palette: deep blue background (#1E3A5F), white lines and text, amber accent (#F59E0B) ONLY on the hero metric and critical deltas, cyan callouts for measurement annotations
+- Grid pattern overlay across the entire canvas — fine white grid lines on the deep blue background
+- All-caps technical stencil typography for headers; clean sans-serif for body
+- Dimension lines with arrowheads connecting metrics to their cells
+- Technical symbols where appropriate (gear icons, flow arrows, modular block diagrams)
+- Consistent stroke weights — bold for cell borders, thin for grid, medium for connector lines
+- Engineering spec-sheet aesthetic: feels like a printed architectural blueprint, austere and precise
+
+## Layout Guidelines (bento-grid)
+
+- Hero cell (TOP-CENTER or LEFT, occupying ~40% of canvas): "−61 COMPLEXITY · 79 → 18" headline metric in massive amber-on-blue, with subtitle "convert_messages_to_anthropic refactored"
+- 7 helper cells in a 2x4 or 3x3 grid showing each extracted helper as its own modular block — each cell has the helper name in all-caps, its complexity number, and one-line role
+- Metrics strip cell: BEFORE/AFTER table with deltas (185 statements → ~70, complexity 79 → 18, +5 C901 violations from the intentional split)
+- Test validation cell: "152/152 + 213/213 PASS" with checkmark stencil
+- Footer strip across bottom: "PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor · NousResearch/hermes-agent"
+
+## Content to render
+
+**Main title (top of canvas, all caps):** "ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION"
+**Subtitle:** "PR #27784 — convert_messages_to_anthropic refactor"
+
+**Hero cell (largest, amber accent):**
+- "−61"
+- "CYCLOMATIC COMPLEXITY"
+- "79 → 18 MAX (−77%)"
+- Subtext: "convert_messages_to_anthropic · pure code motion · zero behavior change"
+
+**7 helper cells (one per helper, each its own modular block):**
+
+1. _convert_assistant_message · C<10 · "Assistant msg → content blocks"
+2. _convert_tool_message_to_result · C=12 · "Tool msg → tool_result + merge"
+3. _convert_user_message · C<10 · "User msg validation"
+4. _strip_orphaned_tool_blocks · C=15 · "Orphan tool_use removal"
+5. _merge_consecutive_roles · C=13 · "Anthropic role-alternation"
+6. _manage_thinking_signatures · C=18 · "Strip/preserve by endpoint"
+7. _evict_old_screenshots · C<10 · "Keep most recent 3 images"
+
+**Metrics cell (table format with arrows):**
+- MAX FUNCTION COMPLEXITY: 79 → 18 (−77%)
+- MAX STATEMENTS/FUNCTION: 185 → ~70 (−62%)
+- LOC FILE-WIDE: −4
+- MAIN FUNCTION LOC: 395 → 63
+
+**Test validation cell (checkmark stencil):**
+- test_anthropic_adapter.py: 152/152 PASS
+- test_auxiliary_client.py: 172/172 PASS
+- test_azure_identity_adapter.py: 39/39 PASS
+- test_bedrock_1m_context.py: 2/2 PASS
+
+**Behavior preservation cell:**
+"ZERO LOGIC CHANGES · ANTHROPIC + KIMI + DEEPSEEK + MINIMAX + AZURE FOUNDRY + BEDROCK SEMANTICS PRESERVED"
+
+**Footer strip:**
+"PR #27784 · agent/anthropic_adapter.py · cherry-picked from #23968 · @kshitijk4poor · NousResearch/hermes-agent"
+
+## Text Requirements
+
+- All text in English, all-caps for headers
+- Hero metric "−61" in amber (#F59E0B), oversized, with thick blueprint stencil treatment
+- Helper names in white technical stencil
+- Complexity numbers (C=12, C=18, etc.) in cyan callouts
+- "BEFORE" labels in white-on-blue, "AFTER" labels in amber-on-blue
+- Footer in small white stencil
+
+Generate the infographic now as a square engineering blueprint.
@@ -0,0 +1,66 @@
+# Infographic: PR #27784 — convert_messages_to_anthropic refactor
+
+## Hero metric
+**−61 cyclomatic complexity** in `agent/anthropic_adapter.py` (79 → 18 max).
+**−4 LOC** net file-wide. **77% drop** in single-function complexity ceiling.
+
+## Title
+ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION
+PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor
+
+## Section 1: BEFORE (left side)
+**convert_messages_to_anthropic**
+- 185 statements
+- 90 branches
+- Cyclomatic: 79
+- Did 7 jobs in one function
+
+Inline responsibilities mixed together:
+1. Walk + dispatch by role
+2. Tool-result conversion
+3. Orphan tool-use stripping
+4. Same-role merging
+5. Thinking-signature management
+6. Screenshot eviction
+7. Final assembly
+
+## Section 2: AFTER (right side)
+**convert_messages_to_anthropic** — now 63 lines, C<10
+Plus 7 single-responsibility helpers:
+
+| Helper | C | Role |
+|---|---|---|
+| _convert_assistant_message | <10 | Assistant msg → content blocks |
+| _convert_tool_message_to_result | 12 | Tool msg → tool_result + merge |
+| _convert_user_message | <10 | User msg validation + conversion |
+| _strip_orphaned_tool_blocks | 15 | Strip orphan tool_use + tool_result |
+| _merge_consecutive_roles | 13 | Anthropic role-alternation enforce |
+| _manage_thinking_signatures | 18 | Strip/preserve/downgrade by endpoint |
+| _evict_old_screenshots | <10 | Keep most recent 3 images |
+
+## Section 3: METRICS
+| Metric | Before | After | Δ |
+|---|---:|---:|---:|
+| Max function complexity | 79 | 18 | −77% |
+| Max statements/function | 185 | ~70 | −62% |
+| LOC (file-wide) | — | — | **−4** |
+| C901 violations | 3 | 8 | +5 (intentional split) |
+
+## Section 4: ZERO BEHAVIOR CHANGE
+- Pure code motion — no logic edits
+- Mutating helpers update `result` in place (same as inline)
+- `_merge_consecutive_roles` returns new list — caller rebinds
+- Anthropic / Kimi / DeepSeek / MiniMax / Azure Foundry / Bedrock semantics preserved
+- Thinking-signature handling identical to pre-refactor
+
+## Section 5: TEST VALIDATION
+- tests/agent/test_anthropic_adapter.py — **152 / 152 pass**
+- tests/agent/test_auxiliary_client.py — **172 / 172 pass**
+- tests/agent/test_azure_identity_adapter.py — **39 / 39 pass**
+- tests/agent/test_bedrock_1m_context.py — **2 / 2 pass**
+
+## Footer
+File: agent/anthropic_adapter.py
+Original PR: #27784 (cherry-pick of #23968)
+Salvage commit: 9c102b937 (kshitijk4poor authorship preserved)
+Repo: NousResearch/hermes-agent
@@ -148,7 +148,7 @@ class BrowserUseBrowserProvider(BrowserProvider):

        return {
            "api_key": managed.nous_user_token,
-            "base_url": managed.gateway_origin.rstrip("/"),
+            "base_url": managed.resolved_origin.rstrip("/"),
            "managed_mode": True,
        }

@@ -0,0 +1,182 @@
+"""FAL.ai image generation backend.
+
+Wraps the 18-model FAL catalog (FLUX 2, Z-Image, Nano Banana, GPT
+Image 1.5, Recraft, Imagen 4, Qwen, Ideogram, …) as an
+:class:`ImageGenProvider` implementation.
+
+The heavy lifting — model catalog, payload construction, request
+submission, managed-Nous-gateway selection, Clarity Upscaler chaining
+— lives in :mod:`tools.image_generation_tool`. This plugin reaches into
+that module via call-time indirection (``import tools.image_generation_tool as _it``)
+so:
+
+* the existing test suite (``tests/tools/test_image_generation.py``,
+  ``tests/tools/test_managed_media_gateways.py``) keeps patching
+  ``image_tool._submit_fal_request`` / ``image_tool.fal_client`` /
+  ``image_tool._managed_fal_client`` without modification, and
+* there's exactly one canonical FAL code path on disk — the plugin is a
+  registration adapter, not a parallel implementation.
+
+See issue #26241 for the migration plan and the
+``plugin-extraction-test-patch-compatibility.md`` rules this follows.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    resolve_aspect_ratio,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class FalImageGenProvider(ImageGenProvider):
+    """FAL.ai image generation backend.
+
+    Delegates to ``tools.image_generation_tool.image_generate_tool`` so
+    the in-tree FAL implementation (model catalog, payload builder,
+    managed-gateway selection, Clarity Upscaler chaining) is the single
+    source of truth. Everything is resolved at call time via the
+    ``_it`` indirection so tests can monkey-patch the legacy module.
+    """
+
+    @property
+    def name(self) -> str:
+        """Registry identifier for this provider."""
+        return "fal"
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable provider name."""
+        return "FAL.ai"
+
+    def is_available(self) -> bool:
+        """Return True when the legacy module reports usable FAL credentials.
+
+        Never raises: any failure (including a failure to import the
+        legacy module) is reported as "not available".
+        """
+        # Available when direct FAL_KEY is set OR the managed Nous
+        # gateway resolves a fal-queue origin. Both checks come from the
+        # legacy module so this provider tracks whatever logic ships
+        # there.
+        try:
+            # Imported inside the guard so an import-time failure cannot
+            # escape and break the picker.
+            import tools.image_generation_tool as _it
+            return bool(_it.check_fal_api_key())
+        except Exception:  # noqa: BLE001 — defensive; never break the picker
+            return False
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        """Return one summary dict per entry in the legacy ``FAL_MODELS`` catalog."""
+        import tools.image_generation_tool as _it
+        return [
+            {
+                "id": model_id,
+                "display": meta.get("display", model_id),
+                "speed": meta.get("speed", ""),
+                "strengths": meta.get("strengths", ""),
+                "price": meta.get("price", ""),
+            }
+            for model_id, meta in _it.FAL_MODELS.items()
+        ]
+
+    def default_model(self) -> Optional[str]:
+        """Return the legacy module's ``DEFAULT_MODEL``."""
+        import tools.image_generation_tool as _it
+        return _it.DEFAULT_MODEL
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return the setup-picker row (name, badge, tag, required env vars)."""
+        return {
+            "name": "FAL.ai",
+            "badge": "paid",
+            "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
+            "env_vars": [
+                {
+                    "key": "FAL_KEY",
+                    "prompt": "FAL API key",
+                    "url": "https://fal.ai/dashboard/keys",
+                },
+            ],
+        }
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate an image via the legacy FAL pipeline.
+
+        Forwards prompt + aspect_ratio (and any forward-compat extras
+        the schema supports) into :func:`tools.image_generation_tool.image_generate_tool`,
+        then reshapes its JSON-string response into the provider-ABC
+        dict format consumed by ``_dispatch_to_plugin_provider``.
+
+        Failures of the legacy pipeline (including a failure to import it)
+        are returned as a ``success: False`` dict rather than raised.
+        """
+        aspect = resolve_aspect_ratio(aspect_ratio)
+        # Only forward the known optional knobs, and only when the caller
+        # actually supplied a non-None value.
+        passthrough = {
+            key: kwargs[key]
+            for key in (
+                "num_inference_steps",
+                "guidance_scale",
+                "num_images",
+                "output_format",
+                "seed",
+            )
+            if key in kwargs and kwargs[key] is not None
+        }
+
+        try:
+            # Imported inside the guard so an import-time failure becomes an
+            # error payload instead of an exception.
+            import tools.image_generation_tool as _it
+
+            raw = _it.image_generate_tool(
+                prompt=prompt,
+                aspect_ratio=aspect,
+                **passthrough,
+            )
+        except Exception as exc:  # noqa: BLE001 — never raise out of generate
+            logger.warning("FAL image_generate_tool raised: %s", exc, exc_info=True)
+            return {
+                "success": False,
+                "image": None,
+                "error": f"FAL image generation failed: {exc}",
+                "error_type": type(exc).__name__,
+                "provider": "fal",
+                "prompt": prompt,
+                "aspect_ratio": aspect,
+            }
+
+        try:
+            response = json.loads(raw) if isinstance(raw, str) else raw
+        except Exception:  # noqa: BLE001
+            response = {
+                "success": False,
+                "image": None,
+                "error": "Invalid JSON from FAL pipeline",
+                # Same marker as the non-dict fallback below, so both
+                # malformed-response paths carry an ``error_type``.
+                "error_type": "provider_contract",
+            }
+
+        if not isinstance(response, dict):
+            response = {
+                "success": False,
+                "image": None,
+                "error": "FAL pipeline returned a non-dict response",
+                "error_type": "provider_contract",
+            }
+
+        # Stamp provider/prompt/aspect_ratio so downstream consumers see
+        # the uniform shape declared in ``agent.image_gen_provider``.
+        response.setdefault("provider", "fal")
+        response.setdefault("prompt", prompt)
+        response.setdefault("aspect_ratio", aspect)
+        # Annotate model best-effort — the legacy pipeline resolves it
+        # internally, so query it after the fact for the response shape.
+        if "model" not in response:
+            try:
+                model_id, _meta = _it._resolve_fal_model()
+                response["model"] = model_id
+            except Exception:  # noqa: BLE001
+                pass
+        return response
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+
+def register(ctx) -> None:
+    """Register a ``FalImageGenProvider`` instance with the plugin context."""
+    provider = FalImageGenProvider()
+    ctx.register_image_gen_provider(provider)
@@ -0,0 +1,7 @@
+name: fal
+version: 1.0.0
+description: "FAL.ai image generation backend (flux-2-klein, flux-2-pro, nano-banana, gpt-image-1.5, recraft-v3, etc.)."
+author: NousResearch
+kind: backend
+requires_env:
+  - FAL_KEY
@@ -47,6 +47,25 @@ _DEFAULT_ENDPOINT = "http://127.0.0.1:1933"
 _TIMEOUT = 30.0
 _REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")

+# Maps the viking_remember `category` enum to a viking:// subdirectory.
+# Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
+_CATEGORY_SUBDIR_MAP = {
+    "preference": "preferences",
+    "entity": "entities",
+    "event": "events",
+    "case": "cases",
+    "pattern": "patterns",
+}
+# Fallback subdirectory: used as the ``.get`` default when a category or
+# target has no entry in the maps in this section.
+_DEFAULT_MEMORY_SUBDIR = "preferences"
+
+# Maps the built-in memory tool's `target` ("user" vs "memory") to a subdir
+# for on_memory_write mirroring. User profile facts → preferences; agent
+# notes / observations → patterns. Anything unknown falls back to the default.
+_MEMORY_WRITE_TARGET_SUBDIR_MAP = {
+    "user": "preferences",
+    "memory": "patterns",
+}
+

 # ---------------------------------------------------------------------------
 # Process-level atexit safety net — ensures pending sessions are committed
@@ -607,24 +626,35 @@ class OpenVikingMemoryProvider(MemoryProvider):
        except Exception as e:
            logger.warning("OpenViking session commit failed: %s", e)

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
-        """Mirror built-in memory writes to OpenViking as explicit memories."""
+    def _build_memory_uri(self, subdir: str) -> str:
+        """Return a fresh ``viking://`` memory URI for ``subdir``.
+
+        The file name is ``mem_`` plus the first 12 hex characters of a
+        random UUID4, placed under the configured user's ``memories`` tree.
+        """
+        unique = uuid.uuid4().hex[:12]
+        parent = f"viking://user/{self._user}/memories/{subdir}"
+        return f"{parent}/mem_{unique}.md"
+
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Mirror built-in memory writes to OpenViking via content/write."""
        if not self._client or action != "add" or not content:
            return

+        subdir = _MEMORY_WRITE_TARGET_SUBDIR_MAP.get(target, _DEFAULT_MEMORY_SUBDIR)
+        uri = self._build_memory_uri(subdir)
+
        def _write():
            try:
                client = _VikingClient(
                    self._endpoint, self._api_key,
                    account=self._account, user=self._user, agent=self._agent,
                )
-                # Add as a user message with memory context so the commit
-                # picks it up as an explicit memory during extraction
-                client.post(f"/api/v1/sessions/{self._session_id}/messages", {
-                    "role": "user",
-                    "parts": [
-                        {"type": "text", "text": f"[Memory note — {target}] {content}"},
-                    ],
+                client.post("/api/v1/content/write", {
+                    "uri": uri,
+                    "content": content,
+                    "mode": "create",
                })
            except Exception as e:
                logger.debug("OpenViking memory mirror failed: %s", e)
@@ -858,24 +888,27 @@ class OpenVikingMemoryProvider(MemoryProvider):
        if not content:
            return tool_error("content is required")

-        # Store as a session message that will be extracted during commit.
-        # The category hint helps OpenViking's extraction classify correctly.
        category = args.get("category", "")
-        text = f"[Remember] {content}"
-        if category:
-            text = f"[Remember — {category}] {content}"
+        subdir = _CATEGORY_SUBDIR_MAP.get(category, _DEFAULT_MEMORY_SUBDIR)
+        uri = self._build_memory_uri(subdir)

-        self._client.post(f"/api/v1/sessions/{self._session_id}/messages", {
-            "role": "user",
-            "parts": [
-                {"type": "text", "text": text},
-            ],
-        })
-
-        return json.dumps({
-            "status": "stored",
-            "message": "Memory recorded. Will be extracted and indexed on session commit.",
-        })
+        # Write directly via content/write API.
+        # This creates the file, stores the content, and queues vector indexing
+        # in a single call — no dependency on session commit / VLM extraction.
+        try:
+            result = self._client.post("/api/v1/content/write", {
+                "uri": uri,
+                "content": content,
+                "mode": "create",
+            })
+            written = result.get("result", {}).get("written_bytes", 0)
+            return json.dumps({
+                "status": "stored",
+                "message": f"Memory stored ({written}b) and queued for vector indexing.",
+            })
+        except Exception as e:
+            logger.error("OpenViking content/write failed: %s", e)
+            return tool_error(f"Failed to store memory: {e}")

    def _tool_add_resource(self, args: dict) -> str:
        url = args.get("url", "")
@@ -282,20 +282,24 @@ def _build_payload(


 # ---------------------------------------------------------------------------
-# fal_client lazy import (same pattern as image_generation_tool)
+# fal_client lazy import (shared with image_generation_tool via fal_common)
 # ---------------------------------------------------------------------------

 _fal_client: Any = None


 def _load_fal_client() -> Any:
+    """Lazy-load the ``fal_client`` SDK and cache it on this module.
+
+    Delegates the actual import to :func:`tools.fal_common.import_fal_client`
+    so the ``lazy_deps`` ensure-install handling stays in one place.
+    """
    global _fal_client
    if _fal_client is not None:
        return _fal_client
-    import fal_client  # type: ignore
-
-    _fal_client = fal_client
-    return fal_client
+    from tools.fal_common import import_fal_client
+    _fal_client = import_fal_client()
+    return _fal_client


 # ---------------------------------------------------------------------------
@@ -238,7 +238,7 @@ def _get_firecrawl_client() -> Any:

        kwargs = {
            "api_key": managed_gateway.nous_user_token,
-            "api_url": managed_gateway.gateway_origin,
+            "api_url": managed_gateway.resolved_origin,
        }
        client_config = (
            "tool-gateway",
@@ -3357,6 +3357,25 @@ class AIAgent:
            return content

        if self._model_supports_vision():
+            # Vision-capable on paper — but if we've already learned in this
+            # session that the active (provider, model) rejects list-type
+            # tool content (e.g. Xiaomi MiMo's 400 "text is not set"),
+            # short-circuit to a text summary so we don't burn another
+            # round-trip relearning the same lesson.  Cache populated by
+            # the 400 recovery path in agent.conversation_loop.  Transient
+            # per-session; next session retries.
+            key = (
+                (getattr(self, "provider", "") or "").strip().lower(),
+                (getattr(self, "model", "") or "").strip(),
+            )
+            no_list = getattr(self, "_no_list_tool_content_models", None)
+            if no_list and key in no_list:
+                logger.debug(
+                    "Tool %s: model %s/%s known to reject list-type tool "
+                    "content this session — sending text summary",
+                    tool_name, key[0], key[1],
+                )
+                return _multimodal_text_summary(result)
            return content

        summary = _multimodal_text_summary(result)
@@ -3385,6 +3404,80 @@ class AIAgent:
        from agent.conversation_compression import try_shrink_image_parts_in_messages
        return try_shrink_image_parts_in_messages(api_messages)

+    def _try_strip_image_parts_from_tool_messages(self, api_messages: list) -> bool:
+        """Downgrade list-type tool messages to text summaries in-place.
+
+        Recovery path for providers that reject list-type tool message content
+        (e.g. Xiaomi MiMo's 400 "text is not set"; see issue #27344).  Walks
+        ``api_messages`` for any ``role: "tool"`` message whose ``content`` is
+        a list containing image parts, replaces the content with the existing
+        text part(s) (or a minimal placeholder if none survive), and records
+        the active (provider, model) in ``self._no_list_tool_content_models``
+        so subsequent ``_tool_result_content_for_active_model`` calls in this
+        session preemptively downgrade screenshots without a round-trip.
+
+        Args:
+            api_messages: Outgoing message history; matching tool messages
+                are mutated in place.  Any non-list value is ignored.
+
+        Returns:
+            True when at least one tool message was downgraded — the
+            caller (the 400 recovery branch in ``agent.conversation_loop``)
+            uses this to decide whether to retry the API call with the
+            modified history or surface the original error.
+        """
+        if not isinstance(api_messages, list):
+            return False
+
+        # Record (provider, model) so we don't relearn this lesson.
+        key = (
+            (getattr(self, "provider", "") or "").strip().lower(),
+            (getattr(self, "model", "") or "").strip(),
+        )
+        # Create the cache lazily.  A pre-existing ``None`` is treated like a
+        # missing attribute (the reader side already tolerates ``None``), so
+        # the ``.add`` below can never be called on a non-set.
+        if getattr(self, "_no_list_tool_content_models", None) is None:
+            self._no_list_tool_content_models = set()
+        if key[1]:  # only record when we actually have a model id
+            self._no_list_tool_content_models.add(key)
+
+        changed = False
+        for msg in api_messages:
+            if not isinstance(msg, dict) or msg.get("role") != "tool":
+                continue
+            content = msg.get("content")
+            if not isinstance(content, list):
+                continue
+
+            # Salvage any text parts so the model still sees some signal.
+            text_parts: List[str] = []
+            had_image = False
+            for part in content:
+                if not isinstance(part, dict):
+                    # Bare strings inside the list count as text.
+                    if isinstance(part, str) and part.strip():
+                        text_parts.append(part.strip())
+                    continue
+                ptype = part.get("type")
+                if ptype in {"image_url", "input_image"}:
+                    had_image = True
+                    continue
+                if ptype in {"text", "input_text"}:
+                    text = str(part.get("text") or "").strip()
+                    if text:
+                        text_parts.append(text)
+
+            if not had_image:
+                # List-type content but no image parts — leave alone (some
+                # providers reject ANY list content, but stripping a
+                # text-only list doesn't reduce ambiguity; let the caller
+                # surface the original error if this turns out to be the
+                # case).
+                continue
+
+            if text_parts:
+                msg["content"] = "\n\n".join(text_parts)
+            else:
+                msg["content"] = (
+                    "[image content removed — provider does not accept "
+                    "list-type tool message content]"
+                )
+            changed = True
+
+        return changed
+
    def _anthropic_preserve_dots(self) -> bool:
        """True when using an anthropic-compatible endpoint that preserves dots in model names.
        Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
@@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 AUTHOR_MAP = {
    # teknium (multiple emails)
    "teknium1@gmail.com": "teknium1",
+    "cipherframe@users.noreply.github.com": "CipherFrame",
    "me@promplate.dev": "CNSeniorious000",
    "yichengqiao21@gmail.com": "YarrowQiao",
    "erhanyasarx@gmail.com": "erhnysr",
@@ -59,14 +60,18 @@ AUTHOR_MAP = {
    "mgongzai@gmail.com": "vKongv",
    "0x.badfriend@gmail.com": "discodirector",
    "altriatree@gmail.com": "TruaShamu",
+    "contact-me@stark-x.cn": "Stark-X",
    "nat@nthrow.io": "nthrow",
    "m@mobrienv.dev": "mikeyobrien",
    "saeed919@pm.me": "falasi",
+    "chrisdlc119@outlook.com": "chdlc",
    "omar@techdeveloper.site": "nycomar",
    "qiyin.zuo@pcitc.com": "qiyin-code",
    "mr.aashiz@gmail.com": "aashizpoudel",
    "70629228+shaun0927@users.noreply.github.com": "shaun0927",
    "98262967+Bihruze@users.noreply.github.com": "Bihruze",
+    "189280367+Lempkey@users.noreply.github.com": "Lempkey",
+    "leovillalbajr@gmail.com": "Lempkey",
    "nidhi2894@gmail.com": "nidhi-singh02",
    "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
    "oleksii.lisikh@gmail.com": "olisikh",
@@ -930,6 +935,8 @@ AUTHOR_MAP = {
    "holynn@placeholder.local": "holynn-q",
    "agent@hermes.local": "jacdevos",
    "sunsky.lau@gmail.com": "liuhao1024",
+    "fabianoeq@gmail.com": "rodrigoeqnit",
+    "178342791+sgtworkman@users.noreply.github.com": "sgtworkman",
    "qiuqfang98@qq.com": "keepcalmqqf",
    "261867348+ai-ag2026@users.noreply.github.com": "ai-ag2026",
    "yanzh.su@gmail.com": "YanzhongSu",
@@ -56,6 +56,7 @@ class TestFailoverReason:
            "overloaded", "server_error", "timeout",
            "context_overflow", "payload_too_large", "image_too_large",
            "model_not_found", "format_error",
+            "multimodal_tool_content_unsupported",
            "provider_policy_blocked",
            "thinking_signature", "long_context_tier",
            "oauth_long_context_beta_forbidden",
@@ -1256,3 +1257,66 @@ class TestRateLimitErrorWithoutStatusCode:
        e.status_code = None
        result = classify_api_error(e, provider="copilot", model="gpt-4o")
        assert result.reason != FailoverReason.rate_limit
+
+
+
+# ── Test: multimodal_tool_content_unsupported pattern ───────────────────
+
+class TestMultimodalToolContentUnsupported:
+    """Regression tests for issue #27344.
+
+    A provider that rejects list-type tool message content must be
+    classified as ``multimodal_tool_content_unsupported`` so the retry
+    loop can downgrade screenshots to text and try again.
+    """
+
+    def test_xiaomi_mimo_text_is_not_set_pattern(self):
+        """The actual Xiaomi MiMo 400 wording from the bug report."""
+        message = "Error code: 400 - {'error': {'code': '400', 'message': 'Param Incorrect', 'param': 'text is not set', 'type': ''}}"
+        err = MockAPIError(message, status_code=400)
+        verdict = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")
+        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
+        assert verdict.retryable is True
+
+    def test_generic_tool_message_must_be_string(self):
+        """A generic 'must be a string' rejection maps to the same reason."""
+        err = MockAPIError("tool message content must be a string", status_code=400)
+        verdict = classify_api_error(err, provider="custom", model="some-model")
+        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_expected_string_got_list(self):
+        """Schema-validation wording ('expected string, got list') also matches."""
+        err = MockAPIError(
+            "Schema validation failed: expected string, got list", status_code=400
+        )
+        verdict = classify_api_error(err, provider="custom", model="some-model")
+        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_multimodal_tool_content_takes_priority_over_context_overflow(self):
+        """Tool-content recovery must outrank context overflow.
+
+        Some providers return a 400 whose message contains BOTH
+        'text is not set' and a length-shaped phrase; the tool-content
+        recovery is cheaper than compression so it must win the priority.
+        """
+        err = MockAPIError("text is not set; context length exceeded", status_code=400)
+        verdict = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")
+        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_no_status_code_path_also_classifies(self):
+        """The message-only classifier branch must recognise the pattern too.
+
+        Covers errors that reach us without a status code (the transport
+        layer ate it).
+        """
+        err = MockTransportError("tool_call.content must be string")
+        verdict = classify_api_error(err, provider="alibaba", model="qwen3.5-plus")
+        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_unrelated_400_is_not_misclassified(self):
+        """Make sure the patterns don't false-positive on normal 400s."""
+        err = MockAPIError("bad request: missing field 'model'", status_code=400)
+        verdict = classify_api_error(
+            err, provider="openrouter", model="anthropic/claude-sonnet-4"
+        )
+        assert verdict.reason != FailoverReason.multimodal_tool_content_unsupported
@@ -1060,3 +1060,191 @@ class TestHonchoCadenceTracking:
        p.on_turn_start(2, "second message")
        should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
        assert should_skip, "Second turn (turn 2) SHOULD be skipped"
+
+
+class TestMemoryToolToolsetGate:
+    """Issue #5544: memory provider tools must respect platform_toolsets.
+
+    Before the fix, MemoryManager.get_all_tool_schemas() output was appended
+    to AIAgent.tools unconditionally in agent_init.py — bypassing the
+    enabled_toolsets filter. Result: `platform_toolsets: telegram: []`
+    still leaked fact_store and other memory tools into the tool surface,
+    causing 10x latency on local models (Qwen3-30B: 1.7s → 42s) and
+    tool-call loops on small models.
+
+    These tests mirror the gate logic in agent/agent_init.py around the
+    memory provider tool injection block. The gate condition is:
+
+        enabled_toolsets is None        → no filter, inject (backward compat)
+        "memory" in enabled_toolsets    → user opted in, inject
+        otherwise (incl. [])            → skip injection
+
+    Every test asserts both halves of the helper's return value (the tool
+    list and the valid-name set) so a regression in either is caught.
+    """
+
+    @staticmethod
+    def _run_memory_injection(enabled_toolsets, memory_manager):
+        """Simulate the gated memory-tool injection block from agent_init.py.
+
+        Returns ``(tools, valid_tool_names)`` as they would look after the
+        injection block ran against an initially empty tool list.
+        """
+        tools = []
+        valid_tool_names = set()
+
+        if memory_manager and tools is not None and (
+            enabled_toolsets is None or "memory" in enabled_toolsets
+        ):
+            _existing = {
+                t.get("function", {}).get("name")
+                for t in tools
+                if isinstance(t, dict)
+            }
+            for _schema in memory_manager.get_all_tool_schemas():
+                _tname = _schema.get("name", "")
+                # Skip schemas whose name is already present (dedup).
+                if _tname and _tname in _existing:
+                    continue
+                tools.append({"type": "function", "function": _schema})
+                if _tname:
+                    valid_tool_names.add(_tname)
+                    _existing.add(_tname)
+
+        return tools, valid_tool_names
+
+    def _mgr_with_tools(self, *tool_names):
+        """Build a MemoryManager whose providers expose the named tool schemas."""
+        mgr = MemoryManager()
+        p = FakeMemoryProvider(
+            "ext",
+            tools=[{"name": n, "description": n, "parameters": {}} for n in tool_names],
+        )
+        mgr.add_provider(p)
+        return mgr
+
+    def test_none_toolsets_injects(self):
+        """enabled_toolsets=None (no filter) injects memory tools — backward compat."""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection(None, mgr)
+        assert "fact_store" in names
+        assert any(t["function"]["name"] == "fact_store" for t in tools)
+
+    def test_memory_in_toolsets_injects(self):
+        """enabled_toolsets including 'memory' injects memory tools."""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection(["terminal", "memory", "web"], mgr)
+        assert "fact_store" in names
+        assert any(t["function"]["name"] == "fact_store" for t in tools)
+
+    def test_empty_toolsets_blocks_injection(self):
+        """`platform_toolsets: telegram: []` must suppress memory tools. (#5544)"""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection([], mgr)
+        assert tools == []
+        assert names == set()
+
+    def test_toolsets_without_memory_blocks_injection(self):
+        """Toolset list that doesn't name 'memory' must suppress injection."""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection(["terminal", "web"], mgr)
+        assert tools == []
+        assert names == set()
+
+    def test_no_memory_manager_no_injection(self):
+        """Gate is moot without a memory manager."""
+        tools, names = self._run_memory_injection(None, None)
+        assert tools == []
+        assert names == set()
+
+    def test_multiple_schemas_all_blocked_together(self):
+        """When the gate is closed, no memory tools leak — not even partially."""
+        mgr = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
+        tools, names = self._run_memory_injection(["terminal"], mgr)
+        assert tools == []
+        assert names == set()
+
+    def test_multiple_schemas_all_injected_when_enabled(self):
+        """When the gate is open, every memory tool schema is injected."""
+        mgr = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
+        tools, names = self._run_memory_injection(None, mgr)
+        assert names == {"fact_store", "memory_search", "memory_add"}
+
+
+class TestContextEngineToolsetGate:
+    """Issue #5544 (sibling): context engine tools follow the same gate.
+
+    `agent.context_compressor.get_tool_schemas()` (e.g. lcm_grep, lcm_describe,
+    lcm_expand) was appended to AIAgent.tools unconditionally. Same blind
+    injection class as the memory bug; same local-model penalty. Gate name:
+    "context_engine" (matches the existing plugin-system convention).
+
+    Each test checks all three values returned by the simulation helper
+    (tool list, valid names, engine names) so none can drift unnoticed.
+    """
+
+    @staticmethod
+    def _run_context_engine_injection(enabled_toolsets, compressor):
+        """Simulate the gated context-engine injection block from agent_init.py.
+
+        Returns ``(tools, valid_tool_names, engine_tool_names)`` as they
+        would look after the block ran against an initially empty tool list.
+        """
+        tools = []
+        valid_tool_names = set()
+        engine_tool_names = set()
+
+        if (
+            compressor is not None
+            and tools is not None
+            and (
+                enabled_toolsets is None
+                or "context_engine" in enabled_toolsets
+            )
+        ):
+            _existing = {
+                t.get("function", {}).get("name")
+                for t in tools
+                if isinstance(t, dict)
+            }
+            for _schema in compressor.get_tool_schemas():
+                _tname = _schema.get("name", "")
+                # Skip schemas whose name is already present (dedup).
+                if _tname and _tname in _existing:
+                    continue
+                tools.append({"type": "function", "function": _schema})
+                if _tname:
+                    valid_tool_names.add(_tname)
+                    engine_tool_names.add(_tname)
+                    _existing.add(_tname)
+
+        return tools, valid_tool_names, engine_tool_names
+
+    class _FakeCompressor:
+        """Minimal stand-in exposing only ``get_tool_schemas``."""
+
+        def __init__(self, schemas):
+            self._schemas = schemas
+
+        def get_tool_schemas(self):
+            # Hand back a copy so callers cannot mutate the fixture.
+            return list(self._schemas)
+
+    def _compressor_with(self, *tool_names):
+        """Build a fake compressor exposing one schema per given tool name."""
+        return self._FakeCompressor(
+            [{"name": n, "description": n, "parameters": {}} for n in tool_names]
+        )
+
+    def test_none_toolsets_injects(self):
+        """enabled_toolsets=None injects context-engine tools — backward compat."""
+        c = self._compressor_with("lcm_grep", "lcm_describe", "lcm_expand")
+        tools, names, engine_names = self._run_context_engine_injection(None, c)
+        assert engine_names == {"lcm_grep", "lcm_describe", "lcm_expand"}
+        assert names == engine_names
+        assert len(tools) == 3
+
+    def test_context_engine_in_toolsets_injects(self):
+        """enabled_toolsets including 'context_engine' injects the tools."""
+        c = self._compressor_with("lcm_grep")
+        tools, names, engine_names = self._run_context_engine_injection(
+            ["terminal", "context_engine"], c
+        )
+        assert "lcm_grep" in engine_names
+        assert "lcm_grep" in names
+        assert any(t["function"]["name"] == "lcm_grep" for t in tools)
+
+    def test_empty_toolsets_blocks_injection(self):
+        """`platform_toolsets: telegram: []` must suppress context-engine tools."""
+        c = self._compressor_with("lcm_grep")
+        tools, names, engine_names = self._run_context_engine_injection([], c)
+        assert tools == []
+        assert names == set()
+        assert engine_names == set()
+
+    def test_toolsets_without_context_engine_blocks_injection(self):
+        """A toolset list that doesn't name 'context_engine' suppresses injection."""
+        c = self._compressor_with("lcm_grep", "lcm_describe")
+        tools, names, engine_names = self._run_context_engine_injection(
+            ["terminal", "memory"], c
+        )
+        assert tools == []
+        assert names == set()
+        assert engine_names == set()
+
+    def test_no_compressor_no_injection(self):
+        """Gate is moot without a context_compressor."""
+        tools, names, engine_names = self._run_context_engine_injection(None, None)
+        assert tools == []
+        assert names == set()
+        assert engine_names == set()
@@ -444,6 +444,7 @@ class TestBuildNousSubscriptionPrompt:
                    "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
                    "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
                    "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
+                    "app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
                },
            ),
        )
@@ -468,6 +469,7 @@ class TestBuildNousSubscriptionPrompt:
                    "tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
                    "browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""),
                    "modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""),
+                    "app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, False, False, False, False, True, ""),
                },
            ),
        )
@@ -1,6 +1,12 @@
 """Tests for agent/skill_utils.py."""

-from agent.skill_utils import extract_skill_conditions, iter_skill_index_files
+from unittest.mock import patch
+
+from agent.skill_utils import (
+    extract_skill_conditions,
+    iter_skill_index_files,
+    skill_matches_platform,
+)


 def test_metadata_as_dict_with_hermes():
@@ -94,3 +100,100 @@ def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
    found = list(iter_skill_index_files(tmp_path, "SKILL.md"))

    assert found == [real / "SKILL.md"]
+
+
+# ── skill_matches_platform on Termux ──────────────────────────────────────
+
+
+class TestSkillMatchesPlatformTermux:
+    """Platform matching for skills when running under Termux.
+
+    Termux is Linux userland on Android. Skills tagged platforms:[linux]
+    must load there regardless of whether Python reports sys.platform as
+    "linux" (pre-3.13) or "android" (3.13+). Reported by user @LikiusInik
+    in May 2026 — only 3 built-in skills appeared on Termux because every
+    github/productivity/mlops skill is tagged platforms:[linux,macos,windows]
+    and sys.platform=="android" did not start with "linux".
+    """
+
+    @staticmethod
+    def _runtime(platform, termux):
+        """Return the patchers that fake ``sys.platform`` and ``is_termux``.
+
+        Callers enter them in this order: platform first, then is_termux.
+        """
+        return (
+            patch("agent.skill_utils.sys.platform", platform),
+            patch("agent.skill_utils.is_termux", return_value=termux),
+        )
+
+    def test_no_platforms_field_matches_everywhere(self):
+        # Backward-compat default — skills without a platforms tag load
+        # on any OS, Termux included.
+        fake_platform, fake_termux = self._runtime("android", True)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform({}) is True
+            assert skill_matches_platform({"name": "foo"}) is True
+
+    def test_linux_skill_loads_on_termux_android_platform(self):
+        # Python 3.13+ on Termux reports sys.platform == "android".
+        frontmatter = {"platforms": ["linux"]}
+        fake_platform, fake_termux = self._runtime("android", True)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is True
+
+    def test_linux_macos_windows_skill_loads_on_termux(self):
+        # The common "[linux, macos, windows]" tag used by github-*,
+        # productivity, mlops, etc.
+        frontmatter = {"platforms": ["linux", "macos", "windows"]}
+        fake_platform, fake_termux = self._runtime("android", True)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is True
+
+    def test_linux_skill_loads_on_termux_linux_platform(self):
+        # Pre-3.13 Termux reports sys.platform == "linux" already — this
+        # works without the Termux escape hatch but must still pass.
+        frontmatter = {"platforms": ["linux"]}
+        fake_platform, fake_termux = self._runtime("linux", True)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is True
+
+    def test_macos_only_skill_still_excluded_on_termux(self):
+        # macOS-only skills (apple-notes, imessage, ...) should NOT load
+        # on Termux. The Termux fallback only widens platforms:[linux,...].
+        frontmatter = {"platforms": ["macos"]}
+        fake_platform, fake_termux = self._runtime("android", True)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is False
+
+    def test_windows_only_skill_still_excluded_on_termux(self):
+        # Windows-only skills are likewise not widened by the fallback.
+        frontmatter = {"platforms": ["windows"]}
+        fake_platform, fake_termux = self._runtime("android", True)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is False
+
+    def test_explicit_termux_or_android_tag_matches(self):
+        # Skills can also opt in explicitly via platforms:[termux] or
+        # platforms:[android] — both should match a Termux session.
+        fake_platform, fake_termux = self._runtime("android", True)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform({"platforms": ["termux"]}) is True
+            assert skill_matches_platform({"platforms": ["android"]}) is True
+
+    def test_non_termux_android_does_not_widen(self):
+        # If we're somehow on a plain Android Python (not Termux), don't
+        # silently load Linux skills — Termux is the supported environment.
+        frontmatter = {"platforms": ["linux"]}
+        fake_platform, fake_termux = self._runtime("android", False)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is False
+
+    def test_linux_skill_on_real_linux_unaffected(self):
+        # The non-Termux Linux path must not change.
+        frontmatter = {"platforms": ["linux"]}
+        fake_platform, fake_termux = self._runtime("linux", False)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is True
+
+    def test_macos_skill_on_real_macos_unaffected(self):
+        # The macOS path ("darwin") must not change either.
+        frontmatter = {"platforms": ["macos"]}
+        fake_platform, fake_termux = self._runtime("darwin", False)
+        with fake_platform, fake_termux:
+            assert skill_matches_platform(frontmatter) is True
@@ -75,9 +75,197 @@ class TestCodeGeneration:
            code = store.generate_code("telegram", "user1", "Alice")
            pending = store.list_pending("telegram")
        assert len(pending) == 1
-        assert pending[0]["code"] == code
+        # list_pending no longer returns the original code — it returns a
+        # truncated hash prefix.  Verify the metadata is correct instead.
        assert pending[0]["user_id"] == "user1"
        assert pending[0]["user_name"] == "Alice"
+        # The code field is now a hash prefix, not the original plaintext code
+        assert pending[0]["code"] != code
+
+
+# ---------------------------------------------------------------------------
+# Hashed storage
+# ---------------------------------------------------------------------------
+
+
+class TestHashedStorage:
+    def test_pending_file_contains_hash_and_salt(self, tmp_path):
+        """Stored entries must have 'hash' and 'salt', never the plaintext code."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            store = PairingStore()
+            code = store.generate_code("telegram", "user1", "Alice")
+            raw = json.loads(
+                (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
+            )
+
+        assert len(raw) == 1
+        entry = next(iter(raw.values()))
+        # Must have hash and salt fields
+        assert "hash" in entry
+        assert "salt" in entry
+        # Hash must be a valid hex SHA-256 digest (64 hex chars)
+        assert len(entry["hash"]) == 64
+        assert all(c in "0123456789abcdef" for c in entry["hash"])
+        # Salt must be a valid hex string (32 hex chars for 16 bytes)
+        assert len(entry["salt"]) == 32
+        assert all(c in "0123456789abcdef" for c in entry["salt"])
+        # The plaintext code must NOT appear as a key or value anywhere
+        assert code not in raw  # not a key
+        for key, val in raw.items():
+            assert code != key
+            for field_val in val.values():
+                if isinstance(field_val, str):
+                    assert field_val != code
+
+    def test_plaintext_code_not_stored(self, tmp_path):
+        """The raw JSON file must not contain the plaintext code anywhere."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            store = PairingStore()
+            code = store.generate_code("telegram", "user1")
+            raw_text = (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
+        assert code not in raw_text
+
+    def test_valid_code_verifies_against_hash(self, tmp_path):
+        """approve_code with the correct code should succeed."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            store = PairingStore()
+            code = store.generate_code("telegram", "user1", "Bob")
+            result = store.approve_code("telegram", code)
+        assert result is not None
+        assert result["user_id"] == "user1"
+        assert result["user_name"] == "Bob"
+
+    def test_invalid_code_rejected(self, tmp_path):
+        """approve_code with a wrong code should fail."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            store = PairingStore()
+            store.generate_code("telegram", "user1")
+            result = store.approve_code("telegram", "ZZZZZZZZ")
+        assert result is None
+
+    def test_different_salts_per_entry(self, tmp_path):
+        """Each pending entry should have a unique salt."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            store = PairingStore()
+            store.generate_code("telegram", "user0")
+            store.generate_code("telegram", "user1")
+            store.generate_code("telegram", "user2")
+            raw = json.loads(
+                (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
+            )
+        salts = [entry["salt"] for entry in raw.values()]
+        assert len(set(salts)) == 3  # all unique
+
+    def test_hash_code_static_method(self, tmp_path):
+        """_hash_code should be deterministic for the same code+salt."""
+        salt = os.urandom(16)
+        h1 = PairingStore._hash_code("ABCD1234", salt)
+        h2 = PairingStore._hash_code("ABCD1234", salt)
+        assert h1 == h2
+        # Different salt should produce a different hash
+        salt2 = os.urandom(16)
+        h3 = PairingStore._hash_code("ABCD1234", salt2)
+        assert h3 != h1
+
+
+class TestLegacyPendingFileCompat:
+    """Defensive coverage for pre-hash pending.json on upgraded installs.
+
+    Existing user installs may have a pending.json written by the old
+    code (plaintext code as key, no hash/salt fields). The new
+    approve_code / list_pending / _cleanup_expired must not crash on
+    those entries — they should be ignored and aged out at TTL.
+    """
+
+    @staticmethod
+    def _write_legacy(tmp_path, code="ABCD1234", created_at=None):
+        """Write a pre-hash pending.json with plaintext code as the key."""
+        import time as _time
+        if created_at is None:
+            created_at = _time.time()
+        legacy = {
+            code: {
+                "user_id": "legacy-user",
+                "user_name": "Legacy",
+                "created_at": created_at,
+            }
+        }
+        (tmp_path / "telegram-pending.json").write_text(
+            json.dumps(legacy), encoding="utf-8"
+        )
+
+    def test_approve_code_ignores_legacy_entries(self, tmp_path):
+        """A valid old-format code must NOT silently approve under the new schema."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            self._write_legacy(tmp_path, code="LEGACY01")
+            store = PairingStore()
+            # The plaintext "code" used to be the key — under the new schema
+            # it's not even looked at, and there's no hash/salt to verify.
+            # Result: approve_code returns None, the legacy entry is left
+            # alone (gets pruned by _cleanup_expired at TTL).
+            result = store.approve_code("telegram", "LEGACY01")
+            assert result is None
+            # Approved list must be empty
+            assert store.is_approved("telegram", "legacy-user") is False
+
+    def test_list_pending_handles_legacy_entries(self, tmp_path):
+        """list_pending must not KeyError on a missing 'hash' field."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            self._write_legacy(tmp_path)
+            store = PairingStore()
+            pending = store.list_pending("telegram")
+        assert len(pending) == 1
+        assert pending[0]["user_id"] == "legacy-user"
+        assert pending[0]["code"] == "legacy"  # placeholder
+
+    def test_cleanup_expired_removes_legacy_at_ttl(self, tmp_path):
+        """Legacy entries past CODE_TTL must still get pruned."""
+        import time as _time
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            self._write_legacy(
+                tmp_path,
+                code="LEGACY99",
+                created_at=_time.time() - CODE_TTL_SECONDS - 1,
+            )
+            store = PairingStore()
+            store._cleanup_expired("telegram")
+            raw = json.loads(
+                (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
+            )
+        assert raw == {}
+
+    def test_cleanup_expired_handles_malformed_entries(self, tmp_path):
+        """Non-dict / missing-created_at entries get evicted, not crashed on."""
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            (tmp_path / "telegram-pending.json").write_text(
+                json.dumps({
+                    "broken1": "not a dict",
+                    "broken2": {"user_id": "x"},  # no created_at
+                    "broken3": {"created_at": "not a number"},
+                }),
+                encoding="utf-8",
+            )
+            store = PairingStore()
+            store._cleanup_expired("telegram")
+            raw = json.loads(
+                (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
+            )
+        assert raw == {}
+
+    def test_approve_code_skips_malformed_entries(self, tmp_path):
+        """Malformed entries must not crash approve_code's hash loop."""
+        import time as _time
+        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
+            (tmp_path / "telegram-pending.json").write_text(
+                json.dumps({
+                    "broken": {"user_id": "x", "created_at": _time.time(),
+                               "salt": "not-hex", "hash": "doesntmatter"},
+                }),
+                encoding="utf-8",
+            )
+            store = PairingStore()
+            # Approving with any code must just return None, not crash.
+            assert store.approve_code("telegram", "ABCD1234") is None


 # ---------------------------------------------------------------------------
@@ -300,9 +488,10 @@ class TestCodeExpiry:
            store = PairingStore()
            code = store.generate_code("telegram", "user1")

-            # Manually expire the code
+            # Manually expire all pending entries
            pending = store._load_json(store._pending_path("telegram"))
-            pending[code]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
+            for entry_id in pending:
+                pending[entry_id]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
            store._save_json(store._pending_path("telegram"), pending)

            # Cleanup happens on next operation
@@ -314,9 +503,10 @@ class TestCodeExpiry:
            store = PairingStore()
            code = store.generate_code("telegram", "user1")

-            # Expire it
+            # Expire all entries
            pending = store._load_json(store._pending_path("telegram"))
-            pending[code]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
+            for entry_id in pending:
+                pending[entry_id]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
            store._save_json(store._pending_path("telegram"), pending)

            result = store.approve_code("telegram", code)
@@ -6,7 +6,11 @@ import pytest
 from pathlib import Path

 from gateway.config import PlatformConfig
-from gateway.platforms.webhook import WebhookAdapter, _DYNAMIC_ROUTES_FILENAME
+from gateway.platforms.webhook import (
+    WebhookAdapter,
+    _DYNAMIC_ROUTES_FILENAME,
+    _INSECURE_NO_AUTH,
+)


 def _make_adapter(routes=None, extra=None):
@@ -85,3 +89,78 @@ class TestDynamicRouteLoading:
        adapter._reload_dynamic_routes()
        assert "static" in adapter._routes
        assert len(adapter._dynamic_routes) == 0
+
+
+class TestDynamicRouteSecretValidation:
+    """Empty/missing secrets must be rejected during hot-reload.
+
+    Regression for HMAC bypass: prior to the fix, an agent-induced
+    dynamic route with `"secret": ""` would be merged into self._routes
+    by _reload_dynamic_routes(), then _handle_webhook's
+    `if secret and secret != _INSECURE_NO_AUTH` would skip signature
+    validation because empty string is falsy. Unauthenticated POSTs
+    would then execute the webhook prompt.
+    """
+
+    def test_empty_secret_rejected(self, tmp_path):
+        # Explicit empty-string secret must NOT fall back to the global
+        # secret, and the route must be skipped entirely.
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(
+            json.dumps({"evil": {"secret": "", "prompt": "rm -rf"}})
+        )
+        adapter = _make_adapter()  # has global secret
+        adapter._reload_dynamic_routes()
+        assert "evil" not in adapter._routes
+        assert "evil" not in adapter._dynamic_routes
+
+    def test_missing_secret_no_global_rejected(self, tmp_path):
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(
+            json.dumps({"orphan": {"prompt": "test"}})
+        )
+        # No global secret configured
+        adapter = _make_adapter(extra={"secret": ""})
+        adapter._reload_dynamic_routes()
+        assert "orphan" not in adapter._routes
+        assert "orphan" not in adapter._dynamic_routes
+
+    def test_missing_secret_inherits_global(self, tmp_path):
+        # No per-route secret but a global one is set → route is kept,
+        # the global secret protects it. Preserves existing fallback.
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(
+            json.dumps({"valid": {"prompt": "ok"}})
+        )
+        adapter = _make_adapter()  # global secret set
+        adapter._reload_dynamic_routes()
+        assert "valid" in adapter._routes
+
+    def test_insecure_no_auth_preserved(self, tmp_path):
+        # Explicit opt-in escape hatch for local testing — must still load.
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(
+            json.dumps({"test": {"secret": _INSECURE_NO_AUTH, "prompt": "p"}})
+        )
+        adapter = _make_adapter()
+        adapter._reload_dynamic_routes()
+        assert "test" in adapter._routes
+
+    def test_warning_logged_on_skip(self, tmp_path, caplog):
+        import logging
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(
+            json.dumps({"silent": {"secret": "", "prompt": "x"}})
+        )
+        adapter = _make_adapter()
+        with caplog.at_level(logging.WARNING, logger="gateway.platforms.webhook"):
+            adapter._reload_dynamic_routes()
+        assert any("silent" in rec.message for rec in caplog.records)
+
+    def test_partial_skip(self, tmp_path):
+        # One route bad, one route good — only the bad one is dropped.
+        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(
+            json.dumps({
+                "bad":  {"secret": "", "prompt": "x"},
+                "good": {"secret": "valid-secret", "prompt": "y"},
+            })
+        )
+        adapter = _make_adapter()
+        adapter._reload_dynamic_routes()
+        assert "good" in adapter._routes
+        assert "bad" not in adapter._routes
@@ -0,0 +1,131 @@
+"""Tests for curses color compatibility on low-color terminals (Docker).
+
+Regression test for #13688: ``hermes plugins`` crashes with
+``curses.error: init_pair() : color number is greater than COLORS-1``
+in Docker containers where curses.COLORS == 8 (only colors 0-7 exist).
+
+The bug was ``curses.init_pair(4, 8, -1)`` using raw color 8 ("bright
+black" / dim gray) which does not exist on 8-color terminals.  The tests
+below model the fix as ``8 if curses.COLORS > 8 else curses.COLOR_WHITE``.
+"""
+
+import curses
+import re
+from pathlib import Path
+from unittest.mock import patch, MagicMock, call
+
+import pytest
+
+
+# Path to the source files under test
+_SRC_ROOT = Path(__file__).parent.parent.parent / "hermes_cli"
+
+
+class TestInitPairClampingBehavior:
+    """Simulate curses color initialization on low-color terminals.
+
+    Patches curses.COLORS to 8 (Docker default) and verifies that
+    init_pair is never called with a color >= COLORS.
+    """
+
+    def _collect_init_pair_calls(self, draw_fn, colors_value):
+        """Run a curses draw function with a mock stdscr and patched COLORS.
+
+        Returns list of (pair_number, fg, bg) tuples from init_pair calls.
+        """
+        calls = []
+        # Recorder patched in for curses.init_pair; it only logs its arguments.
+
+        def tracking_init_pair(pair, fg, bg):
+            calls.append((pair, fg, bg))
+
+        mock_stdscr = MagicMock()
+        mock_stdscr.getmaxyx.return_value = (24, 80)
+        mock_stdscr.getch.return_value = 27  # ESC to exit
+
+        with patch("curses.COLORS", colors_value, create=True), \
+             patch("curses.init_pair", side_effect=tracking_init_pair), \
+             patch("curses.has_colors", return_value=True), \
+             patch("curses.start_color"), \
+             patch("curses.use_default_colors"), \
+             patch("curses.curs_set"):
+            try:
+                draw_fn(mock_stdscr)
+            except (SystemExit, Exception):  # StopIteration is an Exception
+                pass  # draw functions loop until keypress
+
+        return calls
+
+    def test_8_color_terminal_no_color_exceeds_limit(self):
+        """On an 8-color terminal (Docker), no init_pair fg color >= 8."""
+        # Simulate the color init pattern from plugins_cmd.py
+        def _simulated_color_init(stdscr):
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)
+                curses.init_pair(3, curses.COLOR_CYAN, -1)
+                curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
+
+        calls = self._collect_init_pair_calls(_simulated_color_init, 8)
+        for pair, fg, bg in calls:
+            assert fg < 8, (
+                f"init_pair({pair}, {fg}, {bg}) uses color {fg} which "
+                f"does not exist on an 8-color terminal (valid: 0-7)"
+            )
+
+    def test_256_color_terminal_uses_color_8(self):
+        """On a 256-color terminal, color 8 (dim gray) should be used."""
+        def _simulated_color_init(stdscr):
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
+
+        calls = self._collect_init_pair_calls(_simulated_color_init, 256)
+        assert any(fg == 8 for _, fg, _ in calls), (
+            "On 256-color terminals, color 8 (dim gray) should be used"
+        )
+
+    def test_16_color_terminal_uses_color_8(self):
+        """On a 16-color terminal, color 8 should be available."""
+        def _simulated_color_init(stdscr):
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
+
+        calls = self._collect_init_pair_calls(_simulated_color_init, 16)
+        assert any(fg == 8 for _, fg, _ in calls)
+
+
+class TestSourceCodeGuardrails:
+    """Regression guardrails: raw color 8 must not reappear in source.
+
+    These complement the behavioral tests above — they catch regressions
+    introduced by copy-paste of the old pattern.
+    """
+
+    _RAW_COLOR_8_PATTERN = re.compile(r'init_pair\(\d+,\s*8\s*,')  # literal 8 as fg
+
+    def test_no_raw_color_8_in_plugins_cmd(self):
+        source = (_SRC_ROOT / "plugins_cmd.py").read_text(encoding="utf-8")
+        matches = self._RAW_COLOR_8_PATTERN.findall(source)
+        assert not matches, (
+            f"plugins_cmd.py contains unclamped color 8: {matches}"
+        )
+
+    def test_no_raw_color_8_in_main(self):
+        source = (_SRC_ROOT / "main.py").read_text(encoding="utf-8")
+        matches = self._RAW_COLOR_8_PATTERN.findall(source)
+        assert not matches, (
+            f"main.py contains unclamped color 8: {matches}"
+        )
+
+    def test_no_raw_color_8_in_curses_ui(self):
+        source = (_SRC_ROOT / "curses_ui.py").read_text(encoding="utf-8")
+        matches = self._RAW_COLOR_8_PATTERN.findall(source)
+        assert not matches, (
+            f"curses_ui.py contains unclamped color 8: {matches}"
+        )
@@ -69,18 +69,19 @@ class TestPluginPickerInjection:
        assert "Myimg" in names
        assert "myimg" in plugin_names

-    def test_fal_skipped_to_avoid_duplicate(self, monkeypatch):
+    def test_fal_surfaced_alongside_other_plugins(self, monkeypatch):
        from hermes_cli import tools_config

-        # Simulate a FAL plugin being registered — the picker already has
-        # hardcoded FAL rows in TOOL_CATEGORIES, so plugin-FAL must be
-        # skipped to avoid showing FAL twice.
+        # After #26241, FAL is itself a plugin (`plugins/image_gen/fal/`)
+        # and the hardcoded `TOOL_CATEGORIES["image_gen"]` FAL row is
+        # gone. The plugin-row builder therefore surfaces it like any
+        # other backend — no deduplication step needed.
        image_gen_registry.register_provider(_FakeProvider("fal"))
        image_gen_registry.register_provider(_FakeProvider("openai"))

        rows = tools_config._plugin_image_gen_providers()
        names = [r.get("image_gen_plugin_name") for r in rows]
-        assert "fal" not in names
+        assert "fal" in names
        assert "openai" in names

    def test_visible_providers_includes_plugins_for_image_gen(self, monkeypatch):
@@ -1,4 +1,4 @@
-"""Tests for ``install_cua_driver`` upgrade semantics.
+"""Tests for ``install_cua_driver`` upgrade semantics and architecture pre-check.

 The cua-driver upstream installer always pulls the latest release tag, so
 re-running it is the canonical upgrade path. ``install_cua_driver(upgrade=True)``
@@ -10,18 +10,18 @@ must:
  fix for the "we only pulled cua-driver once on enable" complaint).
 * Preserve original ``upgrade=False`` behaviour for the toolset-enable flow:
  skip if installed, install otherwise, warn on non-macOS.
+* Pre-check architecture compatibility before downloading to avoid raw 404
+  errors on Intel macOS when the upstream release lacks x86_64 assets.
 """

 from __future__ import annotations

-from unittest.mock import patch
+import json
+from unittest.mock import MagicMock, patch


 class TestInstallCuaDriverUpgrade:
    def test_upgrade_on_non_macos_is_silent_noop(self):
-        """``hermes update`` calls install_cua_driver(upgrade=True) for every
-        user. On Linux/Windows it must return False without printing the
-        "macOS-only; skipping" warning that the toolset-enable path emits."""
        from hermes_cli import tools_config

        with patch.object(tools_config, "_print_warning") as warn, \
@@ -30,8 +30,6 @@ class TestInstallCuaDriverUpgrade:
            warn.assert_not_called()

    def test_non_upgrade_on_non_macos_warns(self):
-        """The toolset-enable path (upgrade=False) should still warn loudly
-        when the user tries to enable Computer Use on a non-macOS host."""
        from hermes_cli import tools_config

        with patch.object(tools_config, "_print_warning") as warn, \
@@ -40,43 +38,36 @@ class TestInstallCuaDriverUpgrade:
            warn.assert_called()

    def test_upgrade_on_macos_with_binary_runs_installer(self):
-        """When cua-driver is already on PATH and upgrade=True, we must
-        re-run the upstream installer (this is the fix for the bug report).
-        """
        from hermes_cli import tools_config

        with patch("platform.system", return_value="Darwin"), \
             patch.object(tools_config.shutil, "which",
                          side_effect=lambda n: "/usr/local/bin/" + n
                                                 if n in {"cua-driver", "curl"} else None), \
+             patch.object(tools_config, "_check_cua_driver_asset_for_arch",
+                          return_value=True), \
             patch.object(tools_config, "_run_cua_driver_installer",
                          return_value=True) as runner, \
             patch("subprocess.run"):
            assert tools_config.install_cua_driver(upgrade=True) is True
            runner.assert_called_once()
-            # Refresh path uses non-verbose mode so we don't re-print the
-            # "grant macOS permissions" block on every `hermes update`.
            kwargs = runner.call_args.kwargs
            assert kwargs.get("verbose") is False

    def test_upgrade_on_macos_without_binary_runs_installer(self):
-        """upgrade=True with cua-driver missing must still trigger an
-        install — equivalent to a fresh install. (Don't silently no-op.)"""
        from hermes_cli import tools_config

        with patch("platform.system", return_value="Darwin"), \
             patch.object(tools_config.shutil, "which",
                          side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
+             patch.object(tools_config, "_check_cua_driver_asset_for_arch",
+                          return_value=True), \
             patch.object(tools_config, "_run_cua_driver_installer",
                          return_value=True) as runner:
            assert tools_config.install_cua_driver(upgrade=True) is True
            runner.assert_called_once()

    def test_non_upgrade_on_macos_with_binary_skips_install(self):
-        """Original toolset-enable behaviour: cua-driver already installed
-        + upgrade=False → confirm and return without re-running installer.
-        This is the behaviour that ``hermes tools`` (re)enable depends on,
-        so the new helper must not regress it."""
        from hermes_cli import tools_config

        with patch("platform.system", return_value="Darwin"), \
@@ -89,27 +80,133 @@ class TestInstallCuaDriverUpgrade:
            runner.assert_not_called()

    def test_non_upgrade_on_macos_without_binary_runs_installer(self):
-        """Original fresh-install path must still work."""
        from hermes_cli import tools_config

        with patch("platform.system", return_value="Darwin"), \
             patch.object(tools_config.shutil, "which",
                          side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
+             patch.object(tools_config, "_check_cua_driver_asset_for_arch",
+                          return_value=True), \
             patch.object(tools_config, "_run_cua_driver_installer",
                          return_value=True) as runner:
            assert tools_config.install_cua_driver(upgrade=False) is True
-            runner.assert_called_once()

-    def test_upgrade_without_curl_does_not_crash(self):
-        """If curl isn't on PATH we can't refresh — must warn and return
-        the current install state, not raise."""
+
+class TestCheckCuaDriverAssetForArch:
+    def test_arm64_always_returns_true(self):
        from hermes_cli import tools_config

-        # cua-driver present, curl missing.
-        def _which(name):
-            return "/usr/local/bin/cua-driver" if name == "cua-driver" else None
+        with patch("platform.machine", return_value="arm64"):
+            assert tools_config._check_cua_driver_asset_for_arch() is True
+
+    def test_x86_64_with_asset_returns_true(self):
+        from hermes_cli import tools_config
+
+        release = {
+            "tag_name": "cua-driver-v0.1.6",
+            "assets": [
+                {"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"},
+                {"name": "cua-driver-0.1.6-darwin-x86_64.tar.gz"},
+            ],
+        }
+        mock_resp = MagicMock()
+        mock_resp.read.return_value = json.dumps(release).encode()
+        mock_resp.__enter__ = lambda s: s
+        mock_resp.__exit__ = MagicMock(return_value=False)
+
+        with patch("platform.machine", return_value="x86_64"), \
+             patch("urllib.request.urlopen", return_value=mock_resp):
+            assert tools_config._check_cua_driver_asset_for_arch() is True
+
+    def test_x86_64_without_asset_returns_false(self):
+        """x86_64 host, mocked release without an x86_64 asset: False + one warning."""
+        from hermes_cli import tools_config
+        release = {
+            "tag_name": "cua-driver-v0.1.6",
+            "assets": [
+                {"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"},
+                {"name": "cua-driver.tar.gz"},
+            ],
+        }
+        mock_resp = MagicMock()
+        mock_resp.read.return_value = json.dumps(release).encode()
+        mock_resp.__enter__ = lambda s: s
+        mock_resp.__exit__ = MagicMock(return_value=False)
+
+        with patch("platform.machine", return_value="x86_64"), \
+             patch("urllib.request.urlopen", return_value=mock_resp), \
+             patch.object(tools_config, "_print_warning") as warn, \
+             patch.object(tools_config, "_print_info"):
+            assert tools_config._check_cua_driver_asset_for_arch() is False
+            warn.assert_called_once()
+            assert "no intel" in warn.call_args[0][0].lower() or "x86_64" in warn.call_args[0][0]
+
+    def test_x86_64_api_failure_returns_true(self):
+        """Network failure should fail open — let the installer handle it."""
+        from hermes_cli import tools_config
+
+        with patch("platform.machine", return_value="x86_64"), \
+             patch("urllib.request.urlopen", side_effect=Exception("timeout")):
+            assert tools_config._check_cua_driver_asset_for_arch() is True
+
+    def test_fresh_install_x86_64_no_asset_skips_installer(self):
+        """When the latest release has no Intel asset, skip the installer."""
+        from hermes_cli import tools_config
+
+        release = {
+            "tag_name": "cua-driver-v0.1.6",
+            "assets": [{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"}],
+        }
+        mock_resp = MagicMock()
+        mock_resp.read.return_value = json.dumps(release).encode()
+        mock_resp.__enter__ = lambda s: s
+        mock_resp.__exit__ = MagicMock(return_value=False)

        with patch("platform.system", return_value="Darwin"), \
-             patch.object(tools_config.shutil, "which", side_effect=_which), \
-             patch.object(tools_config, "_print_warning"):
+             patch.object(tools_config.shutil, "which",
+                          side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
+             patch("platform.machine", return_value="x86_64"), \
+             patch("urllib.request.urlopen", return_value=mock_resp), \
+             patch.object(tools_config, "_print_warning"), \
+             patch.object(tools_config, "_print_info"), \
+             patch.object(tools_config, "_run_cua_driver_installer") as runner:
+            assert tools_config.install_cua_driver(upgrade=False) is False
+            runner.assert_not_called()
+
+    def test_upgrade_x86_64_no_asset_returns_existing_status(self):
+        """On upgrade with no Intel asset, return whether binary existed."""
+        from hermes_cli import tools_config
+
+        release = {
+            "tag_name": "cua-driver-v0.1.6",
+            "assets": [{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"}],
+        }
+        mock_resp = MagicMock()
+        mock_resp.read.return_value = json.dumps(release).encode()
+        mock_resp.__enter__ = lambda s: s
+        mock_resp.__exit__ = MagicMock(return_value=False)
+
+        # With binary installed — returns True (binary exists)
+        with patch("platform.system", return_value="Darwin"), \
+             patch.object(tools_config.shutil, "which",
+                          side_effect=lambda n: "/usr/local/bin/" + n
+                                                 if n in ("cua-driver", "curl") else None), \
+             patch("platform.machine", return_value="x86_64"), \
+             patch("urllib.request.urlopen", return_value=mock_resp), \
+             patch.object(tools_config, "_print_warning"), \
+             patch.object(tools_config, "_print_info"), \
+             patch.object(tools_config, "_run_cua_driver_installer") as runner:
            assert tools_config.install_cua_driver(upgrade=True) is True
+            runner.assert_not_called()
+
+        # Without binary — returns False
+        with patch("platform.system", return_value="Darwin"), \
+             patch.object(tools_config.shutil, "which",
+                          side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
+             patch("platform.machine", return_value="x86_64"), \
+             patch("urllib.request.urlopen", return_value=mock_resp), \
+             patch.object(tools_config, "_print_warning"), \
+             patch.object(tools_config, "_print_info"), \
+             patch.object(tools_config, "_run_cua_driver_installer") as runner:
+            assert tools_config.install_cua_driver(upgrade=True) is False
+            runner.assert_not_called()
@@ -90,6 +90,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path
                "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
                "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
                "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
+                "app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
            },
        ),
        raising=False,
@@ -12,8 +12,10 @@ from hermes_cli.tools_config import (
    _get_platform_tools,
    _platform_toolset_summary,
    _reconfigure_tool,
+    _run_post_setup,
    _save_platform_tools,
    _toolset_has_keys,
+    _toolset_needs_configuration_prompt,
    CONFIGURABLE_TOOLSETS,
    TOOL_CATEGORIES,
    _visible_providers,
@@ -752,6 +754,91 @@ def test_numeric_mcp_server_name_does_not_crash_sorted():

 # ─── Imagegen Backend Picker Wiring ────────────────────────────────────────

+def test_toolset_has_keys_treats_no_key_providers_as_configured():
+    """``computer_use`` reports its keys as present even on an empty config."""
+    has_keys = _toolset_has_keys("computer_use", {})
+
+    assert has_keys is True
+
+
+def test_computer_use_needs_configuration_when_cua_driver_post_setup_pending():
+    """An unsatisfied post_setup forces the configuration prompt.
+
+    The provider has no API keys, yet a returning user who enables Computer
+    Use through `hermes tools` still has to reach the cua-driver post-setup
+    installer, so the gate must answer True while the binary is missing.
+    """
+    # Nothing resolves on PATH: the driver is not installed.
+    with patch("shutil.which", return_value=None):
+        needs_prompt = _toolset_needs_configuration_prompt("computer_use", {})
+
+    assert needs_prompt is True
+
+
+def test_computer_use_skips_configuration_when_cua_driver_already_installed():
+    """With the driver already on PATH, toggling the toolset stays a no-op."""
+    def which_stub(name: str):
+        # Only the default driver binary resolves.
+        if name == "cua-driver":
+            return "/usr/local/bin/cua-driver"
+        return None
+
+    with patch("shutil.which", side_effect=which_stub):
+        needs_prompt = _toolset_needs_configuration_prompt("computer_use", {})
+
+    assert needs_prompt is False
+
+
+def test_computer_use_respects_custom_cua_driver_command():
+    """The setup gate honours the HERMES_CUA_DRIVER_CMD override like runtime does."""
+    def which_stub(name: str):
+        # Only the overridden command resolves; the default driver does not.
+        if name == "custom-cua":
+            return "/opt/bin/custom-cua"
+        return None
+
+    env_override = patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"})
+    which_patch = patch("shutil.which", side_effect=which_stub)
+    with env_override, which_patch:
+        needs_prompt = _toolset_needs_configuration_prompt("computer_use", {})
+
+    assert needs_prompt is False
+
+
+def test_computer_use_blank_custom_driver_command_falls_back_to_default():
+    """A whitespace-only override must not send the gate looking for an empty command."""
+    def which_stub(name: str):
+        # Only the default driver binary resolves.
+        if name == "cua-driver":
+            return "/usr/local/bin/cua-driver"
+        return None
+
+    env_override = patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "   "})
+    which_patch = patch("shutil.which", side_effect=which_stub)
+    with env_override, which_patch:
+        needs_prompt = _toolset_needs_configuration_prompt("computer_use", {})
+
+    assert needs_prompt is False
+
+
+def test_computer_use_post_setup_respects_custom_driver_command_when_installed():
+    """The already-installed branch of post_setup version-probes the override."""
+    def which_stub(name: str):
+        # Only the overridden command resolves on PATH.
+        if name == "custom-cua":
+            return "/opt/bin/custom-cua"
+        return None
+
+    with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"}), \
+         patch("platform.system", return_value="Darwin"), \
+         patch("shutil.which", side_effect=which_stub), \
+         patch("subprocess.run") as fake_run:
+        fake_run.return_value.stdout = "custom 1.2.3\n"
+
+        _run_post_setup("cua_driver")
+
+    # Exactly one subprocess call, and it is the version probe of the override.
+    fake_run.assert_called_once()
+    probed_argv = fake_run.call_args.args[0]
+    assert probed_argv == ["custom-cua", "--version"]
+
+
+def test_computer_use_post_setup_missing_override_does_not_accept_default_binary():
+    """An installed default cua-driver must not stand in for a missing override."""
+    looked_up = []
+
+    def recording_which(name: str):
+        looked_up.append(name)
+        # Only the default binary resolves; the override and curl are absent.
+        return "/usr/local/bin/cua-driver" if name == "cua-driver" else None
+
+    with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"}), \
+         patch("platform.system", return_value="Darwin"), \
+         patch("shutil.which", side_effect=recording_which), \
+         patch("subprocess.run") as fake_run:
+        _run_post_setup("cua_driver")
+
+    # No version probe ran, and both the override and curl were looked up.
+    fake_run.assert_not_called()
+    assert "custom-cua" in looked_up
+    assert "curl" in looked_up
+
+
 class TestImagegenBackendRegistry:
    """IMAGEGEN_BACKENDS tags drive the model picker flow in tools_config."""

@@ -168,7 +168,7 @@ def test_make_tui_argv_skips_build_only_on_termux_when_fresh(

    argv, cwd = main_mod._make_tui_argv(tmp_path, tui_dev=False)

-    assert argv == ["/bin/node", str(tmp_path / "dist" / "entry.js")]
+    assert argv == ["/bin/node", "--expose-gc", str(tmp_path / "dist" / "entry.js")]
    assert cwd == tmp_path


@@ -0,0 +1,300 @@
+"""Behavior-parity check for the image-gen FAL plugin migration (#26241).
+
+Spawns one subprocess per (version, scenario) cell — pinned to either
+``origin/main`` (legacy in-tree FAL fall-through + ``configured == "fal"``
+skip in ``_dispatch_to_plugin_provider``) or this PR's worktree (FAL is
+itself a plugin and the dispatcher routes every set provider through
+the registry). Each subprocess clears all FAL-related env vars + writes
+a ``config.yaml``, then asks the dispatcher how it would route an
+``image_generate`` call. The emitted shape is a JSON object with keys
+``{dispatch_kind, provider_name, model, error_present}``:
+
+* ``dispatch_kind`` ∈ ``{"legacy_fal", "plugin", "error",
+  "unknown_payload", "exception"}`` — whether the call would go
+  straight to the in-tree pipeline (the dispatcher returned ``None``,
+  i.e. it fell through silently), through
+  ``_dispatch_to_plugin_provider``, return an explicit
+  provider-not-registered error, return a non-dict payload, or raise.
+* ``provider_name`` — when ``dispatch_kind == "plugin"``, the
+  resolved provider name. ``None`` otherwise.
+* ``model`` — the resolved FAL model id when applicable.
+* ``error_present`` — ``True`` when an error message was captured.
+
+The parent process diffs the shapes per scenario. A diff means the
+migration introduced an observable behaviour change vs origin/main —
+likely a real regression for users on the existing config keys.
+
+Run from the PR worktree:
+
+    python tests/plugins/image_gen/check_parity_vs_main.py
+"""
+from __future__ import annotations
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+
+REPO_ROOT = Path(__file__).resolve().parents[3]
+
+
+# Pin one path to current main, one to the PR worktree.
+# ``REPO_ROOT`` is ``.../.worktrees/<name>``; the main checkout lives
+# two levels up. When running directly from a regular clone (no
+# worktree), ``MAIN_DIR`` falls back to a sibling ``hermes-agent-main``
+# checkout if one exists.
+def _resolve_main_dir() -> Path:
+    """Pick the checkout to treat as ``origin/main``.
+
+    Tried in order: the directory two levels above ``REPO_ROOT``, then a
+    sibling directory named ``hermes-agent-main``. A directory qualifies only
+    if it contains ``tools/image_generation_tool.py``. When neither
+    qualifies, ``REPO_ROOT`` itself is returned.
+    """
+    marker = Path("tools") / "image_generation_tool.py"
+
+    two_up = REPO_ROOT.parent.parent
+    if two_up != REPO_ROOT and (two_up / marker).exists():
+        return two_up
+
+    named_sibling = REPO_ROOT.parent / "hermes-agent-main"
+    return named_sibling if (named_sibling / marker).exists() else REPO_ROOT
+
+
+PR_DIR = REPO_ROOT
+MAIN_DIR = _resolve_main_dir()
+# Fail at import time if this file is not inside a checkout that ships the
+# image generation tool.
+assert (PR_DIR / "tools" / "image_generation_tool.py").exists(), (
+    f"PR_DIR={PR_DIR} doesn't look like a hermes-agent checkout"
+)
+
+
+# Source of the child program run once per (checkout, scenario) cell.
+# argv[1] is the checkout to import from, argv[2] a JSON object of extra
+# environment variables, argv[3] the config.yaml text. The child prints one
+# JSON "shape" object as its last stdout line.
+SUBPROCESS_SCRIPT = r"""
+import atexit, json, os, shutil, sys, tempfile
+sys.path.insert(0, sys.argv[1])
+
+# Isolated HERMES_HOME so the config write is hermetic.
+home = tempfile.mkdtemp()
+# Remove the scratch home on exit so repeated runs don't pile up temp dirs.
+atexit.register(shutil.rmtree, home, ignore_errors=True)
+os.environ["HERMES_HOME"] = home
+
+# Clear FAL-related env so dispatch decisions are config-driven.
+for k in (
+    "FAL_KEY", "FAL_QUEUE_GATEWAY_URL",
+    "TOOL_GATEWAY_DOMAIN", "TOOL_GATEWAY_USER_TOKEN",
+    "FAL_IMAGE_MODEL",
+):
+    os.environ.pop(k, None)
+
+scenario_env = json.loads(sys.argv[2])
+os.environ.update(scenario_env)
+
+config_yaml = sys.argv[3]
+config_path = os.path.join(home, "config.yaml")
+with open(config_path, "w") as f:
+    f.write(config_yaml)
+
+# Fresh import — must not have anything cached.
+for name in list(sys.modules):
+    if (name.startswith("tools.")
+            or name.startswith("agent.")
+            or name.startswith("plugins.")
+            or name.startswith("hermes_cli.")):
+        sys.modules.pop(name, None)
+
+import tools.image_generation_tool as image_tool
+
+dispatch_kind = None
+provider_name = None
+model = None
+error_text = None
+
+try:
+    raw = image_tool._dispatch_to_plugin_provider("ping", "landscape")
+    if raw is None:
+        dispatch_kind = "legacy_fal"
+    else:
+        parsed = json.loads(raw) if isinstance(raw, str) else raw
+        if isinstance(parsed, dict):
+            if parsed.get("error_type") == "provider_not_registered":
+                dispatch_kind = "error"
+                error_text = parsed.get("error")
+            else:
+                dispatch_kind = "plugin"
+                provider_name = parsed.get("provider")
+                model = parsed.get("model")
+        else:
+            dispatch_kind = "unknown_payload"
+
+    if model is None:
+        # _resolve_fal_model still returns the active FAL model id even
+        # when dispatch goes to a non-FAL plugin — used for the diff
+        # only when applicable.
+        try:
+            model_id, _meta = image_tool._resolve_fal_model()
+            if dispatch_kind == "legacy_fal":
+                model = model_id
+        except Exception:
+            pass
+except Exception as exc:
+    dispatch_kind = "exception"
+    error_text = repr(exc)
+
+shape = {
+    "dispatch_kind": dispatch_kind,
+    "provider_name": provider_name,
+    "model": model,
+    "error_present": error_text is not None,
+}
+print(json.dumps(shape))
+"""
+
+
+# Each entry is (label, config.yaml body, extra env vars).
+SCENARIOS: list[tuple[str, str, dict[str, str]]] = [
+    # Nothing configured and nothing in the environment.
+    ("no-config-no-env", "", {}),
+    # FAL chosen explicitly, without credentials.
+    ("explicit-fal-no-creds", "image_gen:\n  provider: fal\n", {}),
+    # FAL chosen explicitly, with a key in the environment.
+    ("explicit-fal-with-creds", "image_gen:\n  provider: fal\n", {"FAL_KEY": "test-key"}),
+    # FAL chosen explicitly together with a model id.
+    (
+        "explicit-fal-with-model",
+        "image_gen:\n  provider: fal\n  model: fal-ai/flux-2-pro\n",
+        {"FAL_KEY": "test-key"},
+    ),
+    # Provider name that matches no backend.
+    (
+        "explicit-typo-provider",
+        "image_gen:\n  provider: not-a-real-backend\n",
+        {"FAL_KEY": "test-key"},
+    ),
+    # No config; only the managed tool gateway variables are set.
+    (
+        "managed-gateway-only",
+        "",
+        {
+            "TOOL_GATEWAY_DOMAIN": "nousresearch.com",
+            "TOOL_GATEWAY_USER_TOKEN": "nous-token",
+        },
+    ),
+]
+
+
+def _run_scenario(repo_path: Path, label: str, config_yaml: str, env: dict) -> dict:
+    """Run ``SUBPROCESS_SCRIPT`` against one checkout and return its shape.
+
+    Args:
+        repo_path: Checkout handed to the child as ``sys.argv[1]``.
+        label: Scenario label; only used to make error messages traceable.
+        config_yaml: Text handed to the child as ``sys.argv[3]``.
+        env: Extra environment variables, JSON-encoded as ``sys.argv[2]``.
+
+    Returns:
+        The JSON object printed on the child's last stdout line, or a dict
+        carrying an ``"error"`` key when the child could not be launched,
+        timed out, exited non-zero, or printed output that does not parse.
+    """
+    # Interpreter preference: the checkout's own venv, then main's venv,
+    # then whatever ``python3`` resolves to on PATH.
+    venv_python = repo_path / ".venv" / "bin" / "python"
+    if not venv_python.exists():
+        venv_python = MAIN_DIR / ".venv" / "bin" / "python"
+    if not venv_python.exists():
+        venv_python = Path("python3")
+
+    try:
+        out = subprocess.run(
+            [
+                str(venv_python),
+                "-c",
+                SUBPROCESS_SCRIPT,
+                str(repo_path),
+                json.dumps(env),
+                config_yaml,
+            ],
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+    except subprocess.TimeoutExpired as exc:
+        # Report a hung child as a per-scenario error instead of letting the
+        # exception abort the remaining scenarios.
+        return {"error": f"subprocess timed out ({label}): {exc}"}
+    except OSError as exc:
+        # E.g. no ``python3`` on PATH when neither venv exists.
+        return {"error": f"could not launch subprocess ({label}): {exc}"}
+
+    if out.returncode != 0:
+        return {
+            "error": "subprocess failed",
+            "stdout": out.stdout[-500:],
+            "stderr": out.stderr[-500:],
+        }
+    try:
+        # Only the last line is the shape; earlier lines may be import noise.
+        return json.loads(out.stdout.strip().splitlines()[-1])
+    except Exception as exc:
+        return {"error": f"could not parse output: {exc}", "stdout": out.stdout}
+
+
+def _reduce(shape: dict) -> dict:
+    """Project a shape onto the four fields compared for user-visible parity.
+
+    On origin/main the ``explicit-fal-*`` scenarios short-circuit to
+    ``legacy_fal`` because of the ``configured == "fal"`` skip, whereas on
+    the PR they route through the plugin and report
+    ``dispatch_kind == "plugin"`` with ``provider_name == "fal"``.
+
+    The two are functionally equivalent — the plugin's ``generate()``
+    re-enters the same in-tree pipeline via ``_it`` indirection — but the
+    difference is kept visible so reviewers can sign off on the intentional
+    behaviour delta.
+    """
+    compared = ("dispatch_kind", "provider_name", "model", "error_present")
+    return {field: shape.get(field) for field in compared}
+
+
+def main() -> int:
+    """Run every scenario against both checkouts and print a parity report.
+
+    Returns:
+        ``1`` when any scenario errored or regressed, ``0`` otherwise.
+    """
+    print(f"main:    {MAIN_DIR}")
+    print(f"pr:      {PR_DIR}")
+    print()
+
+    if MAIN_DIR == PR_DIR:
+        print(
+            "WARN: MAIN_DIR == PR_DIR — diffs will be trivially identical.\n"
+            "      Set up a sibling 'hermes-agent-main' checkout pinned to "
+            "origin/main to get real parity coverage."
+        )
+        print()
+
+    errored: list[str] = []
+    regressed: list[str] = []
+    expected_diffs: list[tuple[str, dict, dict]] = []
+
+    for label, config_yaml, env in SCENARIOS:
+        on_main = _run_scenario(MAIN_DIR, label, config_yaml, env)
+        on_pr = _run_scenario(PR_DIR, label, config_yaml, env)
+
+        if any("error" in shape for shape in (on_main, on_pr)):
+            print(f"  [ERR ] {label}: subprocess failed")
+            print(f"    main: {on_main}")
+            print(f"    pr:   {on_pr}")
+            errored.append(label)
+            continue
+
+        before, after = _reduce(on_main), _reduce(on_pr)
+
+        if before == after:
+            print(f"  [OK]   {label}: {before}")
+        elif (
+            # The one acceptable delta: main answers legacy_fal where the PR
+            # dispatches to the fal plugin. Anything else is a regression.
+            before.get("dispatch_kind") == "legacy_fal"
+            and after.get("dispatch_kind") == "plugin"
+            and after.get("provider_name") == "fal"
+        ):
+            print(f"  [DIFF] {label}: legacy_fal → plugin (fal) — expected")
+            expected_diffs.append((label, before, after))
+        else:
+            print(f"  [FAIL] {label}")
+            print(f"    main: {before}")
+            print(f"    pr:   {after}")
+            regressed.append(label)
+
+    print()
+    if errored:
+        print(f"SUBPROCESS ERRORS in {len(errored)} scenario(s):")
+        for name in errored:
+            print(f"  - {name}")
+    if regressed:
+        print(f"BEHAVIOUR REGRESSION in {len(regressed)} scenario(s):")
+        for name in regressed:
+            print(f"  - {name}")
+    if expected_diffs:
+        print(
+            f"INTENTIONAL DIFFS ({len(expected_diffs)}): "
+            f"legacy_fal → plugin dispatch for explicit FAL paths."
+        )
+
+    if regressed or errored:
+        return 1
+    print(f"PARITY OK across {len(SCENARIOS)} scenarios.")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+"""Tests for the FAL.ai image generation plugin.
+
+The plugin is a thin registration adapter — actual FAL pipeline logic
+lives in ``tools.image_generation_tool`` and is exercised by
+``tests/tools/test_image_generation.py``. These tests focus on:
+
+* the ``ImageGenProvider`` ABC surface (name, models, schema)
+* call-time indirection (``_it`` resolution at ``generate()`` time so
+  ``monkeypatch.setattr(image_tool, ...)`` keeps working)
+* response shape stamping (provider/prompt/aspect_ratio/model)
+"""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Provider surface
+# ---------------------------------------------------------------------------
+
+
+class TestFalImageGenProviderSurface:
+    """Static provider surface: name, display name, models, setup schema."""
+
+    def test_name(self):
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        provider = FalImageGenProvider()
+        assert provider.name == "fal"
+
+    def test_display_name(self):
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        provider = FalImageGenProvider()
+        assert provider.display_name == "FAL.ai"
+
+    def test_default_model_matches_legacy(self):
+        from plugins.image_gen.fal import FalImageGenProvider
+        from tools.image_generation_tool import DEFAULT_MODEL
+
+        provider = FalImageGenProvider()
+        assert provider.default_model() == DEFAULT_MODEL
+
+    def test_list_models_uses_legacy_catalog(self):
+        from plugins.image_gen.fal import FalImageGenProvider
+        from tools.image_generation_tool import FAL_MODELS
+
+        catalog = FalImageGenProvider().list_models()
+
+        # The provider mirrors whatever ids FAL_MODELS ships, verbatim.
+        listed_ids = {entry["id"] for entry in catalog}
+        assert listed_ids == set(FAL_MODELS.keys())
+
+        # Spot-check that the expected first-class fields are on every entry.
+        required = ("id", "display", "speed", "strengths", "price")
+        for entry in catalog:
+            absent = [field for field in required if field not in entry]
+            assert not absent
+
+    def test_setup_schema_advertises_fal_key(self):
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        schema = FalImageGenProvider().get_setup_schema()
+
+        assert schema["name"] == "FAL.ai"
+        assert schema["badge"] == "paid"
+        advertised = {entry["key"] for entry in schema.get("env_vars", [])}
+        assert "FAL_KEY" in advertised
+
+
+class TestFalImageGenProviderAvailability:
+    """``is_available()`` follows the legacy ``check_fal_api_key`` result."""
+
+    @staticmethod
+    def _is_available_with(monkeypatch, check):
+        """Swap in ``check`` as the legacy key check and ask the provider."""
+        import tools.image_generation_tool as image_tool
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        monkeypatch.setattr(image_tool, "check_fal_api_key", check)
+        return FalImageGenProvider().is_available()
+
+    def test_is_available_when_legacy_check_passes(self, monkeypatch):
+        assert self._is_available_with(monkeypatch, lambda: True) is True
+
+    def test_is_available_false_when_legacy_check_fails(self, monkeypatch):
+        assert self._is_available_with(monkeypatch, lambda: False) is False
+
+    def test_is_available_handles_legacy_exception(self, monkeypatch):
+        def failing_check():
+            raise RuntimeError("config broke")
+
+        # Picker must not propagate exceptions — show as "not available".
+        assert self._is_available_with(monkeypatch, failing_check) is False
+
+
+# ---------------------------------------------------------------------------
+# generate() — call-time indirection
+# ---------------------------------------------------------------------------
+
+
+class TestFalImageGenProviderGenerate:
+    """``generate()`` delegates to the legacy tool and stamps the response.
+
+    Every test replaces ``image_tool.image_generate_tool`` with a fake, so
+    no request leaves the process.
+    """
+
+    def test_generate_delegates_to_legacy_image_generate_tool(self, monkeypatch):
+        """Plugin must look up ``image_generate_tool`` at call time so
+        ``monkeypatch.setattr(image_tool, "image_generate_tool", ...)``
+        takes effect."""
+        import tools.image_generation_tool as image_tool
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        captured = {}
+
+        def fake_image_generate_tool(prompt, aspect_ratio, **kwargs):
+            captured["prompt"] = prompt
+            captured["aspect_ratio"] = aspect_ratio
+            captured["kwargs"] = kwargs
+            return json.dumps({"success": True, "image": "https://fake/image.png"})
+
+        monkeypatch.setattr(image_tool, "image_generate_tool", fake_image_generate_tool)
+        monkeypatch.setattr(image_tool, "_resolve_fal_model",
+                            lambda: ("fal-ai/flux-2/klein/9b", {}))
+
+        result = FalImageGenProvider().generate(
+            "a serene mountain landscape",
+            aspect_ratio="square",
+            seed=42,
+        )
+
+        assert captured["prompt"] == "a serene mountain landscape"
+        assert captured["aspect_ratio"] == "square"
+        assert captured["kwargs"] == {"seed": 42}
+        assert result["success"] is True
+        assert result["image"] == "https://fake/image.png"
+        # Stamped fields for the unified response shape
+        assert result["provider"] == "fal"
+        assert result["prompt"] == "a serene mountain landscape"
+        assert result["aspect_ratio"] == "square"
+        assert result["model"] == "fal-ai/flux-2/klein/9b"
+
+    def test_generate_invalid_aspect_ratio_is_coerced(self, monkeypatch):
+        """An unknown aspect ratio reaches the legacy tool as ``landscape``."""
+        import tools.image_generation_tool as image_tool
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        seen_aspect = {}
+
+        def fake(prompt, aspect_ratio, **kwargs):
+            seen_aspect["v"] = aspect_ratio
+            return json.dumps({"success": True, "image": "x"})
+
+        monkeypatch.setattr(image_tool, "image_generate_tool", fake)
+        monkeypatch.setattr(image_tool, "_resolve_fal_model",
+                            lambda: ("fal-ai/flux-2/klein/9b", {}))
+
+        FalImageGenProvider().generate("p", aspect_ratio="not-a-real-ratio")
+        # ``resolve_aspect_ratio`` clamps to landscape.
+        assert seen_aspect["v"] == "landscape"
+
+    def test_generate_passthrough_drops_none_kwargs(self, monkeypatch):
+        """Keyword arguments whose value is ``None`` are not forwarded."""
+        import tools.image_generation_tool as image_tool
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        seen = {}
+
+        def fake(prompt, aspect_ratio, **kwargs):
+            seen.update(kwargs)
+            return json.dumps({"success": True, "image": "x"})
+
+        monkeypatch.setattr(image_tool, "image_generate_tool", fake)
+        monkeypatch.setattr(image_tool, "_resolve_fal_model",
+                            lambda: ("fal-ai/flux-2/klein/9b", {}))
+
+        FalImageGenProvider().generate(
+            "p",
+            aspect_ratio="landscape",
+            seed=None,
+            num_images=2,
+            guidance_scale=None,
+        )
+
+        # ``None`` values must not be forwarded — they'd override the
+        # model's defaults inside the legacy payload builder.
+        assert "seed" not in seen
+        assert "guidance_scale" not in seen
+        assert seen.get("num_images") == 2
+
+    def test_generate_catches_exception_from_legacy(self, monkeypatch):
+        """An exception from the legacy tool becomes an error-shaped result."""
+        import tools.image_generation_tool as image_tool
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        def boom(*args, **kwargs):
+            raise RuntimeError("FAL endpoint exploded")
+
+        monkeypatch.setattr(image_tool, "image_generate_tool", boom)
+
+        result = FalImageGenProvider().generate("p")
+        assert result["success"] is False
+        assert "FAL image generation failed" in result["error"]
+        assert result["error_type"] == "RuntimeError"
+        assert result["provider"] == "fal"
+
+    def test_generate_invalid_json_response(self, monkeypatch):
+        """A non-JSON string from the legacy tool is reported as invalid JSON."""
+        import tools.image_generation_tool as image_tool
+        from plugins.image_gen.fal import FalImageGenProvider
+
+        # Accept positional and keyword calls alike, as the other fakes do.
+        # A keyword-only stub would raise TypeError on a positional call and
+        # the test would then exercise the exception path, not JSON parsing.
+        monkeypatch.setattr(image_tool, "image_generate_tool",
+                            lambda *args, **kwargs: "not-json")
+        monkeypatch.setattr(image_tool, "_resolve_fal_model",
+                            lambda: ("fal-ai/flux-2/klein/9b", {}))
+
+        result = FalImageGenProvider().generate("p")
+        assert result["success"] is False
+        assert "Invalid JSON" in result["error"]
+        assert result["provider"] == "fal"
+
+
+# ---------------------------------------------------------------------------
+# Registry wiring
+# ---------------------------------------------------------------------------
+
+
+class TestFalImageGenPluginRegistration:
+    """``register(ctx)`` hands one ``FalImageGenProvider`` to the plugin context."""
+
+    def test_register_wires_provider_into_registry(self):
+        from plugins.image_gen.fal import FalImageGenProvider, register
+
+        plugin_ctx = MagicMock()
+        register(plugin_ctx)
+
+        plugin_ctx.register_image_gen_provider.assert_called_once()
+        positional, _keywords = plugin_ctx.register_image_gen_provider.call_args
+        # Unpacking fails unless exactly one positional argument was passed.
+        (provider,) = positional
+        assert isinstance(provider, FalImageGenProvider)
@@ -0,0 +1,260 @@
+"""Tests for reactive multimodal-tool-content recovery.
+
+Covers the full chain for providers that reject list-type content in
+``role: "tool"`` messages (Xiaomi MiMo's 400 "text is not set", etc.):
+
+  1. agent/error_classifier.py: 400 with the right wording classifies as
+     ``FailoverReason.multimodal_tool_content_unsupported``.
+  2. run_agent._try_strip_image_parts_from_tool_messages downgrades tool
+     messages whose ``content`` is a list-with-image to a string text
+     summary, in-place, and records the active (provider, model) in
+     ``self._no_list_tool_content_models`` so future tool results in this
+     session preemptively downgrade.
+  3. run_agent._tool_result_content_for_active_model short-circuits to a
+     text summary when the (provider, model) is in the cache, even though
+     ``_model_supports_vision`` returns True — avoiding a wasted round
+     trip on every subsequent screenshot in the session.
+
+The end-to-end retry loop wiring (`conversation_loop.py`) is exercised by
+the classifier signal + helper-mutation tests; the integration only adds
+a trivial flag-and-continue around the existing pattern used for
+``image_too_large`` recovery.
+
+See: https://github.com/NousResearch/hermes-agent/issues/27344
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agent.error_classifier import FailoverReason, classify_api_error
+
+
+class _FakeApiError(Exception):
+    """Stand-in for an openai.BadRequestError with status_code + body.
+
+    Args:
+        status_code: Value exposed as ``status_code``.
+        message: Exception message; also used to build the default body.
+        body: Payload exposed as ``body``. When omitted (``None``), a
+            ``{"error": {"message": message}}`` payload is synthesized. An
+            explicitly passed empty dict is kept as given.
+    """
+
+    def __init__(self, status_code: int, message: str, body: dict | None = None):
+        super().__init__(message)
+        self.status_code = status_code
+        # ``is not None`` rather than truthiness, so a test can model a
+        # response whose body is an empty dict.
+        self.body = body if body is not None else {"error": {"message": message}}
+        self.response = None
+
+
+def _make_agent(provider: str = "xiaomi", model: str = "mimo-v2.5"):
+    """Return an ``AIAgent`` created without running ``__init__``.
+
+    Only ``provider`` and ``model`` are assigned; any other attribute a test
+    needs has to be set by that test.
+    """
+    from run_agent import AIAgent
+
+    bare = object.__new__(AIAgent)
+    bare.provider = provider
+    bare.model = model
+    return bare
+
+
+# ─── Strip helper ────────────────────────────────────────────────────────────
+
+
+class TestStripImagePartsHelper:
+    """``_try_strip_image_parts_from_tool_messages`` on various message lists."""
+
+    @staticmethod
+    def _text(text):
+        """Build a text content part."""
+        return {"type": "text", "text": text}
+
+    @staticmethod
+    def _image(b64):
+        """Build an ``image_url`` content part holding a PNG data URL."""
+        return {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}
+
+    @staticmethod
+    def _tool(content):
+        """Build a ``role: "tool"`` message around ``content``."""
+        return {"role": "tool", "tool_call_id": "x", "content": content}
+
+    def test_no_messages_returns_false(self):
+        agent = _make_agent()
+        for empty in ([], None):
+            assert agent._try_strip_image_parts_from_tool_messages(empty) is False
+
+    def test_no_tool_messages_returns_false(self):
+        agent = _make_agent()
+        conversation = [
+            {"role": "user", "content": "plain text"},
+            {"role": "assistant", "content": "ack"},
+        ]
+        assert agent._try_strip_image_parts_from_tool_messages(conversation) is False
+
+    def test_tool_message_with_string_content_unchanged(self):
+        agent = _make_agent()
+        conversation = [self._tool("plain string result")]
+        assert agent._try_strip_image_parts_from_tool_messages(conversation) is False
+        assert conversation[0]["content"] == "plain string result"
+
+    def test_tool_message_list_without_image_unchanged(self):
+        """A list holding only text parts is left alone — the caller surfaces
+        the original error if that turns out to be rejected as well."""
+        agent = _make_agent()
+        conversation = [self._tool([self._text("hello")])]
+        assert agent._try_strip_image_parts_from_tool_messages(conversation) is False
+
+    def test_tool_message_list_with_image_downgrades(self):
+        agent = _make_agent()
+        conversation = [
+            self._tool([
+                self._text("AX summary: 5 buttons visible"),
+                self._image("iVBOR..."),
+            ]),
+        ]
+        assert agent._try_strip_image_parts_from_tool_messages(conversation) is True
+        # Image stripped; text preserved as a string.
+        downgraded = conversation[0]["content"]
+        assert isinstance(downgraded, str)
+        assert "AX summary" in downgraded
+        assert "image_url" not in downgraded
+        assert "iVBOR" not in downgraded
+
+    def test_tool_message_image_only_gets_placeholder(self):
+        """A list holding nothing but image parts gets a placeholder so the
+        assistant message has something to reference."""
+        agent = _make_agent()
+        conversation = [self._tool([self._image("iVBOR...")])]
+        assert agent._try_strip_image_parts_from_tool_messages(conversation) is True
+        downgraded = conversation[0]["content"]
+        assert isinstance(downgraded, str)
+        assert "image content removed" in downgraded
+
+    def test_records_provider_model_in_session_cache(self):
+        agent = _make_agent(provider="xiaomi", model="mimo-v2.5")
+        conversation = [self._tool([self._text("summary"), self._image("X")])]
+        agent._try_strip_image_parts_from_tool_messages(conversation)
+        assert ("xiaomi", "mimo-v2.5") in agent._no_list_tool_content_models
+
+    def test_only_tool_messages_get_downgraded(self):
+        """List-type content on user / assistant messages is out of scope —
+        the existing image-routing path handles those."""
+        agent = _make_agent()
+        conversation = [
+            {"role": "user", "content": [self._text("describe"), self._image("X")]},
+            self._tool([self._text("summary"), self._image("Y")]),
+        ]
+        agent._try_strip_image_parts_from_tool_messages(conversation)
+        user_content = conversation[0]["content"]
+        tool_content = conversation[1]["content"]
+        # User message untouched.
+        assert isinstance(user_content, list)
+        assert any(part.get("type") == "image_url" for part in user_content)
+        # Tool message downgraded.
+        assert isinstance(tool_content, str)
+        assert "summary" in tool_content
+
+    def test_skips_recording_when_no_model_id(self):
+        """Empty provider/model (e.g. lazy-initialised mid-handshake) must
+        not poison the cache with empty keys."""
+        agent = _make_agent(provider="", model="")
+        conversation = [self._tool([self._text("summary"), self._image("X")])]
+        agent._try_strip_image_parts_from_tool_messages(conversation)
+        assert agent._no_list_tool_content_models == set()
+
+
+# ─── Short-circuit on cached models ──────────────────────────────────────────
+
+
+class TestToolResultContentShortCircuit:
+    """After the session learns that a (provider, model) pair rejects list
+    content, ``_tool_result_content_for_active_model`` hands back a text
+    summary even while ``_model_supports_vision`` reports True.
+    """
+
+    def _multimodal_result(self, png_b64: str = "iVBORw0KGgoAAAA"):
+        """Build a multimodal tool result: one text part plus one PNG part."""
+        summary = "capture mode=som 800x600 app=Safari"
+        image_part = {
+            "type": "image_url",
+            "image_url": {"url": f"data:image/png;base64,{png_b64}"},
+        }
+        return {
+            "_multimodal": True,
+            "content": [{"type": "text", "text": summary}, image_part],
+            "text_summary": summary,
+            "meta": {"mode": "som", "width": 800, "height": 600, "elements": 5,
+                     "png_bytes": 1024},
+        }
+
+    def _content_for(self, agent, monkeypatch):
+        """Force vision support on and render the sample result for ``agent``."""
+        monkeypatch.setattr(agent, "_model_supports_vision", lambda: True)
+        return agent._tool_result_content_for_active_model(
+            "computer_use", self._multimodal_result()
+        )
+
+    def test_returns_list_when_cache_empty_and_vision_supported(self, monkeypatch):
+        agent = _make_agent(provider="xiaomi", model="mimo-v2.5")
+        agent._no_list_tool_content_models = set()  # explicit empty
+        rendered = self._content_for(agent, monkeypatch)
+        # Native multimodal path: returns the content parts list.
+        assert isinstance(rendered, list)
+        assert any(part.get("type") == "image_url" for part in rendered)
+
+    def test_returns_text_summary_when_model_in_cache(self, monkeypatch):
+        agent = _make_agent(provider="xiaomi", model="mimo-v2.5")
+        agent._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
+        rendered = self._content_for(agent, monkeypatch)
+        # Short-circuit: a plain string summary, no image_url present.
+        assert isinstance(rendered, str)
+        assert "data:image" not in rendered
+        assert "image_url" not in rendered
+
+    def test_cache_miss_on_different_model(self, monkeypatch):
+        """The cache is keyed per (provider, model): an entry for mimo-v2.5
+        must NOT affect a session running on a different model.
+        """
+        agent = _make_agent(provider="xiaomi", model="mimo-v2.5-pro")
+        agent._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
+        rendered = self._content_for(agent, monkeypatch)
+        assert isinstance(rendered, list)
+
+    def test_missing_cache_attribute_falls_through(self, monkeypatch):
+        """Agents built via ``object.__new__`` without ``__init__`` may lack
+        the cache attribute entirely; that must not crash.
+        """
+        agent = _make_agent()
+        # Deliberately do not assign _no_list_tool_content_models.
+        rendered = self._content_for(agent, monkeypatch)
+        assert isinstance(rendered, list)
+
+
+# ─── Classifier ──────────────────────────────────────────────────────────────
+
+
+class TestRecoveryEndToEndClassification:
+    """Pin the error wordings the recovery path relies on to the expected
+    ``FailoverReason``. (The recovery hook in ``agent.conversation_loop``
+    consumes this reason directly.)
+    """
+
+    def test_xiaomi_mimo_classifies(self):
+        wording = (
+            "Error code: 400 - {'error': {'code': '400', 'message': "
+            "'Param Incorrect', 'param': 'text is not set', 'type': ''}}"
+        )
+        rejection = _FakeApiError(status_code=400, message=wording)
+
+        verdict = classify_api_error(rejection, provider="xiaomi", model="mimo-v2.5")
+
+        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
+        assert verdict.retryable is True
+
+    def test_alibaba_variant_classifies(self):
+        rejection = _FakeApiError(
+            status_code=400,
+            message="tool_call.content must be string",
+        )
+
+        verdict = classify_api_error(rejection, provider="alibaba", model="qwen3.5-plus")
+
+        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
@@ -2636,6 +2636,31 @@ class TestRunConversation:
        assert result["final_response"] == "Final answer"
        assert result["completed"] is True

+    def test_ollama_small_runtime_context_fails_before_api_call(self, agent, caplog):
+        """A too-small Ollama runtime context fails the turn with no API call.
+
+        With ``_ollama_num_ctx`` set to 4096, ``run_conversation`` must report
+        failure with ``turn_exit_reason == "ollama_runtime_context_too_small"``,
+        put remediation text in the final response, log a warning, and never
+        invoke the chat-completions client.
+        """
+        self._setup_agent(agent)
+        agent.model = "qwen3.5:9b"
+        agent.provider = "custom"
+        agent.base_url = "http://host.docker.internal:11434/v1"
+        agent._ollama_num_ctx = 4096
+
+        # Stub the persistence/cleanup hooks and capture WARNING-level records
+        # from the conversation-loop logger.
+        with (
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+            caplog.at_level(logging.WARNING, logger="agent.conversation_loop"),
+        ):
+            result = agent.run_conversation("Call ps -aux")
+
+        # The turn is reported as failed and no API call is counted.
+        assert result["failed"] is True
+        assert result["completed"] is False
+        assert result["api_calls"] == 0
+        assert result["turn_exit_reason"] == "ollama_runtime_context_too_small"
+        # The user-facing message names the model, the loaded size, and the fix.
+        assert "Ollama loaded `qwen3.5:9b` with only 4,096 tokens" in result["final_response"]
+        assert "model.ollama_num_ctx: 65536" in result["final_response"]
+        assert not agent.client.chat.completions.create.called
+        assert "Ollama runtime context too small for Hermes tool use" in caplog.text
+        assert "runtime_context=4096" in caplog.text
+
    def test_tool_calls_then_stop(self, agent):
        self._setup_agent(agent)
        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
@@ -0,0 +1,119 @@
+"""Tests for the secret-source tracking in ``hermes_cli.env_loader``.
+
+These cover the small public surface that lets `hermes model` / `hermes setup`
+label detected credentials with their origin ("from Bitwarden") so users
+don't see an unexplained "credentials ✓" line when their .env is empty.
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+from hermes_cli import env_loader  # noqa: E402
+
+
+@pytest.fixture(autouse=True)
+def _reset_sources():
+    """Each test starts with a clean source map.
+
+    ``env_loader._SECRET_SOURCES`` lives on the module, so it is cleared
+    before the test body runs and again after it finishes.
+    """
+    env_loader._SECRET_SOURCES.clear()
+    yield
+    env_loader._SECRET_SOURCES.clear()
+
+
+def test_get_secret_source_returns_none_for_untracked_var():
+    """Looking up a variable that was never recorded yields ``None``."""
+    source = env_loader.get_secret_source("ANTHROPIC_API_KEY")
+    assert source is None
+
+
+def test_get_secret_source_returns_label_for_tracked_var():
+    """A variable recorded in the source map reports the stored label."""
+    env_loader._SECRET_SOURCES["ANTHROPIC_API_KEY"] = "bitwarden"
+    source = env_loader.get_secret_source("ANTHROPIC_API_KEY")
+    assert source == "bitwarden"
+
+
+def test_format_secret_source_suffix_empty_for_untracked():
+    """Untracked credentials get no suffix at all."""
+    # Credentials from .env or the shell shouldn't add noise, so the
+    # implicit case stays unlabeled.
+    suffix = env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
+    assert suffix == ""
+
+
+def test_format_secret_source_suffix_bitwarden_uses_proper_name():
+    """The ``bitwarden`` label is rendered with its capitalised product name."""
+    env_loader._SECRET_SOURCES["ANTHROPIC_API_KEY"] = "bitwarden"
+    suffix = env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
+    assert suffix == " (from Bitwarden)"
+
+
+def test_format_secret_source_suffix_generic_label_for_future_sources():
+    """A source label without special handling is echoed verbatim in the suffix."""
+    # Future-proofing: a new secret source (e.g. "vault") should still
+    # produce a sensible label without needing to edit every call site.
+    env_loader._SECRET_SOURCES["OPENAI_API_KEY"] = "vault"
+    suffix = env_loader.format_secret_source_suffix("OPENAI_API_KEY")
+    assert suffix == " (from vault)"
+
+
+def test_apply_external_secret_sources_records_bitwarden_origin(tmp_path, monkeypatch):
+    """End-to-end: when ``apply_bitwarden_secrets`` returns applied keys,
+    they end up in ``_SECRET_SOURCES`` so the UI can label them.
+
+    A Bitwarden-enabled ``config.yaml`` is written under ``tmp_path`` (also
+    exported as ``HERMES_HOME``) and the Bitwarden fetch is replaced by a
+    stub, so no real secret backend is contacted.
+    """
+
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    config_path = tmp_path / "config.yaml"
+    config_path.write_text(
+        "secrets:\n"
+        "  bitwarden:\n"
+        "    enabled: true\n"
+        "    project_id: test-project\n"
+        "    access_token_env: BWS_ACCESS_TOKEN\n",
+        encoding="utf-8",
+    )
+
+    # Stub apply_bitwarden_secrets to return a synthetic FetchResult.
+    from agent.secret_sources.bitwarden import FetchResult
+
+    fake_result = FetchResult(
+        secrets={"ANTHROPIC_API_KEY": "sk-ant-test"},
+        applied=["ANTHROPIC_API_KEY"],
+    )
+
+    # Accepts and ignores any keyword arguments the loader passes.
+    def _fake_apply(**_kwargs):
+        return fake_result
+
+    # The import inside _apply_external_secret_sources is lazy, so we
+    # patch the *module attribute* it will pull in.
+    import agent.secret_sources.bitwarden as bw_module
+
+    monkeypatch.setattr(bw_module, "apply_bitwarden_secrets", _fake_apply)
+
+    env_loader._apply_external_secret_sources(tmp_path)
+
+    # Both the raw label and the rendered suffix must reflect the origin.
+    assert env_loader.get_secret_source("ANTHROPIC_API_KEY") == "bitwarden"
+    assert (
+        env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
+        == " (from Bitwarden)"
+    )
+
+
+def test_apply_external_secret_sources_noop_when_disabled(tmp_path, monkeypatch):
+    """With Bitwarden set to ``enabled: false``, no source gets recorded."""
+
+    disabled_config = (
+        "secrets:\n"
+        "  bitwarden:\n"
+        "    enabled: false\n"
+    )
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "config.yaml").write_text(disabled_config, encoding="utf-8")
+
+    env_loader._apply_external_secret_sources(tmp_path)
+
+    source = env_loader.get_secret_source("ANTHROPIC_API_KEY")
+    assert source is None
@@ -59,6 +59,59 @@ def test_write_json_returns_false_on_broken_pipe(monkeypatch):
    assert server.write_json({"ok": True}) is False


+def test_tui_verbose_tool_details_fail_closed_when_redaction_fails(monkeypatch):
+    """If redaction raises, the verbose-text helpers return an empty string.
+
+    A stub ``agent.redact`` module whose ``redact_sensitive_text`` always
+    raises is installed in ``sys.modules``; each helper must then yield ``""``
+    instead of the unredacted input.
+    """
+    redact_module = types.ModuleType("agent.redact")
+
+    def fail_redaction(*_args, **_kwargs):
+        raise RuntimeError("redaction unavailable")
+
+    setattr(redact_module, "redact_sensitive_text", fail_redaction)
+    # monkeypatch restores the previous sys.modules entry at teardown.
+    monkeypatch.setitem(sys.modules, "agent.redact", redact_module)
+
+    assert server._redact_tui_verbose_text("api_key=secret") == ""
+    assert server._tool_args_text({"api_key": "secret"}) == ""
+    assert server._tool_result_text("token=secret") == ""
+
+
+def test_tui_verbose_tool_details_are_capped_before_emit(monkeypatch):
+    """Over-limit verbose text is cut down to its tail behind an omission notice."""
+    # Shrink both limits so the four-line input below exceeds them.
+    monkeypatch.setattr(server, "_TUI_VERBOSE_TEXT_MAX_CHARS", 12)
+    monkeypatch.setattr(server, "_TUI_VERBOSE_TEXT_MAX_LINES", 2)
+
+    capped = server._cap_tui_verbose_text("one\ntwo\nthree\nfour")
+
+    # The last two lines survive, the first is gone, and a notice leads the text.
+    assert capped.startswith("[showing verbose tail; omitted ")
+    assert capped.endswith("three\nfour")
+    assert "one" not in capped
+
+
+def test_tui_verbose_tool_events_omit_details_when_redaction_fails(monkeypatch):
+    """Tool events are still emitted, minus detail text, when redaction fails.
+
+    With a raising ``redact_sensitive_text`` stub and a session in
+    ``verbose`` progress mode, ``tool.start`` and ``tool.complete`` must be
+    emitted without ``args_text`` / ``result_text`` in their payloads.
+    """
+    redact_module = types.ModuleType("agent.redact")
+
+    def fail_redaction(*_args, **_kwargs):
+        raise RuntimeError("redaction unavailable")
+
+    setattr(redact_module, "redact_sensitive_text", fail_redaction)
+    monkeypatch.setitem(sys.modules, "agent.redact", redact_module)
+
+    # Record every emitted event as (event_type, session id, payload).
+    events: list[tuple[str, str, dict]] = []
+    monkeypatch.setattr(
+        server, "_emit", lambda event_type, sid, payload: events.append((event_type, sid, payload))
+    )
+    # Register a throwaway session configured for verbose tool progress.
+    monkeypatch.setitem(
+        server._sessions,
+        "redaction-test",
+        {"tool_progress_mode": "verbose", "tool_started_at": {}},
+    )
+
+    server._on_tool_start("redaction-test", "tool-1", "terminal", {"command": "pwd"})
+    server._on_tool_complete("redaction-test", "tool-1", "terminal", {"command": "pwd"}, "done")
+
+    assert events[0][0] == "tool.start"
+    assert events[1][0] == "tool.complete"
+    assert "args_text" not in events[0][2]
+    assert "result_text" not in events[1][2]
+
+
 def test_dispatch_rejects_non_object_request():
    resp = server.dispatch([])

@@ -8,6 +8,8 @@ depend on the registry being populated should use it explicitly or via
 ``@pytest.mark.usefixtures("web_registry_populated")``.
 """

+from unittest.mock import patch
+
 import pytest


@@ -48,3 +50,20 @@ def web_registry_populated():
    yield
    from agent.web_search_registry import _reset_for_tests
    _reset_for_tests()
+
+
+@pytest.fixture
+def disable_lazy_stt_install():
+    """Neutralise the runtime lazy-install probe for the duration of a test.
+
+    Static ``_HAS_FASTER_WHISPER`` patches are meant to simulate
+    'faster-whisper not installed'. Left alone, ``_try_lazy_install_stt()``
+    calls ``importlib.util.find_spec("faster_whisper")``, which is truthy
+    whenever the package is installed in the dev / CI environment, and that
+    defeats the test's ``_HAS_FASTER_WHISPER=False`` patch. Here the probe is
+    replaced by a mock that returns ``False``.
+
+    Opt in at module scope with
+    ``pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")``.
+    """
+    probe_patch = patch(
+        "tools.transcription_tools._try_lazy_install_stt", return_value=False
+    )
+    with probe_patch:
+        yield
@@ -0,0 +1,246 @@
+"""Unit tests for tools/app_tools.py — the Nous tool gateway integration."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock
+
+import httpx
+import pytest
+
+from tools.managed_tool_gateway import ManagedToolGatewayConfig
+
+
+# Canned gateway config returned by the patched ``resolve_managed_tool_gateway``
+# in the tests below; the origin and token are placeholder values.
+_FAKE_GATEWAY = ManagedToolGatewayConfig(
+    vendor="tools",
+    gateway_origin="https://tools-gateway.example.com",
+    nous_user_token="test-token-abc123",
+    managed_mode=True,
+)
+
+
+@pytest.fixture(autouse=True)
+def _reset_http_client_cache():
+    """Clear the module-level cached httpx client between tests.
+
+    Both ``_http_client`` and ``_http_client_origin`` on ``tools.app_tools``
+    are set to ``None`` before the test and again after it.
+    """
+    import tools.app_tools as mod
+    mod._http_client = None
+    mod._http_client_origin = None
+    yield
+    mod._http_client = None
+    mod._http_client_origin = None
+
+
+@pytest.fixture()
+def gateway_post(monkeypatch):
+    """Patch the gateway and httpx.Client.post; return a dict capturing the request.
+
+    The returned dict is filled with ``url``, ``headers`` and ``json`` when
+    the code under test posts. Every post receives the same canned 200
+    response whose body is ``{"data": {}, "error": None}``. Model-name
+    resolution is stubbed to return ``None``.
+    """
+    monkeypatch.setattr(
+        "tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY
+    )
+    monkeypatch.setattr(
+        "tools.app_tools._get_current_model_name", lambda: None
+    )
+    captured = {}
+    resp = MagicMock(spec=httpx.Response)
+    resp.status_code = 200
+    resp.json.return_value = {"data": {}, "error": None}
+    resp.text = json.dumps({"data": {}, "error": None})
+
+    # The ``json`` keyword shadows the ``json`` module, but only inside
+    # ``fake_post``; the module is not used in its body.
+    def fake_post(self, url, *, json=None, headers=None, **kw):
+        captured["url"] = url
+        captured["headers"] = headers
+        captured["json"] = json
+        return resp
+
+    monkeypatch.setattr(httpx.Client, "post", fake_post)
+    return captured
+
+
+# ---------------------------------------------------------------------------
+# check_fn gating
+# ---------------------------------------------------------------------------
+
+class TestAppToolsAvailability:
+    """``_app_tools_available`` combines gateway readiness with the portal
+    app-tools setting; the cases below cover three of the combinations."""
+
+    def test_returns_false_when_gateway_not_ready(self, monkeypatch):
+        """Gateway not ready, setting enabled -> unavailable."""
+        monkeypatch.setattr("tools.app_tools.is_managed_tool_gateway_ready", lambda vendor: False)
+        monkeypatch.setattr("tools.app_tools._read_portal_app_tools_enabled", lambda: True)
+        from tools.app_tools import _app_tools_available
+        assert _app_tools_available() is False
+
+    def test_returns_true_when_gateway_ready_and_config_on(self, monkeypatch):
+        """Gateway ready, setting enabled -> available."""
+        monkeypatch.setattr("tools.app_tools.is_managed_tool_gateway_ready", lambda vendor: True)
+        monkeypatch.setattr("tools.app_tools._read_portal_app_tools_enabled", lambda: True)
+        from tools.app_tools import _app_tools_available
+        assert _app_tools_available() is True
+
+    def test_returns_false_when_config_off(self, monkeypatch):
+        """Gateway ready, setting disabled -> unavailable."""
+        monkeypatch.setattr("tools.app_tools.is_managed_tool_gateway_ready", lambda vendor: True)
+        monkeypatch.setattr("tools.app_tools._read_portal_app_tools_enabled", lambda: False)
+        from tools.app_tools import _app_tools_available
+        assert _app_tools_available() is False
+
+
+# ---------------------------------------------------------------------------
+# URL + auth header
+# ---------------------------------------------------------------------------
+
+class TestSearchPostsCorrectUrlAndAuth:
+    """The search handler posts to the gateway's ``/v1/search`` with bearer auth."""
+
+    def test_posts_to_v1_search_with_bearer_token(self, monkeypatch, gateway_post):
+        """URL, auth/content-type headers, queries and model all reach the gateway."""
+        # Override the fixture's ``None`` stub so a model name is available.
+        monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: "test-model")
+        from tools.app_tools import handle_app_search_tools
+        handle_app_search_tools({"queries": [{"use_case": "send email"}]})
+
+        assert gateway_post["url"] == "https://tools-gateway.example.com/v1/search"
+        assert gateway_post["headers"]["Authorization"] == "Bearer test-token-abc123"
+        assert gateway_post["headers"]["Content-Type"] == "application/json"
+        assert gateway_post["json"]["queries"] == [{"use_case": "send email"}]
+        assert gateway_post["json"]["model"] == "test-model"
+
+
+# ---------------------------------------------------------------------------
+# Model auto-injection
+# ---------------------------------------------------------------------------
+
+class TestModelAutoInjection:
+    """The request body carries ``model`` only when a model name resolves."""
+
+    def test_injects_model_from_config(self, monkeypatch, gateway_post):
+        """A resolved model name is added to the posted body."""
+        monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: "claude-sonnet-4")
+        from tools.app_tools import handle_app_search_tools
+        handle_app_search_tools({"queries": [{"use_case": "test"}]})
+        assert gateway_post["json"]["model"] == "claude-sonnet-4"
+
+    def test_omits_model_when_unresolvable(self, gateway_post):
+        """No ``model`` key is sent when resolution yields ``None``."""
+        # Relies on ``gateway_post`` stubbing ``_get_current_model_name`` to None.
+        from tools.app_tools import handle_app_search_tools
+        handle_app_search_tools({"queries": [{"use_case": "test"}]})
+        assert "model" not in gateway_post["json"]
+
+
+# ---------------------------------------------------------------------------
+# Gateway-internal param stripping (allowlist approach)
+# ---------------------------------------------------------------------------
+
+class TestExecuteStripsInternalParams:
+    """The execute handler must not forward gateway-internal parameters."""
+
+    def test_strips_sync_response_thought_step_metric(self, gateway_post):
+        """Four internal keys are dropped while ``tools`` is forwarded unchanged."""
+        from tools.app_tools import handle_app_execute_tools
+        handle_app_execute_tools({
+            "tools": [{"tool_slug": "TEST", "arguments": {}}],
+            "sync_response_to_workbench": True,
+            "thought": "testing",
+            "current_step": "TESTING",
+            "current_step_metric": "1/1 tests",
+        })
+        body = gateway_post["json"]
+        # None of the internal keys may appear in the posted body.
+        for key in ("sync_response_to_workbench", "thought", "current_step", "current_step_metric"):
+            assert key not in body
+        assert body["tools"] == [{"tool_slug": "TEST", "arguments": {}}]
+
+
+# ---------------------------------------------------------------------------
+# HTTP error → tool result (not exception)
+# ---------------------------------------------------------------------------
+
+class TestHttpErrorReturnedAsToolResult:
+    """4xx/5xx gateway responses are surfaced as the tool's JSON result."""
+
+    @pytest.mark.parametrize("status_code", [402, 403, 422, 500])
+    def test_returns_error_json_not_exception(self, monkeypatch, status_code):
+        """The gateway's error body is returned as JSON instead of raising."""
+        monkeypatch.setattr("tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY)
+        # Stub model resolution the same way the ``gateway_post`` fixture does,
+        # so the handler does not depend on whatever model the environment
+        # would resolve.
+        monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: None)
+        error_body = {"error": {"code": "TEST_ERROR", "message": "fail"}}
+        resp = MagicMock(spec=httpx.Response)
+        resp.status_code = status_code
+        resp.json.return_value = error_body
+        resp.text = json.dumps(error_body)
+        monkeypatch.setattr(httpx.Client, "post", lambda self, url, **kw: resp)
+
+        from tools.app_tools import handle_app_search_tools
+        result = json.loads(handle_app_search_tools({"queries": [{"use_case": "test"}]}))
+        assert result["error"]["code"] == "TEST_ERROR"
+
+
+# ---------------------------------------------------------------------------
+# Network failure → tool result
+# ---------------------------------------------------------------------------
+
+class TestNetworkFailureReturnedAsToolResult:
+    """Transport-level failures are reported as error codes in the tool result."""
+
+    def test_connect_error_returns_gateway_unreachable(self, monkeypatch):
+        """``httpx.ConnectError`` is reported as ``GATEWAY_UNREACHABLE``."""
+        monkeypatch.setattr("tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY)
+        # Stub model resolution the same way the ``gateway_post`` fixture does,
+        # so the handler does not depend on the ambient model configuration.
+        monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: None)
+
+        def raise_connect(self, url, **kw):
+            raise httpx.ConnectError("Connection refused")
+        monkeypatch.setattr(httpx.Client, "post", raise_connect)
+
+        from tools.app_tools import handle_app_search_tools
+        result = json.loads(handle_app_search_tools({"queries": [{"use_case": "test"}]}))
+        assert result["error"]["code"] == "GATEWAY_UNREACHABLE"
+
+    def test_timeout_returns_gateway_timeout(self, monkeypatch):
+        """``httpx.ReadTimeout`` is reported as ``GATEWAY_TIMEOUT``."""
+        monkeypatch.setattr("tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY)
+        # Same hermeticity stub as in the connect-error case.
+        monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: None)
+
+        def raise_timeout(self, url, **kw):
+            raise httpx.ReadTimeout("timed out")
+        monkeypatch.setattr(httpx.Client, "post", raise_timeout)
+
+        from tools.app_tools import handle_app_search_tools
+        result = json.loads(handle_app_search_tools({"queries": [{"use_case": "test"}]}))
+        assert result["error"]["code"] == "GATEWAY_TIMEOUT"
+
+
+# ---------------------------------------------------------------------------
+# Endpoint routing + payload forwarding
+# ---------------------------------------------------------------------------
+
+class TestEndpointRouting:
+    """Each handler posts to its own endpoint and forwards its payload fields."""
+
+    def test_manage_connections_forwards_toolkits(self, gateway_post):
+        """Connections requests go to ``/v1/connections`` with both fields intact."""
+        from tools.app_tools import handle_app_manage_connections
+        handle_app_manage_connections({"toolkits": ["gmail", "slack"], "reinitiate_all": True})
+        assert gateway_post["url"].endswith("/v1/connections")
+        assert gateway_post["json"]["toolkits"] == ["gmail", "slack"]
+        assert gateway_post["json"]["reinitiate_all"] is True
+
+    def test_tool_schemas_forwards_slugs(self, gateway_post):
+        """Schema requests go to ``/v1/schemas`` with ``tool_slugs`` and ``include``."""
+        from tools.app_tools import handle_app_tool_schemas
+        handle_app_tool_schemas({"tool_slugs": ["GMAIL_SEND_EMAIL"], "include": ["input_schema", "output_schema"]})
+        assert gateway_post["url"].endswith("/v1/schemas")
+        assert gateway_post["json"]["tool_slugs"] == ["GMAIL_SEND_EMAIL"]
+        assert gateway_post["json"]["include"] == ["input_schema", "output_schema"]
+
+
+# ---------------------------------------------------------------------------
+# Registry entries
+# ---------------------------------------------------------------------------
+
+class TestRegistryEntries:
+    """The four app tools must be present in the registry under ``app_tools``."""
+
+    def test_all_four_tools_registered_under_app_tools(self):
+        """Every expected tool name has a registry entry in the ``app_tools`` toolset."""
+        from tools.registry import registry
+        # Imported only for its side effect — presumably this is what
+        # registers the tools; confirm against tools/app_tools.py.
+        import tools.app_tools  # noqa: F401
+        expected = {"app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections"}
+        for name in expected:
+            entry = registry._tools.get(name)
+            assert entry is not None, f"{name} not registered"
+            assert entry.toolset == "app_tools"
+
+
+# ---------------------------------------------------------------------------
+# session (object) vs session_id (string) asymmetry
+# ---------------------------------------------------------------------------
+
+class TestSessionHandling:
+    """Search forwards a ``session`` object; the other handlers forward a
+    ``session_id`` string. Neither form may leak into the other's body."""
+
+    def test_search_uses_session_object(self, gateway_post):
+        """Search sends ``session`` as a dict and no ``session_id``."""
+        from tools.app_tools import handle_app_search_tools
+        handle_app_search_tools({"queries": [{"use_case": "test"}], "session": {"generate_id": True}})
+        assert isinstance(gateway_post["json"]["session"], dict)
+        assert "session_id" not in gateway_post["json"]
+
+    def test_schemas_uses_session_id_string(self, gateway_post):
+        """Schemas sends ``session_id`` verbatim and no ``session``."""
+        from tools.app_tools import handle_app_tool_schemas
+        handle_app_tool_schemas({"tool_slugs": ["TEST"], "session_id": "sess-123"})
+        assert gateway_post["json"]["session_id"] == "sess-123"
+        assert "session" not in gateway_post["json"]
+
+    def test_execute_uses_session_id_string(self, gateway_post):
+        """Execute sends ``session_id`` verbatim and no ``session``."""
+        from tools.app_tools import handle_app_execute_tools
+        handle_app_execute_tools({"tools": [{"tool_slug": "TEST", "arguments": {}}], "session_id": "sess-456"})
+        assert gateway_post["json"]["session_id"] == "sess-456"
+        assert "session" not in gateway_post["json"]
+
+    def test_connections_uses_session_id_string(self, gateway_post):
+        """Connections sends ``session_id`` verbatim and no ``session``."""
+        from tools.app_tools import handle_app_manage_connections
+        handle_app_manage_connections({"toolkits": ["gmail"], "session_id": "sess-789"})
+        assert gateway_post["json"]["session_id"] == "sess-789"
+        assert "session" not in gateway_post["json"]
@@ -76,6 +76,27 @@ class TestSchema:
        modes = set(COMPUTER_USE_SCHEMA["parameters"]["properties"]["mode"]["enum"])
        assert modes == {"som", "vision", "ax"}

+    def test_schema_exposes_max_elements_cap_for_capture(self):
+        """The schema declares an integer ``max_elements`` property."""
+        from tools.computer_use.schema import COMPUTER_USE_SCHEMA
+        props = COMPUTER_USE_SCHEMA["parameters"]["properties"]
+        assert "max_elements" in props
+        assert props["max_elements"]["type"] == "integer"
+        # A missing ``minimum`` is tolerated (treated as 1); a declared one must be >= 1.
+        assert props["max_elements"].get("minimum", 1) >= 1
+
+    def test_schema_max_elements_documents_default_and_upper_bound(self):
+        """The schema's ``default`` and ``maximum`` must equal the runtime constants.
+
+        The original PR text said "Default 100" without a corresponding
+        `default` field, and had no upper bound — both Copilot findings.
+        """
+        from tools.computer_use.schema import COMPUTER_USE_SCHEMA
+        from tools.computer_use.tool import (
+            _DEFAULT_MAX_ELEMENTS,
+            _MAX_ALLOWED_MAX_ELEMENTS,
+        )
+        schema_props = COMPUTER_USE_SCHEMA["parameters"]["properties"]
+        max_elements_spec = schema_props["max_elements"]
+        assert max_elements_spec.get("default") == _DEFAULT_MAX_ELEMENTS
+        assert max_elements_spec.get("maximum") == _MAX_ALLOWED_MAX_ELEMENTS
+

 class TestRegistration:
    def test_tool_registers_with_registry(self):
@@ -205,6 +226,54 @@ class TestDispatch:
        parsed = json.loads(out)
        assert "error" in parsed

+    def test_set_value_routes_to_backend(self, noop_backend):
+        """``set_value`` has to be dispatched to the backend (regression for
+        the missing _NoopBackend stub)."""
+        from tools.computer_use.tool import handle_computer_use
+        request = {"action": "set_value", "value": "Option A", "element": 5}
+        payload = json.loads(handle_computer_use(request))
+        assert payload.get("ok") is True
+        assert payload.get("action") == "set_value"
+        assert any(call[0] == "set_value" for call in noop_backend.calls)
+
+    def test_set_value_missing_value_returns_error(self, noop_backend):
+        """``set_value`` without a ``value`` argument yields an error payload."""
+        from tools.computer_use.tool import handle_computer_use
+        out = handle_computer_use({"action": "set_value"})
+        parsed = json.loads(out)
+        assert "error" in parsed
+
+    def test_capture_after_skipped_when_action_failed(self, noop_backend):
+        """capture_after must not fire when res.ok=False (regression guard).
+
+        A follow-up screenshot after a failed action shows the screen in a
+        normal state, misleading the model into thinking the action succeeded.
+        The handler must return the failure payload and issue no ``capture``
+        call on the backend.
+        """
+        from unittest.mock import patch
+        from tools.computer_use.backend import ActionResult
+        from tools.computer_use.tool import handle_computer_use
+
+        # Make click() return a failure.
+        with patch.object(noop_backend, "click",
+                          return_value=ActionResult(ok=False, action="click",
+                                                    message="element not found")):
+            out = handle_computer_use({"action": "click", "element": 99,
+                                       "capture_after": True})
+
+        parsed = json.loads(out)
+        # Should return the error, not a multimodal capture.
+        assert parsed.get("ok") is False
+        assert parsed.get("action") == "click"
+        # No follow-up capture should have been issued.
+        capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
+        assert len(capture_calls) == 0, "capture must not be called after a failed action"
+
+    def test_capture_after_fires_when_action_succeeds(self, noop_backend):
+        """capture_after must trigger for successful actions.
+
+        Exactly one ``capture`` entry is expected in the backend's call log.
+        """
+        from tools.computer_use.tool import handle_computer_use
+        # The return value is not inspected; only the backend call log matters.
+        out = handle_computer_use({"action": "click", "element": 1,
+                                   "capture_after": True})
+        # Noop backend returns ok=True, so capture should have been called.
+        capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
+        assert len(capture_calls) == 1
+

 # ---------------------------------------------------------------------------
 # Safety guards (type / key block lists)
@@ -337,6 +406,193 @@ class TestCaptureResponse:
        assert "AXButton" in text_part["text"]
        assert "AXTextField" in text_part["text"]

+    def _ax_backend_with(self, count: int):
+        """Construct a fake backend that yields ``count`` AX elements.
+
+        Every element is an ``AXButton`` indexed from 1 and labelled
+        ``el-0`` .. ``el-<count-1>``. ``capture`` reports an 800x600 frame
+        for app "Obsidian" with an empty ``png_b64``; the action methods are
+        inert stubs.
+        """
+        from tools.computer_use.backend import CaptureResult, UIElement
+
+        elements = [
+            UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
+            for i in range(count)
+        ]
+
+        class FakeBackend:
+            def start(self): pass
+            def stop(self): pass
+            def is_available(self): return True
+            def capture(self, mode="som", app=None):
+                # list(...) gives each capture its own copy of the element list.
+                return CaptureResult(
+                    mode=mode, width=800, height=600,
+                    png_b64="",
+                    elements=list(elements),
+                    app="Obsidian",
+                )
+            def click(self, **kw): ...
+            def drag(self, **kw): ...
+            def scroll(self, **kw): ...
+            def type_text(self, text): ...
+            def key(self, keys): ...
+            def list_apps(self): return []
+            def focus_app(self, app, raise_window=False): ...
+
+        return FakeBackend()
+
+    def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
+        """Regression for #22865: a dense (Electron-style) AX tree of 600
+        elements is capped at the default instead of being emitted verbatim
+        into the tool result.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=backend):
+            raw = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})
+
+        payload = json.loads(raw)
+        assert payload["mode"] == "ax"
+        assert payload["total_elements"] == 600
+        assert len(payload["elements"]) == cu_tool._DEFAULT_MAX_ELEMENTS
+        assert payload["truncated_elements"] == 600 - cu_tool._DEFAULT_MAX_ELEMENTS
+        # The human summary has to announce the truncation so the model knows
+        # the JSON view is partial and can re-issue with a tighter scope.
+        assert "truncated to" in payload["summary"]
+
+    def test_capture_ax_honors_explicit_max_elements_override(self):
+        """An explicit ``max_elements`` of 250 caps a 600-element capture at 250."""
+        from tools.computer_use import tool as cu_tool
+
+        backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        request = {"action": "capture", "mode": "ax", "max_elements": 250}
+        with patch.object(cu_tool, "_get_backend", return_value=backend):
+            raw = cu_tool.handle_computer_use(request)
+
+        payload = json.loads(raw)
+        assert len(payload["elements"]) == 250
+        assert payload["truncated_elements"] == 350
+
+    def test_capture_ax_below_cap_is_unchanged(self):
+        """Backwards-compat: a small capture keeps its full elements array
+        and carries no `truncated_elements` field or truncation note.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        backend = self._ax_backend_with(5)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=backend):
+            raw = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})
+
+        payload = json.loads(raw)
+        assert len(payload["elements"]) == 5
+        assert payload["total_elements"] == 5
+        assert "truncated_elements" not in payload
+        assert "truncated to" not in payload["summary"]
+
+    def test_capture_ax_invalid_max_elements_falls_back_to_default(self):
+        """Malformed `max_elements` (string, negative, zero) must not silently
+        disable the cap and re-introduce the original unbounded behavior.
+
+        Each bad value is sent against the same 600-element backend and must
+        produce exactly ``_DEFAULT_MAX_ELEMENTS`` elements.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        # One non-numeric string, zero, and a negative number.
+        for bad in ("not-a-number", 0, -10):
+            with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+                out = cu_tool.handle_computer_use(
+                    {"action": "capture", "mode": "ax", "max_elements": bad}
+                )
+            parsed = json.loads(out)
+            assert len(parsed["elements"]) == cu_tool._DEFAULT_MAX_ELEMENTS, (
+                f"bad max_elements={bad!r} disabled the cap"
+            )
+
+    def test_capture_ax_clamps_oversized_max_elements_to_hard_cap(self):
+        """A very large `max_elements` cannot switch the safeguard off: the
+        requested cap is clamped to a hard upper bound, so the
+        context-blow-up protection cannot be bypassed by argument.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        backend = self._ax_backend_with(5000)
+        cu_tool.reset_backend_for_tests()
+        request = {"action": "capture", "mode": "ax", "max_elements": 10_000}
+        with patch.object(cu_tool, "_get_backend", return_value=backend):
+            raw = cu_tool.handle_computer_use(request)
+        payload = json.loads(raw)
+        assert len(payload["elements"]) == cu_tool._MAX_ALLOWED_MAX_ELEMENTS
+        assert payload["total_elements"] == 5000
+        assert payload["truncated_elements"] == 5000 - cu_tool._MAX_ALLOWED_MAX_ELEMENTS
+
+    def test_capture_ax_summary_indices_match_returned_elements(self):
+        """When `max_elements` is below the human-summary's own line cap, the
+        summary must not index elements that aren't in the returned array.
+        Otherwise the model sees `#15` in the summary and finds no matching
+        entry in `elements`.
+        """
+        from tools.computer_use import tool as cu_tool
+
+        fake_backend = self._ax_backend_with(600)
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
+            out = cu_tool.handle_computer_use(
+                {"action": "capture", "mode": "ax", "max_elements": 5}
+            )
+        parsed = json.loads(out)
+        returned_indices = {e["index"] for e in parsed["elements"]}
+        summary_lines = parsed["summary"].splitlines()
+        # Summary lines whose first non-blank character is '#' are treated
+        # as element references.
+        indexed_lines = [ln for ln in summary_lines if ln.lstrip().startswith("#")]
+        for ln in indexed_lines:
+            # The first whitespace-delimited token, minus '#', is the index.
+            idx_token = ln.lstrip().split()[0].lstrip("#")
+            idx = int(idx_token)
+            assert idx in returned_indices, (
+                f"summary references #{idx} but it is absent from elements payload "
+                f"(returned: {sorted(returned_indices)})"
+            )
+
+    def test_capture_multimodal_summary_omits_truncation_note(self):
+        """The som/vision multimodal envelope returns a screenshot, not an
+        `elements` array — so a "response truncated to N of M elements"
+        claim in the summary would be inaccurate.
+        """
+        from tools.computer_use.backend import CaptureResult, UIElement
+        from tools.computer_use import tool as cu_tool
+
+        # A non-empty png_b64 value for the fake capture.
+        fake_png = "iVBORw0KGgo="
+        elements = [
+            UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
+            for i in range(600)
+        ]
+
+        class FakeBackend:
+            def start(self): pass
+            def stop(self): pass
+            def is_available(self): return True
+            def capture(self, mode="som", app=None):
+                return CaptureResult(
+                    mode=mode, width=800, height=600,
+                    png_b64=fake_png, elements=list(elements),
+                    app="Obsidian",
+                )
+            def click(self, **kw): ...
+            def drag(self, **kw): ...
+            def scroll(self, **kw): ...
+            def type_text(self, text): ...
+            def key(self, keys): ...
+            def list_apps(self): return []
+            def focus_app(self, app, raise_window=False): ...
+
+        cu_tool.reset_backend_for_tests()
+        with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
+            out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
+
+        # Expect the multimodal envelope (a dict), not a JSON string.
+        assert isinstance(out, dict) and out["_multimodal"] is True
+        text_part = next(p for p in out["content"] if p.get("type") == "text")
+        assert "truncated to" not in text_part["text"], (
+            "multimodal response carries an image, not an elements array; "
+            "the truncation note describes a payload field that isn't present"
+        )
+        assert "truncated to" not in out["text_summary"]
+

 # ---------------------------------------------------------------------------
 # Anthropic adapter: multimodal tool-result conversion
@@ -78,6 +78,63 @@ def test_resolve_managed_tool_gateway_is_disabled_without_subscription():
    assert result is None


+def test_rewrite_localhost_origin_rewrites_subdomain():
+    """A ``*.localhost`` subdomain is rewritten to 127.0.0.1 and the original host is returned."""
+    origin, host_header = managed_tool_gateway._rewrite_localhost_origin(
+        "http://tools-gateway.localhost:3009"
+    )
+    assert origin == "http://127.0.0.1:3009"
+    assert host_header == "tools-gateway.localhost:3009"
+
+
+def test_rewrite_localhost_origin_preserves_path():
+    """The URL path survives the rewrite; the returned host carries no path."""
+    origin, host_header = managed_tool_gateway._rewrite_localhost_origin(
+        "http://tools-gateway.localhost:3009/v1/foo"
+    )
+    assert origin == "http://127.0.0.1:3009/v1/foo"
+    assert host_header == "tools-gateway.localhost:3009"
+
+
+def test_rewrite_localhost_origin_no_port():
+    """Without an explicit port, neither the rewritten origin nor the host gains one."""
+    origin, host_header = managed_tool_gateway._rewrite_localhost_origin(
+        "http://tools-gateway.localhost"
+    )
+    assert origin == "http://127.0.0.1"
+    assert host_header == "tools-gateway.localhost"
+
+
+def test_rewrite_localhost_origin_ignores_bare_localhost():
+    """Plain ``localhost`` (no subdomain) is returned unchanged with no host override."""
+    url = "http://localhost:3009"
+    origin, host_header = managed_tool_gateway._rewrite_localhost_origin(url)
+    assert origin == "http://localhost:3009"
+    assert host_header is None
+
+
+def test_rewrite_localhost_origin_ignores_real_domains():
+    """A non-localhost domain passes through untouched with no host override."""
+    url = "https://tools-gateway.nousresearch.com"
+    origin, host_header = managed_tool_gateway._rewrite_localhost_origin(url)
+    assert origin == "https://tools-gateway.nousresearch.com"
+    assert host_header is None
+
+
+def test_gateway_config_resolved_origin_and_host_header():
+    """A ``*.localhost`` gateway config exposes the loopback origin and the original host."""
+    fields = {
+        "vendor": "tools",
+        "gateway_origin": "http://tools-gateway.localhost:3009",
+        "nous_user_token": "tok",
+        "managed_mode": True,
+    }
+    config = managed_tool_gateway.ManagedToolGatewayConfig(**fields)
+    assert config.resolved_origin == "http://127.0.0.1:3009"
+    assert config.gateway_host_header == "tools-gateway.localhost:3009"
+
+
+def test_gateway_config_resolved_origin_passthrough_for_real_domain():
+    """A real-domain gateway config keeps its origin and exposes no host override."""
+    fields = {
+        "vendor": "firecrawl",
+        "gateway_origin": "https://firecrawl-gateway.nousresearch.com",
+        "nous_user_token": "tok",
+        "managed_mode": True,
+    }
+    config = managed_tool_gateway.ManagedToolGatewayConfig(**fields)
+    assert config.resolved_origin == "https://firecrawl-gateway.nousresearch.com"
+    assert config.gateway_host_header is None
+
+
 def test_read_nous_access_token_refreshes_expiring_cached_token(tmp_path, monkeypatch):
    monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False)
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -91,7 +91,7 @@ class TestSSHBulkUpload:
        assert "/home/testuser/.hermes/credentials" in mkdir_str

    def test_staging_symlinks_mirror_remote_layout(self, mock_env, tmp_path):
-        """Symlinks in staging dir should mirror the remote path structure."""
+        """Symlinks in staging dir should mirror the .hermes-relative layout."""
        f1 = tmp_path / "local_a.txt"
        f1.write_text("content a")

@@ -107,9 +107,7 @@ class TestSSHBulkUpload:
                c_idx = cmd.index("-C")
                staging_dir = cmd[c_idx + 1]
                # Check the symlink exists
-                expected = os.path.join(
-                    staging_dir, "home/testuser/.hermes/skills/my_skill.md"
-                )
+                expected = os.path.join(staging_dir, "skills/my_skill.md")
                staging_paths.append(expected)
                assert os.path.islink(expected), f"Expected symlink at {expected}"
                assert os.readlink(expected) == os.path.abspath(str(f1))
@@ -166,14 +164,42 @@ class TestSSHBulkUpload:
        assert "-" in tar_cmd  # stdout
        assert "-C" in tar_cmd

-        # ssh: extract from stdin at /, preserving existing dir modes (#17767)
+        # ssh: extract from stdin at ~/.hermes, preserving existing dir modes (#17767)
        ssh_str = " ".join(ssh_cmd)
        assert "ssh" in ssh_str
        assert "tar xf -" in ssh_str
        assert "--no-overwrite-dir" in ssh_str
-        assert "-C /" in ssh_str
+        assert "-C /home/testuser/.hermes" in ssh_str
        assert "testuser@example.com" in ssh_str

+    def test_bulk_upload_never_stages_remote_home_prefix(self, mock_env, tmp_path):
+        """Regression: do not archive /home/<user> path components.
+
+        Uploads one file destined for ``/home/testuser/.hermes/cache/nested.txt``
+        and inspects the staging directory at the moment ``tar`` is spawned: it
+        must contain ``cache/nested.txt`` as a symlink and no top-level ``home``
+        entry.
+        """
+        f1 = tmp_path / "nested.txt"
+        f1.write_text("nested")
+        files = [(str(f1), "/home/testuser/.hermes/cache/nested.txt")]
+
+        # Used as the side_effect for every subprocess.Popen call; the staging
+        # assertions only run for the call whose argv starts with "tar".
+        def capture_tar_cmd(cmd, **kwargs):
+            if cmd[0] == "tar":
+                # The argument following "-C" is the staging directory.
+                c_idx = cmd.index("-C")
+                staging_dir = cmd[c_idx + 1]
+                assert not os.path.exists(os.path.join(staging_dir, "home"))
+                expected = os.path.join(staging_dir, "cache/nested.txt")
+                assert os.path.islink(expected)
+
+            # Stand-in process object that reports success and empty output.
+            mock = MagicMock()
+            mock.stdout = MagicMock()
+            mock.returncode = 0
+            mock.poll.return_value = 0
+            mock.communicate.return_value = (b"", b"")
+            mock.stderr = MagicMock()
+            mock.stderr.read.return_value = b""
+            return mock
+
+        # subprocess.run is stubbed to succeed; Popen is routed through the
+        # checker above.
+        # NOTE(review): the assertions fire inside the Popen side_effect, so they
+        # only fail the test if _ssh_bulk_upload lets the AssertionError
+        # propagate — confirm it does not catch broad exceptions.
+        with patch.object(subprocess, "run",
+                          return_value=subprocess.CompletedProcess([], 0)), \
+             patch.object(subprocess, "Popen", side_effect=capture_tar_cmd):
+            mock_env._ssh_bulk_upload(files)
+
    def test_mkdir_failure_raises(self, mock_env, tmp_path):
        """mkdir failure should raise RuntimeError before tar pipe."""
        f1 = tmp_path / "y.txt"
@@ -23,6 +23,9 @@ def _fake_faster_whisper_module(mock_model):
 # ---------------------------------------------------------------------------


+pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
+
+
@pytest.fixture(autouse=True)
 def _clear_openai_env(monkeypatch):
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
@@ -12,6 +12,9 @@ from unittest.mock import MagicMock, patch
 import pytest


+pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
+
+
@pytest.fixture(autouse=True)
 def isolate_env(monkeypatch):
    """Strip every STT-related env var so the test really exercises the
@@ -42,6 +42,9 @@ def sample_ogg(tmp_path):
    return str(ogg_path)


+pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
+
+
@pytest.fixture(autouse=True)
 def clean_env(monkeypatch):
    """Ensure no real API keys leak into tests."""
@@ -0,0 +1,438 @@
+"""App integration tools — 500+ external apps via the Nous tool gateway.
+
+Four meta tools that let the LLM discover, authenticate, and execute
+real app tools at runtime through the Nous managed tool gateway.
+
+Architecture:
+  Hermes → POST JSON → tools-gateway.nousresearch.com/v1/* → External APIs
+  Auth:   Bearer <nous_user_token> (subscription-gated)
+  Vendor: "tools" in the managed gateway infra (build_vendor_gateway_url)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from typing import Any, Dict, Optional
+
+import httpx
+
+from tools.registry import registry
+from tools.managed_tool_gateway import (
+    is_managed_tool_gateway_ready,
+    resolve_managed_tool_gateway,
+)
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
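+# Timeouts per endpoint: the first argument is the default for every phase
+# (read, write, pool); ``connect`` is overridden with a shorter value.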
+# ---------------------------------------------------------------------------
+_TIMEOUT_SEARCH = httpx.Timeout(30.0, connect=5.0)
+_TIMEOUT_SCHEMAS = httpx.Timeout(15.0, connect=5.0)
+_TIMEOUT_EXECUTE = httpx.Timeout(120.0, connect=5.0)
+_TIMEOUT_CONNECTIONS = httpx.Timeout(30.0, connect=5.0)
+
+# ---------------------------------------------------------------------------
+# Module-level cached httpx client — avoids TCP+TLS setup per tool call.
+# Follows the same thread-safe staleness pattern as image_generation_tool.py.
+# ---------------------------------------------------------------------------
+import threading
+
+_http_client: Optional[httpx.Client] = None
+_http_client_origin: Optional[str] = None
+# ``verify`` value the cached client was built with; part of the cache key so a
+# caller asking for different TLS verification never gets a stale client.
+_http_client_verify: Optional[bool] = None
+_http_client_lock = threading.Lock()
+
+
+def _get_http_client(origin: str, verify: bool = True) -> httpx.Client:
+    """Return a reusable httpx.Client, recreated when origin or ``verify`` changes.
+
+    Args:
+        origin: Gateway origin the client is cached for.
+        verify: Passed to ``httpx.Client(verify=...)`` when a client is built.
+
+    Returns:
+        The cached client when both ``origin`` and ``verify`` match the values
+        it was created with; otherwise a new client (the previous one is
+        closed first).
+    """
+    global _http_client, _http_client_origin, _http_client_verify
+    with _http_client_lock:
+        if (
+            _http_client is not None
+            and _http_client_origin == origin
+            and _http_client_verify == verify
+        ):
+            return _http_client
+        if _http_client is not None:
+            try:
+                _http_client.close()
+            except Exception:
+                # Best effort: a failed close must not block building the
+                # replacement client, but leave a trace for debugging.
+                logger.debug("Failed to close stale gateway HTTP client", exc_info=True)
+        _http_client = httpx.Client(verify=verify)
+        _http_client_origin = origin
+        _http_client_verify = verify
+        return _http_client
+
+
+# ---------------------------------------------------------------------------
+# Config / availability helpers
+# ---------------------------------------------------------------------------
+
+def _read_portal_app_tools_enabled() -> bool:
+    """Return whatever ``portal_app_tools_enabled()`` reports for the portal.app_tools flag."""
+    # Imported at call time rather than at module import.
+    import tools.tool_backend_helpers as backend_helpers
+    return backend_helpers.portal_app_tools_enabled()
+
+
+def _app_tools_available() -> bool:
+    """check_fn for the app tools.
+
+    Returns False when the portal.app_tools flag is off; otherwise returns the
+    result of ``is_managed_tool_gateway_ready("tools")``.
+    """
+    if _read_portal_app_tools_enabled():
+        return is_managed_tool_gateway_ready("tools")
+    return False
+
+
+def _get_current_model_name() -> Optional[str]:
+    """Look up the configured model name, returning None when it cannot be resolved.
+
+    Two config shapes are understood: ``"model": "name"`` and
+    ``"model": {"default": "name"}``.  The name is whitespace-stripped; a
+    blank or non-string value yields None so the caller can omit the field.
+    Any exception raised while importing or loading the config is swallowed
+    (best-effort lookup).
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        model_entry = load_config().get("model")
+        # Dict shape keeps the name under "default"; otherwise use the entry itself.
+        if isinstance(model_entry, dict):
+            candidate = model_entry.get("default")
+        else:
+            candidate = model_entry
+        if isinstance(candidate, str):
+            name = candidate.strip()
+            if name:
+                return name
+    except Exception:
+        pass
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Gateway HTTP client
+# ---------------------------------------------------------------------------
+
+def _gateway_post(
+    path: str,
+    payload: Dict[str, Any],
+    timeout: httpx.Timeout,
+) -> Dict[str, Any]:
+    """POST JSON to the tool gateway and return the parsed response.
+
+    Args:
+        path: Endpoint path appended to the gateway origin (e.g. ``/v1/search``).
+        payload: JSON-serializable request body.
+        timeout: httpx timeout applied to this request.
+
+    Returns:
+        The parsed JSON body regardless of HTTP status.  When the gateway is
+        not resolvable, the body is not JSON, the request times out, or the
+        request fails, an ``{"error": {"code", "message"}}`` dict is returned
+        instead so the LLM can see the failure and tell the user.
+
+    HTTP and network failures never raise.  ``resolve_managed_tool_gateway``
+    is called outside the ``try``, so an exception from it would propagate.
+    """
+    gateway = resolve_managed_tool_gateway("tools")
+    if gateway is None:
+        return {
+            "error": {
+                "code": "GATEWAY_UNAVAILABLE",
+                "message": "Nous tool gateway is not available. Check your subscription status.",
+            }
+        }
+
+    # Prefer the resolved origin (``*.localhost`` rewritten to 127.0.0.1) and
+    # send the original host as the Host header, matching how
+    # ManagedModalEnvironment talks to the gateway.  Fall back to the raw
+    # origin when the config object does not provide a resolved one.
+    resolved = getattr(gateway, "resolved_origin", None)
+    if not (isinstance(resolved, str) and resolved):
+        resolved = gateway.gateway_origin
+    origin = resolved.rstrip("/")
+    url = f"{origin}{path}"
+    headers = {
+        "Authorization": f"Bearer {gateway.nous_user_token}",
+        "Content-Type": "application/json",
+    }
+    host_header = getattr(gateway, "gateway_host_header", None)
+    if isinstance(host_header, str) and host_header:
+        headers["Host"] = host_header
+
+    try:
+        # Cache the client per origin directly instead of re-deriving it by
+        # splitting the URL on "/v1/".
+        client = _get_http_client(origin)
+        response = client.post(url, json=payload, headers=headers, timeout=timeout)
+
+        # Return parsed body regardless of status code — the LLM handles errors
+        try:
+            return response.json()
+        except Exception:
+            return {
+                "error": {
+                    "code": f"HTTP_{response.status_code}",
+                    "message": response.text[:2000],
+                }
+            }
+
+    except httpx.TimeoutException as exc:
+        return {
+            "error": {
+                "code": "GATEWAY_TIMEOUT",
+                "message": f"Request to {path} timed out: {exc}",
+            }
+        }
+    except Exception as exc:
+        return {
+            "error": {
+                "code": "GATEWAY_UNREACHABLE",
+                "message": f"Failed to reach tool gateway: {exc}",
+            }
+        }
+
+
+# ---------------------------------------------------------------------------
+# Tool handlers
+# ---------------------------------------------------------------------------
+
+def handle_app_search_tools(args: dict, **kw) -> str:
+    """Forward a tool-search request to ``/v1/search`` and return the response as JSON text.
+
+    Only ``queries``, ``session`` and ``model`` are copied from ``args``;
+    everything else is ignored.
+    """
+    body: Dict[str, Any] = {}
+
+    search_queries = args.get("queries")
+    if search_queries:
+        body["queries"] = search_queries
+
+    # ``session`` is an object ({id, generate_id}), not a string; any value
+    # other than None is forwarded, including an empty dict.
+    session_ctx = args.get("session")
+    if session_ctx is not None:
+        body["session"] = session_ctx
+
+    # An explicit ``model`` argument wins; otherwise fall back to the
+    # configured model, and leave the field out when neither resolves.
+    model_name = args.get("model") or _get_current_model_name()
+    if model_name:
+        body["model"] = model_name
+
+    result = _gateway_post("/v1/search", body, _TIMEOUT_SEARCH)
+    return json.dumps(result, ensure_ascii=False, default=str)
+
+
+def handle_app_tool_schemas(args: dict, **kw) -> str:
+    """Forward a schema lookup to ``/v1/schemas`` and return the response as JSON text.
+
+    Only ``tool_slugs``, ``include`` and ``session_id`` are copied from ``args``.
+    """
+    body: Dict[str, Any] = {}
+
+    # Both list-valued fields are forwarded only when truthy (missing or
+    # empty values are left out).
+    for field in ("tool_slugs", "include"):
+        value = args.get(field)
+        if value:
+            body[field] = value
+
+    # ``session_id`` is a string, not an object; only None/missing is dropped,
+    # so an empty string is still forwarded.
+    sid = args.get("session_id")
+    if sid is not None:
+        body["session_id"] = sid
+
+    result = _gateway_post("/v1/schemas", body, _TIMEOUT_SCHEMAS)
+    return json.dumps(result, ensure_ascii=False, default=str)
+
+
+def handle_app_execute_tools(args: dict, **kw) -> str:
+    """Forward a tool-execution request to ``/v1/execute`` and return the response as JSON text.
+
+    The payload is built from an allowlist: only ``tools`` and ``session_id``
+    are copied from ``args``.  Gateway-internal parameters
+    (sync_response_to_workbench, thought, current_step, current_step_metric)
+    are therefore never sent, without needing an explicit strip step.
+    """
+    body: Dict[str, Any] = {}
+
+    requested_tools = args.get("tools")
+    if requested_tools:
+        body["tools"] = requested_tools
+
+    # ``session_id`` is a string; only None/missing is dropped.
+    sid = args.get("session_id")
+    if sid is not None:
+        body["session_id"] = sid
+
+    result = _gateway_post("/v1/execute", body, _TIMEOUT_EXECUTE)
+    return json.dumps(result, ensure_ascii=False, default=str)
+
+
+def handle_app_manage_connections(args: dict, **kw) -> str:
+    """Forward a connection check/initiation to ``/v1/connections`` and return JSON text.
+
+    Only ``toolkits``, ``reinitiate_all`` and ``session_id`` are copied from ``args``.
+    """
+    body: Dict[str, Any] = {}
+
+    requested_toolkits = args.get("toolkits")
+    if requested_toolkits:
+        body["toolkits"] = requested_toolkits
+
+    # ``reinitiate_all`` (boolean) and ``session_id`` (string) are forwarded
+    # whenever they are not None, so False and "" still reach the gateway.
+    for field in ("reinitiate_all", "session_id"):
+        value = args.get(field)
+        if value is not None:
+            body[field] = value
+
+    result = _gateway_post("/v1/connections", body, _TIMEOUT_CONNECTIONS)
+    return json.dumps(result, ensure_ascii=False, default=str)
+
+
+# ---------------------------------------------------------------------------
+# Tool registration
+# ---------------------------------------------------------------------------
+
+# app_search_tools: discovery entry point.  Registered in the "app_tools"
+# toolset and gated by _app_tools_available.  The schema requires ``queries``
+# (at least one item, each with a ``use_case``) and accepts an optional
+# ``session`` object.
+registry.register(
+    name="app_search_tools",
+    toolset="app_tools",
+    schema={
+        "name": "app_search_tools",
+        "description": (
+            "Search 500+ app integrations (Gmail, Slack, GitHub, Notion, Google Sheets, "
+            "Jira, Linear, Figma, and more) to find tools for a task. Returns tool slugs, "
+            "execution plans, pitfalls, and connection status."
+        ),
+        "parameters": {
+            "type": "object",
+            "required": ["queries"],
+            "properties": {
+                "queries": {
+                    "type": "array",
+                    "minItems": 1,
+                    "description": (
+                        "Structured search queries. Split independent app actions "
+                        "into separate queries. Each returns 4-6 tools."
+                    ),
+                    "items": {
+                        "type": "object",
+                        "required": ["use_case"],
+                        "properties": {
+                            "use_case": {
+                                "type": "string",
+                                "maxLength": 1024,
+                                "description": (
+                                    "Normalized description of the task. Include app "
+                                    "names if mentioned. Do NOT include personal "
+                                    "identifiers — put those in known_fields."
+                                ),
+                            },
+                            "known_fields": {
+                                "type": "string",
+                                "description": (
+                                    "Known inputs as comma-separated key:value pairs "
+                                    "(e.g. 'channel_name:general'). Omit if not relevant."
+                                ),
+                            },
+                        },
+                    },
+                },
+                # Object-valued (unlike the string ``session_id`` used by the
+                # other three tools).
+                "session": {
+                    "type": "object",
+                    "description": "Session context. Pass {generate_id: true} for new workflows, {id: \"EXISTING\"} to continue.",
+                    "properties": {
+                        "id": {"type": "string", "description": "Existing session ID to reuse."},
+                        "generate_id": {"type": "boolean", "description": "Set true for first call of a new workflow."},
+                    },
+                },
+            },
+        },
+    },
+    # The lambda looks up handle_app_search_tools at call time.
+    # NOTE(review): presumably so the handler can be monkeypatched — confirm.
+    handler=lambda args, **kw: handle_app_search_tools(args, **kw),
+    check_fn=_app_tools_available,
+    description="Search 500+ app integrations",
+    emoji="🔍",
+)
+
+# app_tool_schemas: fetch input (and optionally output) schemas for tool slugs.
+# Registered in the "app_tools" toolset and gated by _app_tools_available.
+# The schema requires ``tool_slugs``; ``include`` and ``session_id`` are optional.
+registry.register(
+    name="app_tool_schemas",
+    toolset="app_tools",
+    schema={
+        "name": "app_tool_schemas",
+        "description": (
+            "Get full input parameter schemas for tools discovered via "
+            "app_search_tools. Only use slugs from search results — never invent."
+        ),
+        "parameters": {
+            "type": "object",
+            "required": ["tool_slugs"],
+            "properties": {
+                "tool_slugs": {
+                    "type": "array",
+                    "description": "Tool slugs to retrieve schemas for.",
+                    "items": {"type": "string", "minLength": 1},
+                },
+                # The default is declared in the schema only; the handler omits
+                # ``include`` from the payload when it is missing or empty.
+                "include": {
+                    "type": "array",
+                    "default": ["input_schema"],
+                    "description": "Schema fields to include. Add 'output_schema' for response validation.",
+                    "items": {"type": "string", "enum": ["input_schema", "output_schema"]},
+                },
+                "session_id": {
+                    "type": "string",
+                    "description": "Session ID from a prior app_search_tools call.",
+                },
+            },
+        },
+    },
+    # The lambda looks up handle_app_tool_schemas at call time.
+    handler=lambda args, **kw: handle_app_tool_schemas(args, **kw),
+    check_fn=_app_tools_available,
+    description="Get tool input schemas",
+    emoji="📋",
+)
+
+# app_execute_tools: run 1-50 discovered tools in one call.  Registered in the
+# "app_tools" toolset and gated by _app_tools_available.  This is the only
+# app tool registration that passes ``max_result_size_chars``.
+registry.register(
+    name="app_execute_tools",
+    toolset="app_tools",
+    schema={
+        "name": "app_execute_tools",
+        "description": (
+            "Execute one or more app tools in parallel (up to 50). "
+            "Requires active connection per toolkit. Use schema-compliant arguments only."
+        ),
+        "parameters": {
+            "type": "object",
+            "required": ["tools"],
+            "properties": {
+                "tools": {
+                    "type": "array",
+                    "minItems": 1,
+                    "maxItems": 50,
+                    "description": "Logically independent tools to execute in parallel.",
+                    "items": {
+                        "type": "object",
+                        "required": ["tool_slug", "arguments"],
+                        # Each item may carry only tool_slug and arguments.
+                        "additionalProperties": False,
+                        "properties": {
+                            "tool_slug": {
+                                "type": "string",
+                                "minLength": 1,
+                                "description": "Tool slug from search results — never invent.",
+                            },
+                            # Free-form: the schema does not constrain the
+                            # keys inside ``arguments``.
+                            "arguments": {
+                                "type": "object",
+                                "additionalProperties": True,
+                                "description": "Arguments matching the tool's input schema exactly.",
+                            },
+                        },
+                    },
+                },
+                "session_id": {
+                    "type": "string",
+                    "description": "Session ID from a prior app_search_tools call.",
+                },
+            },
+        },
+    },
+    # The lambda looks up handle_app_execute_tools at call time.
+    handler=lambda args, **kw: handle_app_execute_tools(args, **kw),
+    check_fn=_app_tools_available,
+    max_result_size_chars=50_000,
+    description="Execute app tools",
+    emoji="⚡",
+)
+
+# app_manage_connections: check or start OAuth/API-key connections.
+# Registered in the "app_tools" toolset and gated by _app_tools_available.
+# The schema requires ``toolkits``; ``reinitiate_all`` and ``session_id`` are
+# optional.
+registry.register(
+    name="app_manage_connections",
+    toolset="app_tools",
+    schema={
+        "name": "app_manage_connections",
+        "description": (
+            "Check or initiate OAuth/API key connections for app toolkits. "
+            "Returns auth links for inactive connections."
+        ),
+        "parameters": {
+            "type": "object",
+            "required": ["toolkits"],
+            "properties": {
+                "toolkits": {
+                    "type": "array",
+                    "description": "Toolkit slugs to check or connect (e.g. ['gmail', 'slack']).",
+                    "items": {"type": "string"},
+                },
+                # The default is declared in the schema only; the handler
+                # forwards the value only when the caller supplies one.
+                "reinitiate_all": {
+                    "type": "boolean",
+                    "default": False,
+                    "description": "Force reconnection even for active connections.",
+                },
+                "session_id": {
+                    "type": "string",
+                    "description": "Session ID from a prior app_search_tools call.",
+                },
+            },
+        },
+    },
+    # The lambda looks up handle_app_manage_connections at call time.
+    handler=lambda args, **kw: handle_app_manage_connections(args, **kw),
+    check_fn=_app_tools_available,
+    description="Manage app connections",
+    emoji="🔗",
+)
@@ -142,6 +142,14 @@ class ComputerUseBackend(ABC):
    def focus_app(self, app: str, raise_window: bool = False) -> ActionResult:
        """Route input to `app` (by name or bundle ID). Default: focus without raise."""

+    # ── Native-value mutation ────────────────────────────────────────
+    @abstractmethod
+    def set_value(self, value: str, element: Optional[int] = None) -> ActionResult:
+        """Set a native value on an element (e.g. AXPopUpButton selection).
+
+        Abstract: every concrete backend must provide an implementation.
+
+        Args:
+            value: The value to set, as a string.
+            element: The 1-based SOM index returned by a prior capture call.
+                NOTE(review): behavior when ``element`` is None is not defined
+                here — presumably backend-specific; confirm against the
+                implementations.
+
+        Returns:
+            An ActionResult describing the outcome.
+        """
+
    # ── Timing ──────────────────────────────────────────────────────
    def wait(self, seconds: float) -> ActionResult:
        """Default implementation: time.sleep."""
@@ -75,6 +75,28 @@ COMPUTER_USE_SCHEMA: Dict[str, Any] = {
                    "frontmost app's window or the whole screen."
                ),
            },
+            "max_elements": {
+                "type": "integer",
+                "description": (
+                    "Optional cap on the AX `elements` array returned by "
+                    "`action='capture'`. Default 100, hard maximum 1000. "
+                    "Dense UIs (Electron apps such as Obsidian or VS Code, "
+                    "JetBrains IDEs) can publish 500+ AX nodes — capping "
+                    "prevents a single capture from blowing session "
+                    "context. When the cap trims the response, "
+                    "`total_elements` and `truncated_elements` are "
+                    "surfaced in the result so you can re-call with "
+                    "`app=` to narrow scope or raise `max_elements` when "
+                    "the full tree is required. Has no effect on "
+                    "`mode='som'` / `mode='vision'` when a screenshot is "
+                    "included in the response; only the rare image-"
+                    "missing fallback returns an `elements` array and is "
+                    "subject to the cap."
+                ),
+                "default": 100,
+                "minimum": 1,
+                "maximum": 1000,
+            },
            # ── click / drag / scroll targeting ────────────────────
            "element": {
                "type": "integer",
@@ -200,6 +200,10 @@ class _NoopBackend(ComputerUseBackend):  # pragma: no cover
        self.calls.append(("focus_app", {"app": app, "raise": raise_window}))
        return ActionResult(ok=True, action="focus_app")

+    def set_value(self, value: str, element: Optional[int] = None) -> ActionResult:
+        """Record the call in ``self.calls`` and report success without doing anything."""
+        recorded_args = {"value": value, "element": element}
+        self.calls.append(("set_value", recorded_args))
+        return ActionResult(ok=True, action="set_value")
+

 # ---------------------------------------------------------------------------
 # Dispatch
@@ -317,7 +321,7 @@ def _dispatch(backend: ComputerUseBackend, action: str, args: Dict[str, Any]) ->
        if mode not in {"som", "vision", "ax"}:
            return json.dumps({"error": f"bad mode {mode!r}; use som|vision|ax"})
        cap = backend.capture(mode=mode, app=args.get("app"))
-        return _capture_response(cap)
+        return _capture_response(cap, max_elements=_coerce_max_elements(args.get("max_elements")))

    if action == "wait":
        seconds = float(args.get("seconds", 1.0))
@@ -416,16 +420,62 @@ def _text_response(res: ActionResult) -> str:
    return json.dumps(payload)


-def _capture_response(cap: CaptureResult) -> Any:
-    element_index = _format_elements(cap.elements)
+# Default cap for the AX `elements` array returned by capture. Dense UIs
+# (Electron apps, Obsidian, JetBrains IDEs) can publish 500+ AX nodes, which
+# can exhaust session context after a single capture. The model-facing
+# `max_elements` argument lets callers raise this when they need the full tree.
+_DEFAULT_MAX_ELEMENTS = 100
+# Hard upper bound on caller-supplied `max_elements`. Without this, a tool
+# call passing a very large integer would silently disable the safeguard and
+# reintroduce the original unbounded behavior.
+_MAX_ALLOWED_MAX_ELEMENTS = 1000
+
+
+def _coerce_max_elements(value: Any) -> int:
+    """Validate the caller-supplied ``max_elements``.
+
+    Falls back to :data:`_DEFAULT_MAX_ELEMENTS` for missing / non-integer /
+    non-finite / sub-1 inputs so the cap can never be silently disabled (or
+    the capture crashed) by a malformed tool-call argument. Clamps oversized
+    values to :data:`_MAX_ALLOWED_MAX_ELEMENTS` so a caller cannot bypass the
+    safeguard by passing a very large integer.
+
+    Args:
+        value: Raw ``max_elements`` argument; anything ``int()`` accepts is
+            converted (e.g. ``"250"`` -> 250, ``2.9`` -> 2).
+
+    Returns:
+        An integer in ``[1, _MAX_ALLOWED_MAX_ELEMENTS]``.
+    """
+    if value is None:
+        return _DEFAULT_MAX_ELEMENTS
+    try:
+        n = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError covers int(float("inf")); ValueError covers NaN and
+        # unparseable strings; TypeError covers lists, dicts, etc.
+        return _DEFAULT_MAX_ELEMENTS
+    if n < 1:
+        return _DEFAULT_MAX_ELEMENTS
+    return min(n, _MAX_ALLOWED_MAX_ELEMENTS)
+
+
+def _capture_response(cap: CaptureResult, max_elements: int = _DEFAULT_MAX_ELEMENTS) -> Any:
+    total_elements = len(cap.elements)
+    visible_elements = cap.elements[:max_elements]
+    truncated_elements = max(0, total_elements - len(visible_elements))
+
+    # Index only what's actually surfaced in the response — otherwise the
+    # human-readable summary references element indices the model cannot
+    # find in the JSON `elements` array (e.g. max_elements=10 vs the default
+    # 40-line index window).
+    element_index = _format_elements(visible_elements)
    summary_lines = [
        f"capture mode={cap.mode} {cap.width}x{cap.height}"
        + (f" app={cap.app}" if cap.app else "")
        + (f" window={cap.window_title!r}" if cap.window_title else ""),
-        f"{len(cap.elements)} interactable element(s):",
+        f"{total_elements} interactable element(s):",
    ]
    if element_index:
        summary_lines.extend(element_index)
+    # Multimodal and AX paths both reference `summary`; build it once up-front
+    # so the aux-vision routing branch (which fires before either path is
+    # selected) has a valid value to hand to _route_capture_through_aux_vision.
+    # The AX path appends the "truncated to N of M" note to summary_lines
+    # below and rebuilds; the multimodal path keeps this version untouched.
    summary = "\n".join(summary_lines)

    if cap.png_b64 and cap.mode != "ax":
@@ -449,6 +499,9 @@ def _capture_response(cap: CaptureResult) -> Any:
        # JPEG: base64 starts with /9j/   PNG: starts with iVBOR
        _b64_prefix = cap.png_b64[:8]
        _mime = "image/jpeg" if _b64_prefix.startswith("/9j/") else "image/png"
+        # The multimodal response carries the screenshot, not the AX
+        # elements array, so a "response truncated to N of M elements"
+        # note would be inaccurate — skip it on this branch.
        return {
            "_multimodal": True,
            "content": [
@@ -458,18 +511,29 @@ def _capture_response(cap: CaptureResult) -> Any:
            ],
            "text_summary": summary,
            "meta": {"mode": cap.mode, "width": cap.width, "height": cap.height,
-                     "elements": len(cap.elements), "png_bytes": cap.png_bytes_len},
+                     "elements": total_elements, "png_bytes": cap.png_bytes_len},
        }
-    # AX-only (or image missing): text path.
-    return json.dumps({
+    # AX-only (or image-missing fallback): text path actually carries the
+    # `elements` array, so the truncation note applies here.
+    if truncated_elements:
+        summary_lines.append(
+            f"  (response truncated to {len(visible_elements)} of {total_elements} elements; "
+            f"raise max_elements or pass app= to narrow)"
+        )
+    summary = "\n".join(summary_lines)
+    payload: Dict[str, Any] = {
        "mode": cap.mode,
        "width": cap.width,
        "height": cap.height,
        "app": cap.app,
        "window_title": cap.window_title,
-        "elements": [_element_to_dict(e) for e in cap.elements],
+        "elements": [_element_to_dict(e) for e in visible_elements],
+        "total_elements": total_elements,
        "summary": summary,
-    })
+    }
+    if truncated_elements:
+        payload["truncated_elements"] = truncated_elements
+    return json.dumps(payload)


 # ---------------------------------------------------------------------------
@@ -611,6 +675,11 @@ def _maybe_follow_capture(
 ) -> Any:
    if not do_capture:
        return _text_response(res)
+    # Skip the follow-up capture when the action itself failed: showing a
+    # normal-looking screenshot after a failure misleads the model into thinking
+    # the action succeeded. Return the error text instead.
+    if not res.ok:
+        return _text_response(res)
    try:
        # Preserve the app context established by the preceding capture/focus_app so
        # that capture_after=True re-captures the same app rather than the frontmost
@@ -60,7 +60,8 @@ class ManagedModalEnvironment(BaseModalExecutionEnvironment):
        if gateway is None:
            raise ValueError("Managed Modal requires a configured tool gateway and Nous user token")

-        self._gateway_origin = gateway.gateway_origin.rstrip("/")
+        self._gateway_origin = gateway.resolved_origin.rstrip("/")
+        self._gateway_host_header = gateway.gateway_host_header
        self._nous_user_token = gateway.nous_user_token
        self._task_id = task_id
        self._persistent = persistent_filesystem
@@ -234,6 +235,8 @@ class ManagedModalEnvironment(BaseModalExecutionEnvironment):
            "Authorization": f"Bearer {self._nous_user_token}",
            "Content-Type": "application/json",
        }
+        if self._gateway_host_header:
+            headers["Host"] = self._gateway_host_header
        if extra_headers:
            headers.update(extra_headers)

@@ -169,6 +169,7 @@ class SSHEnvironment(BaseEnvironment):
        if not files:
            return

+        base = f"{self._remote_home}/.hermes"
        parents = unique_parent_dirs(files)
        if parents:
            cmd = self._build_ssh_command()
@@ -180,7 +181,19 @@ class SSHEnvironment(BaseEnvironment):
        # Symlink staging avoids fragile GNU tar --transform rules.
        with tempfile.TemporaryDirectory(prefix="hermes-ssh-bulk-") as staging:
            for host_path, remote_path in files:
-                staged = os.path.join(staging, remote_path.lstrip("/"))
+                try:
+                    rel_remote = os.path.relpath(remote_path, base)
+                except ValueError as exc:
+                    raise RuntimeError(
+                        f"remote path {remote_path!r} is not under sync base {base!r}"
+                    ) from exc
+
+                if rel_remote in (".", "..") or rel_remote.startswith("../"):
+                    raise RuntimeError(  # relpath() gives a bare ".." for base's parent
+                        f"remote path {remote_path!r} escapes sync base {base!r}"
+                    )
+
+                staged = os.path.join(staging, rel_remote)
                os.makedirs(os.path.dirname(staged), exist_ok=True)
                os.symlink(os.path.abspath(host_path), staged)

@@ -190,7 +203,7 @@ class SSHEnvironment(BaseEnvironment):
            # existing directories (e.g. /home/<user>) with the staging
            # directory's mode.  Without this, a umask 002 produces 0775
            # dirs which breaks sshd StrictModes (refuses authorized_keys).
-            ssh_cmd.append("tar xf - --no-overwrite-dir -C /")
+            ssh_cmd.append(f"tar xf - --no-overwrite-dir -C {shlex.quote(base)}")

            tar_proc = subprocess.Popen(
                tar_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
@@ -0,0 +1,163 @@
+"""Shared FAL.ai SDK plumbing.
+
+Holds the stateless atoms that every FAL-backed tool needs:
+
+* :func:`import_fal_client` — lazy import + ``lazy_deps`` integration so
+  ``fal_client`` isn't pulled at cold start (it added ~64 ms per CLI
+  invocation when imported eagerly).
+* :class:`_ManagedFalSyncClient` — wrapper that drives a Nous-managed
+  fal-queue gateway through the standard ``fal_client.SyncClient``
+  primitives.
+* :func:`_normalize_fal_queue_url_format`, :func:`_extract_http_status`
+  — small helpers used by both the managed client wrapper and
+  ``_submit_fal_request``.
+
+Stateful pieces (cache globals, ``_managed_fal_client*`` selectors,
+``_submit_fal_request``) intentionally stay on
+:mod:`tools.image_generation_tool`. That module is the patch target for
+existing test suites (``tests/tools/test_image_generation.py``,
+``tests/tools/test_managed_media_gateways.py``) and for the
+``plugins/image_gen/fal/`` plugin's ``_it`` indirection — moving the
+caches here would silently defeat ``monkeypatch.setattr(image_tool,
+"_managed_fal_client", None)`` because the lookups would go against
+``fal_common``'s namespace instead. See the per-rule walkthrough at
+issue #26241 for details.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, Optional, Union
+from urllib.parse import urlencode
+
+
+def import_fal_client() -> Any:
+    """Return the ``fal_client`` module, importing it lazily.
+
+    Callers are responsible for caching the result on their own module
+    global — keeping per-module globals lets tests monkey-patch the
+    target module's ``fal_client`` attribute and have the patched value
+    stick for that module's call sites.
+
+    Raises :class:`ImportError` if the package is genuinely unavailable; a
+    ``lazy_deps`` failure is re-raised as ``ImportError`` with its cause chained.
+    """
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("image.fal", prompt=False)
+    except ImportError:
+        pass  # fall through to the plain import below
+    except Exception as exc:  # noqa: BLE001 — lazy_deps surfaces install hints
+        raise ImportError(str(exc)) from exc
+    import fal_client  # type: ignore  # noqa: WPS433 — intentionally lazy
+    return fal_client
+
+
+def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
+    normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
+    if not normalized_origin:
+        raise ValueError("Managed FAL queue origin is required")
+    return f"{normalized_origin}/"
+
+
+def _extract_http_status(exc: BaseException) -> Optional[int]:
+    """Return an HTTP status code from httpx/fal exceptions, else None.
+
+    Defensive across exception shapes — httpx.HTTPStatusError exposes
+    ``.response.status_code`` while fal_client wrappers may expose
+    ``.status_code`` directly.
+    """
+    response = getattr(exc, "response", None)
+    if response is not None:
+        status = getattr(response, "status_code", None)
+        if isinstance(status, int):
+            return status
+    status = getattr(exc, "status_code", None)
+    if isinstance(status, int):
+        return status
+    return None
+
+
+class _ManagedFalSyncClient:
+    """Small per-instance wrapper around ``fal_client.SyncClient`` for
+    managed queue hosts.
+
+    The wrapper carries its own ``fal_client`` module reference instead
+    of reaching into a module global, so callers stay in control of
+    which module's ``fal_client`` is in scope (matters for the test
+    patches that swap the legacy module's ``fal_client`` attribute).
+    """
+
+    def __init__(self, fal_client: Any, *, key: str, queue_run_origin: str):
+        sync_client_class = getattr(fal_client, "SyncClient", None)
+        if sync_client_class is None:
+            raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")
+
+        client_module = getattr(fal_client, "client", None)
+        if client_module is None:
+            raise RuntimeError("fal_client.client is required for managed FAL gateway mode")
+
+        self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
+        self._sync_client = sync_client_class(key=key)
+        self._http_client = getattr(self._sync_client, "_client", None)
+        self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
+        self._raise_for_status = getattr(client_module, "_raise_for_status", None)
+        self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
+        self._add_hint_header = getattr(client_module, "add_hint_header", None)
+        self._add_priority_header = getattr(client_module, "add_priority_header", None)
+        self._add_timeout_header = getattr(client_module, "add_timeout_header", None)
+
+        if self._http_client is None:
+            raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
+        if self._maybe_retry_request is None or self._raise_for_status is None:
+            raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
+        if self._request_handle_class is None:
+            raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")
+
+    def submit(
+        self,
+        application: str,
+        arguments: Dict[str, Any],
+        *,
+        path: str = "",
+        hint: Optional[str] = None,
+        webhook_url: Optional[str] = None,
+        priority: Any = None,
+        headers: Optional[Dict[str, str]] = None,
+        start_timeout: Optional[Union[int, float]] = None,
+    ):
+        url = self._queue_url_format + application
+        if path:
+            url += "/" + path.lstrip("/")
+        if webhook_url is not None:
+            url += "?" + urlencode({"fal_webhook": webhook_url})
+
+        request_headers = dict(headers or {})
+        if hint is not None and self._add_hint_header is not None:
+            self._add_hint_header(hint, request_headers)
+        if priority is not None:
+            if self._add_priority_header is None:
+                raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
+            self._add_priority_header(priority, request_headers)
+        if start_timeout is not None:
+            if self._add_timeout_header is None:
+                raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
+            self._add_timeout_header(start_timeout, request_headers)
+
+        response = self._maybe_retry_request(
+            self._http_client,
+            "POST",
+            url,
+            json=arguments,
+            timeout=getattr(self._sync_client, "default_timeout", 120.0),
+            headers=request_headers,
+        )
+        self._raise_for_status(response)
+
+        data = response.json()
+        return self._request_handle_class(
+            request_id=data["request_id"],
+            response_url=data["response_url"],
+            status_url=data["status_url"],
+            cancel_url=data["cancel_url"],
+            client=self._http_client,
+        )
@@ -26,8 +26,7 @@ import os
 import datetime
 import threading
 import uuid
-from typing import Any, Dict, Optional, Union
-from urllib.parse import urlencode
+from typing import Any, Dict, Optional

 # fal_client is imported lazily — see _load_fal_client(). Pulling it
 # eagerly added ~64 ms to every CLI cold start because
@@ -52,19 +51,17 @@ def _load_fal_client() -> Any:
    global fal_client
    if fal_client is not None:
        return fal_client
-    try:
-        from tools.lazy_deps import ensure as _lazy_ensure
-        _lazy_ensure("image.fal", prompt=False)
-    except ImportError:
-        pass
-    except Exception as e:
-        raise ImportError(str(e))
-    import fal_client as _fal_client  # noqa: F811 — module-global rebind
-    fal_client = _fal_client
+    from tools.fal_common import import_fal_client
+    fal_client = import_fal_client()
    return fal_client


 from tools.debug_helpers import DebugSession
+from tools.fal_common import (
+    _ManagedFalSyncClient,
+    _extract_http_status,
+    _normalize_fal_queue_url_format,  # noqa: F401 — re-exported for tests
+)
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
 from tools.tool_backend_helpers import (
    fal_key_is_configured,
@@ -360,110 +357,25 @@ def _resolve_managed_fal_gateway():
    return resolve_managed_tool_gateway("fal-queue")


-def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
-    normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
-    if not normalized_origin:
-        raise ValueError("Managed FAL queue origin is required")
-    return f"{normalized_origin}/"
-
-
-class _ManagedFalSyncClient:
-    """Small per-instance wrapper around fal_client.SyncClient for managed queue hosts."""
-
-    def __init__(self, *, key: str, queue_run_origin: str):
-        # Trigger the lazy import on first construction. Idempotent — the
-        # placeholder is overwritten with the real module on first call.
-        _load_fal_client()
-        sync_client_class = getattr(fal_client, "SyncClient", None)
-        if sync_client_class is None:
-            raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")
-
-        client_module = getattr(fal_client, "client", None)
-        if client_module is None:
-            raise RuntimeError("fal_client.client is required for managed FAL gateway mode")
-
-        self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
-        self._sync_client = sync_client_class(key=key)
-        self._http_client = getattr(self._sync_client, "_client", None)
-        self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
-        self._raise_for_status = getattr(client_module, "_raise_for_status", None)
-        self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
-        self._add_hint_header = getattr(client_module, "add_hint_header", None)
-        self._add_priority_header = getattr(client_module, "add_priority_header", None)
-        self._add_timeout_header = getattr(client_module, "add_timeout_header", None)
-
-        if self._http_client is None:
-            raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
-        if self._maybe_retry_request is None or self._raise_for_status is None:
-            raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
-        if self._request_handle_class is None:
-            raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")
-
-    def submit(
-        self,
-        application: str,
-        arguments: Dict[str, Any],
-        *,
-        path: str = "",
-        hint: Optional[str] = None,
-        webhook_url: Optional[str] = None,
-        priority: Any = None,
-        headers: Optional[Dict[str, str]] = None,
-        start_timeout: Optional[Union[int, float]] = None,
-    ):
-        url = self._queue_url_format + application
-        if path:
-            url += "/" + path.lstrip("/")
-        if webhook_url is not None:
-            url += "?" + urlencode({"fal_webhook": webhook_url})
-
-        request_headers = dict(headers or {})
-        if hint is not None and self._add_hint_header is not None:
-            self._add_hint_header(hint, request_headers)
-        if priority is not None:
-            if self._add_priority_header is None:
-                raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
-            self._add_priority_header(priority, request_headers)
-        if start_timeout is not None:
-            if self._add_timeout_header is None:
-                raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
-            self._add_timeout_header(start_timeout, request_headers)
-
-        response = self._maybe_retry_request(
-            self._http_client,
-            "POST",
-            url,
-            json=arguments,
-            timeout=getattr(self._sync_client, "default_timeout", 120.0),
-            headers=request_headers,
-        )
-        self._raise_for_status(response)
-
-        data = response.json()
-        return self._request_handle_class(
-            request_id=data["request_id"],
-            response_url=data["response_url"],
-            status_url=data["status_url"],
-            cancel_url=data["cancel_url"],
-            client=self._http_client,
-        )
-
-
 def _get_managed_fal_client(managed_gateway):
    """Reuse the managed FAL client so its internal httpx.Client is not leaked per call."""
    global _managed_fal_client, _managed_fal_client_config

    client_config = (
-        managed_gateway.gateway_origin.rstrip("/"),
+        managed_gateway.resolved_origin.rstrip("/"),
        managed_gateway.nous_user_token,
    )
    with _managed_fal_client_lock:
        if _managed_fal_client is not None and _managed_fal_client_config == client_config:
            return _managed_fal_client

+        # Resolve fal_client on the legacy module — preserves the test
+        # pattern of monkey-patching ``image_generation_tool.fal_client``.
+        _load_fal_client()
        _managed_fal_client = _ManagedFalSyncClient(
+            fal_client,
            key=managed_gateway.nous_user_token,
-            queue_run_origin=managed_gateway.gateway_origin,
+            queue_run_origin=managed_gateway.resolved_origin,
        )
        _managed_fal_client_config = client_config
        return _managed_fal_client
@@ -502,24 +414,6 @@ def _submit_fal_request(model: str, arguments: Dict[str, Any]):
        raise


-def _extract_http_status(exc: BaseException) -> Optional[int]:
-    """Return an HTTP status code from httpx/fal exceptions, else None.
-
-    Defensive across exception shapes — httpx.HTTPStatusError exposes
-    ``.response.status_code`` while fal_client wrappers may expose
-    ``.status_code`` directly.
-    """
-    response = getattr(exc, "response", None)
-    if response is not None:
-        status = getattr(response, "status_code", None)
-        if isinstance(status, int):
-            return status
-    status = getattr(exc, "status_code", None)
-    if isinstance(status, int):
-        return status
-    return None
-
-
 # ---------------------------------------------------------------------------
 # Model resolution + payload construction
 # ---------------------------------------------------------------------------
@@ -973,9 +867,12 @@ def _read_configured_image_provider():
    """Return the value of ``image_gen.provider`` from config.yaml, or None.

    We only consult the plugin registry when this is explicitly set — an
-    unset value keeps users on the legacy in-tree FAL path even when other
+    unset value keeps users on the in-tree FAL fallback even when other
    providers happen to be registered (e.g. a user has OPENAI_API_KEY set
-    for other features but never asked for OpenAI image gen).
+    for other features but never asked for OpenAI image gen). ``"fal"``
+    explicitly routes through ``plugins/image_gen/fal/`` (which delegates
+    back into this module's pipeline via call-time indirection — see
+    issue #26241).
    """
    try:
        from hermes_cli.config import load_config
@@ -994,15 +891,16 @@ def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
    """Route the call to a plugin-registered provider when one is selected.

    Returns a JSON string on dispatch, or ``None`` to fall through to the
-    built-in FAL path.
+    in-tree FAL fallback in ``image_generate_tool``.

-    Dispatch only fires when ``image_gen.provider`` is explicitly set AND
-    it does not point to ``fal`` (FAL still lives in-tree in this PR;
-    a later PR ports it into ``plugins/image_gen/fal/``). Any other value
-    that matches a registered plugin provider wins.
+    Dispatch fires when ``image_gen.provider`` is explicitly set — including
+    ``"fal"`` itself, which now resolves to the
+    ``plugins/image_gen/fal/`` plugin (the plugin re-enters this module's
+    pipeline via ``_it`` indirection so behavior is identical to the
+    direct call, just routed through the registry).
    """
    configured = _read_configured_image_provider()
-    if not configured or configured == "fal":
+    if not configured:
        return None

    # Also read configured model so we can pass it to the plugin
@@ -7,7 +7,8 @@ import logging
 import os
 from datetime import datetime, timezone
 from dataclasses import dataclass
-from typing import Callable, Optional
+from typing import Callable, Optional, Tuple
+from urllib.parse import urlparse, urlunparse

 logger = logging.getLogger(__name__)

@@ -15,6 +16,27 @@ from hermes_constants import get_hermes_home
 from tools.tool_backend_helpers import managed_nous_tools_enabled

 _DEFAULT_TOOL_GATEWAY_DOMAIN = "nousresearch.com"
+
+
+def _rewrite_localhost_origin(origin: str) -> Tuple[str, Optional[str]]:
+    """Rewrite ``*.localhost`` hostnames to ``127.0.0.1`` for DNS compatibility.
+
+    Python's :func:`socket.getaddrinfo` doesn't special-case ``*.localhost``
+    subdomains (RFC 6761), so ``tools-gateway.localhost`` fails DNS resolution
+    on most platforms.  Bare ``localhost`` resolves fine and is left untouched.
+
+    Returns ``(resolved_origin, host_header_or_none)``.
+    """
+    parsed = urlparse(origin)
+    hostname = parsed.hostname
+    if not hostname or not hostname.endswith(".localhost"):
+        return origin, None
+
+    port = parsed.port
+    netloc = f"127.0.0.1:{port}" if port else "127.0.0.1"
+    host_header = f"{hostname}:{port}" if port else hostname
+    resolved = urlunparse(parsed._replace(netloc=netloc))
+    return resolved, host_header
 _DEFAULT_TOOL_GATEWAY_SCHEME = "https"
 _NOUS_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120

@@ -26,6 +48,16 @@ class ManagedToolGatewayConfig:
    nous_user_token: str
    managed_mode: bool

+    @property
+    def resolved_origin(self) -> str:
+        """Origin with ``*.localhost`` hostnames rewritten to ``127.0.0.1``."""
+        return _rewrite_localhost_origin(self.gateway_origin)[0]
+
+    @property
+    def gateway_host_header(self) -> Optional[str]:
+        """Original ``host[:port]`` when the origin was rewritten, else ``None``."""
+        return _rewrite_localhost_origin(self.gateway_origin)[1]
+

 def auth_json_path():
    """Return the Hermes auth store path, respecting HERMES_HOME overrides."""
@@ -21,6 +21,11 @@ def managed_nous_tools_enabled() -> bool:
    the free tier.  We intentionally catch all exceptions and return
    False — never block the agent startup path.
    """
+    import os
+
+    if os.getenv("TOOL_GATEWAY_USER_TOKEN", "").strip():
+        return True
+
    try:
        from hermes_cli.auth import get_nous_auth_status

@@ -123,6 +128,25 @@ def prefers_gateway(config_section: str) -> bool:
    return False


+def portal_app_tools_enabled() -> bool:
+    """Return True when the portal.app_tools config flag is on.
+
+    Resolution: PORTAL_APP_TOOLS env var → config.yaml → default True.
+    Never raises — safe for check_fn and registration-time use.
+    """
+    env_val = os.getenv("PORTAL_APP_TOOLS")
+    if env_val is not None:
+        return is_truthy_value(env_val)
+    try:
+        from hermes_cli.config import load_config
+        portal = (load_config() or {}).get("portal")
+        if isinstance(portal, dict):
+            return bool(portal.get("app_tools", True))
+    except Exception:
+        pass
+    return True
+
+
 def fal_key_is_configured() -> bool:
    """Return True when FAL_KEY is set to a non-whitespace value.

@@ -197,6 +197,26 @@ def _normalize_local_command_model(model_name: Optional[str]) -> str:
    return _normalize_local_model(model_name)


+def _try_lazy_install_stt() -> bool:
+    """Attempt to lazy-install faster-whisper and return True on success.
+
+    The module-level ``_HAS_FASTER_WHISPER`` flag is set at import time and
+    cached. If the package wasn't installed at startup, calling ``ensure()``
+    installs it. This function re-checks dynamically after installation so
+    the provider can use it immediately without a process restart.
+    """
+    try:
+        import importlib.util
+        from tools.lazy_deps import ensure
+        ensure("stt.faster_whisper")
+        importlib.invalidate_caches()  # let path finders see the fresh install
+        if importlib.util.find_spec("faster_whisper") is not None:
+            return True
+    except Exception as exc:
+        logger.debug("Lazy install of faster-whisper failed: %s", exc)
+    return False
+
+
 def _get_provider(stt_config: dict) -> str:
    """Determine which STT provider to use.

@@ -218,6 +238,9 @@ def _get_provider(stt_config: dict) -> str:
                return "local"
            if _has_local_command():
                return "local_command"
+            # Try lazy-install before giving up
+            if _try_lazy_install_stt():
+                return "local"
            logger.warning(
                "STT provider 'local' configured but unavailable "
                "(install faster-whisper or set HERMES_LOCAL_STT_COMMAND)"
@@ -285,6 +308,9 @@ def _get_provider(stt_config: dict) -> str:
        return "local"
    if _has_local_command():
        return "local_command"
+    # Try lazy-install before falling through to cloud providers
+    if _try_lazy_install_stt():
+        return "local"
    if _HAS_OPENAI and get_env_value("GROQ_API_KEY"):
        logger.info("No local STT available, using Groq Whisper API")
        return "groq"
@@ -403,7 +429,8 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
    global _local_model, _local_model_name

    if not _HAS_FASTER_WHISPER:
-        return {"success": False, "transcript": "", "error": "faster-whisper not installed"}
+        if not _try_lazy_install_stt():
+            return {"success": False, "transcript": "", "error": "faster-whisper not installed"}

    try:
        # Lazy-load the model (downloads on first use, ~150 MB for 'base')
@@ -914,7 +941,7 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
        raise ValueError(message)

    return managed_gateway.nous_user_token, urljoin(
-        f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1"
+        f"{managed_gateway.resolved_origin.rstrip('/')}/", "v1"
    )


@@ -2048,7 +2048,7 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
        raise ValueError(message)

    return managed_gateway.nous_user_token, urljoin(
-        f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1"
+        f"{managed_gateway.resolved_origin.rstrip('/')}/", "v1"
    )


@@ -58,6 +58,8 @@ _HERMES_CORE_TOOLS = [
    "cronjob",
    # Cross-platform messaging (gated on gateway running via check_fn)
    "send_message",
+    # App integrations (500+ apps via Nous tool gateway, gated via check_fn)
+    "app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections",
    # Home Assistant smart home control (gated on HASS_TOKEN via check_fn)
    "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
    # Kanban multi-agent coordination — only in schema when the agent is
@@ -239,6 +241,12 @@ TOOLSETS = {
        "includes": []
    },

+    "app_tools": {
+        "description": "External app integrations (Gmail, Slack, GitHub, Notion, 500+ apps) via Nous tool gateway",
+        "tools": ["app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections"],
+        "includes": []
+    },
+
    "kanban": {
        "description": (
            "Kanban multi-agent coordination — only active when the agent "
@@ -1061,6 +1061,10 @@ def _session_tool_progress_mode(sid: str) -> str:
    return str(_sessions.get(sid, {}).get("tool_progress_mode", "all") or "all")


+def _session_verbose(sid: str) -> bool:
+    return _session_tool_progress_mode(sid) == "verbose"
+
+
 def _tool_progress_enabled(sid: str) -> bool:
    return _session_tool_progress_mode(sid) != "off"

@@ -1492,6 +1496,74 @@ def _tool_ctx(name: str, args: dict) -> str:
        return ""


+_TUI_VERBOSE_TEXT_MAX_CHARS = 16_000
+_TUI_VERBOSE_TEXT_MAX_LINES = 240
+
+
+def _cap_tui_verbose_text(text: str) -> str:
+    if (
+        len(text) <= _TUI_VERBOSE_TEXT_MAX_CHARS
+        and text.count("\n") < _TUI_VERBOSE_TEXT_MAX_LINES
+    ):
+        return text
+
+    idx = len(text)
+    start = 0
+    for _ in range(_TUI_VERBOSE_TEXT_MAX_LINES):
+        idx = text.rfind("\n", 0, idx)
+        if idx < 0:
+            start = 0
+            break
+        start = idx + 1
+
+    line_start = start
+    start = max(line_start, len(text) - _TUI_VERBOSE_TEXT_MAX_CHARS)
+    if start > line_start:
+        next_break = text.find("\n", start)
+        if 0 <= next_break < len(text) - 1:
+            start = next_break + 1
+
+    tail = text[start:].lstrip()
+    omitted_chars = max(0, len(text) - len(tail))
+    omitted_lines = text[:start].count("\n")
+    if omitted_lines:
+        label = (
+            "[showing verbose tail; omitted "
+            f"{omitted_lines} lines / {omitted_chars} chars]\n"
+        )
+    else:
+        label = f"[showing verbose tail; omitted {omitted_chars} chars]\n"
+    return f"{label}{tail}"
+
+
+def _redact_tui_verbose_text(text: str) -> str:
+    try:
+        from agent.redact import redact_sensitive_text
+
+        redacted = redact_sensitive_text(str(text), force=True)
+    except Exception:
+        return ""
+    return _cap_tui_verbose_text(redacted)
+
+
+def _tool_args_text(args: dict) -> str:
+    try:
+        raw = json.dumps(args or {}, indent=2, ensure_ascii=False, default=str)
+    except Exception:
+        raw = str(args or {})
+    return _redact_tui_verbose_text(raw)
+
+
+def _tool_result_text(result: object) -> str:
+    try:
+        from agent.tool_dispatch_helpers import _multimodal_text_summary
+
+        raw = _multimodal_text_summary(result)
+    except Exception:
+        raw = str(result)
+    return _redact_tui_verbose_text(raw)
+
+
 def _fmt_tool_duration(seconds: float | None) -> str:
    if seconds is None:
        return ""
@@ -1553,13 +1625,18 @@ def _on_tool_start(sid: str, tool_call_id: str, name: str, args: dict):
            pass
        session.setdefault("tool_started_at", {})[tool_call_id] = time.time()
    if _tool_progress_enabled(sid):
+        payload = {
+            "tool_id": tool_call_id,
+            "name": name,
+            "context": _tool_ctx(name, args),
+        }
+        if _session_verbose(sid):
+            args_text = _tool_args_text(args)
+            if args_text:
+                payload["args_text"] = args_text
        # tool.complete is the source of truth for todos (full list from the
        # tool result). args.todos here may be a partial merge update.
-        _emit(
-            "tool.start",
-            sid,
-            {"tool_id": tool_call_id, "name": name, "context": _tool_ctx(name, args)},
-        )
+        _emit("tool.start", sid, payload)


 def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result: str):
@@ -1576,6 +1653,10 @@ def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result
    summary = _tool_summary(name, result, duration_s)
    if summary:
        payload["summary"] = summary
+    if _session_verbose(sid):
+        result_text = _tool_result_text(result)
+        if result_text:
+            payload["result_text"] = result_text
    if name == "todo":
        try:
            data = json.loads(result)
@@ -1615,7 +1696,10 @@ def _on_tool_progress(
        _emit("tool.progress", sid, {"name": name, "preview": preview or ""})
        return
    if event_type == "reasoning.available" and preview:
-        _emit("reasoning.available", sid, {"text": str(preview)})
+        payload: dict[str, object] = {"text": str(preview)}
+        if _session_verbose(sid):
+            payload["verbose"] = True
+        _emit("reasoning.available", sid, payload)
        return
    if event_type.startswith("subagent."):
        payload = {
@@ -1691,7 +1775,11 @@ def _agent_cbs(sid: str) -> dict:
        "tool_gen_callback": lambda name: _tool_progress_enabled(sid)
        and _emit("tool.generating", sid, {"name": name}),
        "thinking_callback": lambda text: _emit("thinking.delta", sid, {"text": text}),
-        "reasoning_callback": lambda text: _emit("reasoning.delta", sid, {"text": text}),
+        "reasoning_callback": lambda text: _emit(
+            "reasoning.delta",
+            sid,
+            {"text": text, **({"verbose": True} if _session_verbose(sid) else {})},
+        ),
        "status_callback": lambda kind, text=None: _status_update(
            sid, str(kind), None if text is None else str(text)
        ),
@@ -342,6 +342,25 @@ describe('createGatewayEventHandler', () => {
    expect(appended[appended.length - 1]).toMatchObject({ role: 'assistant', text: 'final answer' })
  })

+  it('shows verbose reasoning even when normal reasoning display is off', () => {
+    vi.useFakeTimers()
+    patchUiState({ showReasoning: false })
+    const appended: Msg[] = []
+    const streamed = 'verbose-only reasoning'
+
+    try {
+      const onEvent = createGatewayEventHandler(buildCtx(appended))
+
+      onEvent({ payload: { text: streamed, verbose: true }, type: 'reasoning.delta' } as any)
+      vi.runOnlyPendingTimers()
+
+      expect(turnController.reasoningText).toBe(streamed)
+      expect(getTurnState().reasoning).toBe(streamed)
+    } finally {
+      vi.useRealTimers()
+    }
+  })
+
  it('ignores fallback reasoning.available when streamed reasoning already exists', () => {
    const appended: Msg[] = []
    const streamed = 'short streamed reasoning'
@@ -485,6 +504,25 @@ describe('createGatewayEventHandler', () => {
    expect(appended[3]?.text).not.toContain('```diff')
  })

+  it('keeps verbose result text on inline_diff tool completions', () => {
+    const appended: Msg[] = []
+    const onEvent = createGatewayEventHandler(buildCtx(appended))
+    const diff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
+
+    onEvent({
+      payload: { args_text: '{ "path": "foo.ts" }', context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
+      type: 'tool.start'
+    } as any)
+    onEvent({
+      payload: { inline_diff: diff, result_text: 'patched result', tool_id: 'tool-1' },
+      type: 'tool.complete'
+    } as any)
+
+    expect(turnController.segmentMessages[0]).toMatchObject({ kind: 'diff' })
+    expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Args:\n{ "path": "foo.ts" }')
+    expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Result:\npatched result')
+  })
+
  it('keeps full final responses from duplicating flushed pre-diff narration', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
@@ -222,6 +222,21 @@ describe('createSlashHandler', () => {
    expect(ctx.gateway.rpc).not.toHaveBeenCalled()
  })

+  it('keeps visible scrollback when branching a TUI session', async () => {
+    patchUiState({ sid: 'sid-parent' })
+    const rpc = vi.fn(() => Promise.resolve({ session_id: 'sid-branch', title: 'branch title' }))
+    const ctx = buildCtx({ gateway: { ...buildGateway(), rpc } })
+
+    expect(createSlashHandler(ctx)('/branch branch title')).toBe(true)
+
+    expect(rpc).toHaveBeenCalledWith('session.branch', { name: 'branch title', session_id: 'sid-parent' })
+    await vi.waitFor(() => {
+      expect(getUiState().sid).toBe('sid-branch')
+      expect(ctx.transcript.sys).toHaveBeenCalledWith('branched → branch title')
+    })
+    expect(ctx.transcript.setHistoryItems).not.toHaveBeenCalled()
+  })
+
  it('reloads skills in the live gateway and refreshes the catalog', async () => {
    const rpc = vi.fn((method: string) => {
      if (method === 'skills.reload') {
@@ -16,4 +16,16 @@ describe('composerPromptText', () => {
    expect(composerPromptText('❯', 'custom')).toBe('❯')
    expect(composerPromptText('❯')).toBe('❯')
  })
+
+  it('uses a Termux-safe ASCII prompt marker in normal mode', () => {
+    expect(composerPromptText('❯', 'coder', false, true, 50)).toBe('>')
+  })
+
+  it('keeps profile prefix suppressed on narrow Termux widths', () => {
+    expect(composerPromptText('❯', 'upstr', false, true, 72)).toBe('>')
+  })
+
+  it('allows profile prefix on very wide Termux panes', () => {
+    expect(composerPromptText('❯', 'upstr', false, true, 120)).toBe('upstr >')
+  })
 })
@@ -0,0 +1,40 @@
+import { describe, expect, it } from 'vitest'
+
+import { stableComposerColumns, transcriptBodyWidth } from '../lib/inputMetrics.js'
+import { composerPromptText } from '../lib/prompt.js'
+
+describe('Termux composer prompt + width guards', () => {
+  it('uses a single-cell ASCII prompt marker in Termux mode', () => {
+    expect(composerPromptText('❯', 'coder', false, true, 50)).toBe('>')
+  })
+
+  it('suppresses profile prefixes on narrow Termux panes', () => {
+    expect(composerPromptText('❯', 'upstr', false, true, 72)).toBe('>')
+  })
+
+  it('keeps profile context on very wide Termux panes', () => {
+    expect(composerPromptText('❯', 'upstr', false, true, 120)).toBe('upstr >')
+  })
+
+  it('reserves fewer columns for gutter on narrow Termux widths', () => {
+    // 32 columns after prompt: desktop reserves 2 for transcript scrollbar,
+    // Termux keeps those 2 columns for the active composer.
+    expect(stableComposerColumns(40, 8, false)).toBe(28)
+    expect(stableComposerColumns(40, 8, true)).toBe(30)
+
+    // With ample room, Termux still reserves the gutter for alignment.
+    expect(stableComposerColumns(60, 8, true)).toBe(48)
+  })
+
+  it('never over-allocates transcript body width on narrow panes', () => {
+    // Previously the width was clamped to a 20-column minimum, which overflowed narrow layouts.
+    expect(transcriptBodyWidth(24, 'assistant', '>', true)).toBe(19)
+    expect(transcriptBodyWidth(24, 'user', 'upstr >', true)).toBe(14)
+    expect(transcriptBodyWidth(10, 'user', '>', true)).toBeGreaterThanOrEqual(1)
+  })
+
+  it('keeps legacy desktop floor outside Termux mode', () => {
+    expect(transcriptBodyWidth(24, 'assistant', '>')).toBe(20)
+    expect(transcriptBodyWidth(24, 'user', 'upstr >')).toBe(20)
+  })
+})
@@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest'
 import {
  boundedLiveRenderText,
  buildToolTrailLine,
+  buildVerboseToolTrailLine,
  edgePreview,
  estimateRows,
  estimateTokensRough,
@@ -12,8 +13,8 @@ import {
  lastCotTrailIndex,
  parseToolTrailResultLine,
  pasteTokenLabel,
-  sanitizeAnsiForRender,
  sameToolTrailGroup,
+  sanitizeAnsiForRender,
  splitToolDuration,
  stripAnsi,
  thinkingPreview
@@ -37,6 +38,39 @@ describe('buildToolTrailLine', () => {
  })
 })

+describe('buildVerboseToolTrailLine', () => {
+  it('preserves multiline args and result details', () => {
+    const line = buildVerboseToolTrailLine(
+      'terminal',
+      'npm test',
+      false,
+      1.25,
+      '{\n  "cmd": "npm test"\n}',
+      'first line\nsecond :: line'
+    )
+
+    expect(line).toContain('Args:\n{')
+    expect(line).toContain('Result:\nfirst line\nsecond :: line')
+    expect(parseToolTrailResultLine(line)).toEqual({
+      call: 'Terminal("npm test") (1.3s)',
+      detail: 'Args:\n{\n  "cmd": "npm test"\n}\nResult:\nfirst line\nsecond :: line',
+      mark: '✓'
+    })
+  })
+
+  it('labels verbose failures as errors', () => {
+    const line = buildVerboseToolTrailLine('terminal', 'npm test', true, 0.5, undefined, 'command failed')
+
+    expect(line).toContain('Error:\ncommand failed')
+    expect(line).not.toContain('Result:\ncommand failed')
+    expect(parseToolTrailResultLine(line)).toEqual({
+      call: 'Terminal("npm test") (0.5s)',
+      detail: 'Error:\ncommand failed',
+      mark: '✗'
+    })
+  })
+})
+
 describe('lastCotTrailIndex', () => {
  it('finds last non-result line', () => {
    expect(lastCotTrailIndex(['a ✓', 'thinking…'])).toBe(1)
@@ -178,7 +178,22 @@ describe('supportsFastEchoTerminal', () => {
    expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'Apple_Terminal' } as NodeJS.ProcessEnv)).toBe(false)
  })

-  it('keeps fast-echo enabled in VS Code and unknown terminals', () => {
+  it('disables fast-echo by default in Termux mode', () => {
+    expect(
+      supportsFastEchoTerminal({ TERMUX_VERSION: '0.118.0', PREFIX: '/data/data/com.termux/files/usr' } as NodeJS.ProcessEnv)
+    ).toBe(false)
+  })
+
+  it('allows explicit Termux fast-echo opt-in via env override', () => {
+    expect(
+      supportsFastEchoTerminal({
+        HERMES_TUI_TERMUX_FAST_ECHO: '1',
+        TERMUX_VERSION: '0.118.0'
+      } as NodeJS.ProcessEnv)
+    ).toBe(true)
+  })
+
+  it('keeps fast-echo enabled in VS Code and unknown non-Termux terminals', () => {
    expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv)).toBe(true)
    expect(supportsFastEchoTerminal({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
  })
@@ -491,13 +491,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:

      case 'reasoning.delta':
        if (ev.payload?.text) {
-          turnController.recordReasoningDelta(ev.payload.text)
+          turnController.recordReasoningDelta(ev.payload.text, Boolean(ev.payload.verbose))
        }

        return

      case 'reasoning.available':
-        turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''))
+        turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''), Boolean(ev.payload?.verbose))

        return

@@ -517,12 +517,18 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:

      case 'tool.start':
        turnController.recordTodos(ev.payload.todos)
-        turnController.recordToolStart(ev.payload.tool_id, ev.payload.name ?? 'tool', ev.payload.context ?? '')
+        turnController.recordToolStart(
+          ev.payload.tool_id,
+          ev.payload.name ?? 'tool',
+          ev.payload.context ?? '',
+          ev.payload.args_text ? stripAnsi(String(ev.payload.args_text)) : undefined
+        )

        return
      case 'tool.complete': {
        const inlineDiffText =
          ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''
+        const resultText = ev.payload.result_text ? stripAnsi(String(ev.payload.result_text)) : undefined

        if (inlineDiffText) {
          turnController.recordInlineDiffToolComplete(
@@ -530,7 +536,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
            ev.payload.tool_id,
            ev.payload.name,
            ev.payload.error,
-            ev.payload.duration_s
+            ev.payload.duration_s,
+            resultText
          )
        } else {
          turnController.recordToolComplete(
@@ -539,7 +546,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
            ev.payload.error,
            ev.payload.summary,
            ev.payload.duration_s,
-            ev.payload.todos
+            ev.payload.todos,
+            resultText
          )
        }

@@ -212,7 +212,6 @@ export const sessionCommands: SlashCommand[] = [
          void ctx.session.closeSession(prevSid)
          patchUiState({ sid: r.session_id })
          ctx.session.setSessionStartedAt(Date.now())
-          ctx.transcript.setHistoryItems([])
          ctx.transcript.sys(`branched → ${r.title ?? ''}`)
        })
      )
@@ -11,6 +11,7 @@ import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
 import {
  boundedLiveRenderText,
  buildToolTrailLine,
+  buildVerboseToolTrailLine,
  estimateTokensRough,
  isTransientTrailLine,
  sameToolTrailGroup,
@@ -542,8 +543,8 @@ class TurnController {
    }
  }

-  recordReasoningAvailable(text: string) {
-    if (this.interrupted || !getUiState().showReasoning) {
+  recordReasoningAvailable(text: string, force = false) {
+    if (this.interrupted || (!force && !getUiState().showReasoning)) {
      return
    }

@@ -560,8 +561,8 @@ class TurnController {
    this.pulseReasoningStreaming()
  }

-  recordReasoningDelta(text: string) {
-    if (this.interrupted || !getUiState().showReasoning) {
+  recordReasoningDelta(text: string, force = false) {
+    if (this.interrupted || (!force && !getUiState().showReasoning)) {
      return
    }

@@ -587,14 +588,15 @@ class TurnController {
    error?: string,
    summary?: string,
    duration?: number,
-    todos?: unknown
+    todos?: unknown,
+    resultText?: string
  ) {
    if (this.interrupted) {
      return
    }

    this.recordTodos(todos)
-    const line = this.completeTool(toolId, fallbackName, error, summary, duration)
+    const line = this.completeTool(toolId, fallbackName, error, summary, duration, resultText)

    this.pendingSegmentTools = [...this.pendingSegmentTools, line]
    this.flushPendingToolsIntoLastSegment()
@@ -606,30 +608,42 @@ class TurnController {
    toolId: string,
    fallbackName?: string,
    error?: string,
-    duration?: number
+    duration?: number,
+    resultText?: string
  ) {
    if (this.interrupted) {
      return
    }

    this.flushStreamingSegment()
-    this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration)])
+    this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration, resultText)])
    this.publishToolState()
  }

-  private completeTool(toolId: string, fallbackName?: string, error?: string, summary?: string, duration?: number) {
+  private completeTool(
+    toolId: string,
+    fallbackName?: string,
+    error?: string,
+    summary?: string,
+    duration?: number,
+    resultText?: string
+  ) {
    const done = this.activeTools.find(tool => tool.id === toolId)
    const name = done?.name ?? fallbackName ?? 'tool'
    const label = toolTrailLabel(name)
    const fallbackDuration = done?.startedAt ? (Date.now() - done.startedAt) / 1000 : undefined

-    const line = buildToolTrailLine(
-      name,
-      done?.context || '',
-      Boolean(error),
-      error || summary || '',
-      duration ?? fallbackDuration
-    )
+    const line =
+      done?.verboseArgs || resultText
+        ? buildVerboseToolTrailLine(
+            name,
+            done?.context || '',
+            Boolean(error),
+            duration ?? fallbackDuration,
+            done?.verboseArgs,
+            error || resultText || summary || ''
+          )
+        : buildToolTrailLine(name, done?.context || '', Boolean(error), error || summary || '', duration ?? fallbackDuration)

    this.activeTools = this.activeTools.filter(tool => tool.id !== toolId)

@@ -675,7 +689,7 @@ class TurnController {
    }, STREAM_BATCH_MS)
  }

-  recordToolStart(toolId: string, name: string, context: string) {
+  recordToolStart(toolId: string, name: string, context: string, verboseArgs?: string) {
    if (this.interrupted) {
      return
    }
@@ -688,7 +702,7 @@ class TurnController {
    const sample = `${name} ${context}`.trim()

    this.toolTokenAcc += sample ? estimateTokensRough(sample) : 0
-    this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now() }]
+    this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now(), verboseArgs }]

    patchTurnState({ toolTokens: this.toolTokenAcc, tools: this.activeTools })
  }
@@ -6,7 +6,7 @@ import { useGateway } from '../app/gatewayContext.js'
 import type { AppLayoutProps } from '../app/interfaces.js'
 import { $isBlocked, $overlayState, patchOverlayState } from '../app/overlayStore.js'
 import { $uiState } from '../app/uiStore.js'
-import { INLINE_MODE, SHOW_FPS } from '../config/env.js'
+import { INLINE_MODE, SHOW_FPS, TERMUX_TUI_MODE } from '../config/env.js'
 import { PLACEHOLDER } from '../content/placeholders.js'
 import {
  COMPOSER_PROMPT_GAP_WIDTH,
@@ -169,10 +169,10 @@ const ComposerPane = memo(function ComposerPane({
  const ui = useStore($uiState)
  const isBlocked = useStore($isBlocked)
  const sh = (composer.inputBuf[0] ?? composer.input).startsWith('!')
-  const promptText = composerPromptText(ui.theme.brand.prompt, ui.info?.profile_name, sh)
+  const promptText = composerPromptText(ui.theme.brand.prompt, ui.info?.profile_name, sh, TERMUX_TUI_MODE, composer.cols)
  const promptWidth = composerPromptWidth(promptText)
  const promptBlank = ' '.repeat(promptWidth)
-  const inputColumns = stableComposerColumns(composer.cols, promptWidth)
+  const inputColumns = stableComposerColumns(composer.cols, promptWidth, TERMUX_TUI_MODE)
  const inputHeight = inputVisualHeight(composer.input, inputColumns)
  const inputMouseRef = useRef<null | TextInputMouseApi>(null)

@@ -1,6 +1,7 @@
 import { Ansi, Box, NoSelect, Text } from '@hermes/ink'
 import { memo, useState } from 'react'

+import { TERMUX_TUI_MODE } from '../config/env.js'
 import { LONG_MSG } from '../config/limits.js'
 import { sectionMode } from '../domain/details.js'
 import { userDisplay } from '../domain/messages.js'
@@ -139,7 +140,7 @@ export const MessageLine = memo(function MessageLine({
    }

    if (msg.role === 'assistant') {
-      const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt)
+      const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt, TERMUX_TUI_MODE)

      return isStreaming ? (
        // Incremental markdown: split at the last stable block boundary so
@@ -201,7 +202,7 @@ export const MessageLine = memo(function MessageLine({
          </Text>
        </NoSelect>

-        <Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt)}>{content}</Box>
+        <Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt, TERMUX_TUI_MODE)}>{content}</Box>
      </Box>
    </Box>
  )
@@ -13,6 +13,7 @@ import {
  isVoiceToggleKey,
  type ParsedVoiceRecordKey
 } from '../lib/platform.js'
+import { isTermuxTuiMode } from '../lib/termux.js'

 type InkExt = typeof Ink & {
  stringWidth: (s: string) => number
@@ -298,7 +299,23 @@ export function canFastBackspaceShape(current: string, cursor: number, columns?:
 export function supportsFastEchoTerminal(env: NodeJS.ProcessEnv = process.env): boolean {
   // Terminal.app still shows paint/cursor artifacts under the fast-echo
   // bypass path. Fall back to the normal Ink render path there.
-  return (env.TERM_PROGRAM ?? '').trim() !== 'Apple_Terminal'
+  if ((env.TERM_PROGRAM ?? '').trim() === 'Apple_Terminal') {
+    return false
+  }
+
+  // Termux terminals are especially sensitive to bypass-path cursor drift and
+  // stale paints at soft-wrap boundaries on tall/narrow viewports. Keep this
+  // off by default in Termux mode; allow explicit opt-in for local debugging.
+  if (isTermuxTuiMode(env)) {
+    // Opt-in: HERMES_TUI_TERMUX_FAST_ECHO=1|true|yes|on (case-insensitive via
+    // the lowercasing below). Unset, blank, or any other value fails the match,
+    // so fast-echo stays disabled.
+    const override = String(env.HERMES_TUI_TERMUX_FAST_ECHO ?? '').trim().toLowerCase()
+
+    return /^(?:1|true|yes|on)$/.test(override)
+  }
+
+  return true
 }

 function renderWithCursor(value: string, cursor: number) {
@@ -856,7 +856,16 @@ export const ToolTrail = memo(function ToolTrail({
      color: t.color.text,
      key: tool.id,
      label,
-      details: [],
+      details: tool.verboseArgs
+        ? [
+            {
+              color: t.color.muted,
+              content: `Args:\n${boundedLiveRenderText(tool.verboseArgs)}`,
+              dimColor: true,
+              key: `${tool.id}-args`
+            }
+          ]
+        : [],
      content: (
        <>
          <Spinner color={t.color.accent} variant="tool" /> {label}
@@ -477,11 +477,11 @@ export type GatewayEvent =
      type: 'gateway.start_timeout'
    }
  | { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' }
-  | { payload?: { text?: string }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' }
+  | { payload?: { text?: string; verbose?: boolean }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' }
  | { payload: { name?: string; preview?: string }; session_id?: string; type: 'tool.progress' }
  | { payload: { name?: string }; session_id?: string; type: 'tool.generating' }
  | {
-      payload: { context?: string; name?: string; tool_id: string; todos?: unknown[] }
+      payload: { args_text?: string; context?: string; name?: string; tool_id: string; todos?: unknown[] }
      session_id?: string
      type: 'tool.start'
    }
@@ -491,6 +491,7 @@ export type GatewayEvent =
        error?: string
        inline_diff?: string
        name?: string
+        result_text?: string
        summary?: string
        tool_id: string
        todos?: unknown[]
@@ -177,14 +177,25 @@ export function transcriptGutterWidth(role: Role, userPrompt: string) {
  return role === 'user' ? composerPromptWidth(userPrompt) : 3
 }

-export function transcriptBodyWidth(totalCols: number, role: Role, userPrompt: string) {
-  return Math.max(20, totalCols - transcriptGutterWidth(role, userPrompt) - 2)
+export function transcriptBodyWidth(totalCols: number, role: Role, userPrompt: string, termuxMode = false) {
+  const available = Math.max(1, totalCols - transcriptGutterWidth(role, userPrompt) - 2)
+
+  if (termuxMode) {
+    // On narrow / unusual aspect-ratio mobile panes, forcing a wide minimum
+    // width causes right-edge clipping and chopped words.
+    return available
+  }
+
+  return Math.max(20, available)
 }

-export function stableComposerColumns(totalCols: number, promptWidth: number) {
+export function stableComposerColumns(totalCols: number, promptWidth: number, termuxMode = false) {
  // Physical render/wrap width. Always reserve outer composer padding and
  // prompt prefix. Only reserve the transcript scrollbar gutter when the
  // terminal is wide enough; on narrow panes, preserving input columns beats
  // keeping gutters visually aligned.
-  return Math.max(1, totalCols - promptWidth - 2 - (totalCols - promptWidth >= 24 ? 2 : 0))
+  const afterPrompt = totalCols - promptWidth
+  const reserveScrollbar = afterPrompt >= (termuxMode ? 36 : 24) ? 2 : 0
+
+  return Math.max(1, totalCols - promptWidth - 2 - reserveScrollbar)
 }
@@ -1,8 +1,32 @@
-export function composerPromptText(prompt: string, profileName?: null | string, shellMode = false): string {
+const TERMUX_SAFE_PROMPT = '>'
+
+export function composerPromptText(
+  prompt: string,
+  profileName?: null | string,
+  shellMode = false,
+  termuxMode = false,
+  totalCols?: number
+): string {
  if (shellMode) {
    return '$'
  }

+  if (termuxMode) {
+    // Termux fonts/terminal backends can render decorative prompt glyphs with
+    // ambiguous width; keep the live composer marker strictly single-cell ASCII
+    // so we never leave stale arrow artifacts while typing.
+    const basePrompt = TERMUX_SAFE_PROMPT
+
+    // On very wide panes we can still include profile context. On narrow/mobile
+    // panes this burns precious columns and increases wrap/clipping risk.
+    const wideEnoughForProfile = typeof totalCols === 'number' ? totalCols >= 90 : false
+    if (wideEnoughForProfile && profileName && !['default', 'custom'].includes(profileName)) {
+      return `${profileName} ${basePrompt}`
+    }
+
+    return basePrompt
+  }
+
  if (profileName && !['default', 'custom'].includes(profileName)) {
    return `${profileName} ${prompt}`
  }
@@ -212,6 +212,28 @@ export const buildToolTrailLine = (
  return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}`
 }

+const verboseToolBlock = (label: string, text?: string) => {
+  const body = (text ?? '').trim()
+
+  return body ? `${label}:\n${boundedLiveRenderText(body)}` : ''
+}
+
+export const buildVerboseToolTrailLine = (
+  name: string,
+  context: string,
+  error?: boolean,
+  duration?: number,
+  argsText?: string,
+  resultText?: string
+) => {
+  const detail = [verboseToolBlock('Args', argsText), verboseToolBlock(error ? 'Error' : 'Result', resultText)]
+    .filter(Boolean)
+    .join('\n')
+  const took = duration !== undefined ? ` (${duration.toFixed(1)}s)` : ''
+
+  return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}`
+}
+
 export const isToolTrailResultLine = (line: string) => line.endsWith(' ✓') || line.endsWith(' ✗')

 export const parseToolTrailResultLine = (line: string) => {
@@ -221,10 +243,10 @@ export const parseToolTrailResultLine = (line: string) => {

  const mark = line.endsWith(' ✗') ? '✗' : '✓'
  const body = line.slice(0, -2)
-  const [call, detail] = body.split(' :: ', 2)
+  const sep = body.indexOf(' :: ')

-  if (detail != null) {
-    return { call, detail, mark }
+  if (sep >= 0) {
+    return { call: body.slice(0, sep), detail: body.slice(sep + 4), mark }
  }

  const legacy = body.indexOf(': ')
@@ -1,5 +1,6 @@
 import type { Msg } from '../types.js'

+import { TERMUX_TUI_MODE } from '../config/env.js'
 import { transcriptBodyWidth } from './inputMetrics.js'

 const hashText = (text: string) => {
@@ -96,7 +97,7 @@ export const estimatedMsgHeight = (
    return Math.max(2, msg.todos.length + 2)
  }

-  const bodyWidth = transcriptBodyWidth(cols, msg.role, userPrompt)
+  const bodyWidth = transcriptBodyWidth(cols, msg.role, userPrompt, TERMUX_TUI_MODE)
  const text = msg.text
  let h = wrappedLines(text || ' ', bodyWidth)

@@ -2,6 +2,7 @@ export interface ActiveTool {
  context?: string
  id: string
  name: string
+  verboseArgs?: string
  startedAt?: number
 }

@@ -44,11 +44,13 @@ const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [
  { key: "vision", label: "Vision", hint: "Image analysis" },
  { key: "web_extract", label: "Web Extract", hint: "Page summarization" },
  { key: "compression", label: "Compression", hint: "Context compaction" },
-  { key: "session_search", label: "Session Search", hint: "Recall queries" },
  { key: "skills_hub", label: "Skills Hub", hint: "Skill search" },
  { key: "approval", label: "Approval", hint: "Smart auto-approve" },
  { key: "mcp", label: "MCP", hint: "MCP tool routing" },
  { key: "title_generation", label: "Title Gen", hint: "Session titles" },
+  { key: "triage_specifier", label: "Triage Specifier", hint: "Kanban spec fleshing" },
+  { key: "kanban_decomposer", label: "Kanban Decomposer", hint: "Task decomposition" },
+  { key: "profile_describer", label: "Profile Describer", hint: "Auto profile descriptions" },
  { key: "curator", label: "Curator", hint: "Skill-usage review" },
 ] as const;