fix: follow-up fixes for TinyFish browser provider salvage

- Remove ENV_VARS_BY_VERSION[23] entry: adding optional env vars does not require a config version bump (deep-merge handles it)
- Replace change-detector test (assert _config_version == 23) with invariant test (assert positive int)
- Add TinyFish case to setup.py missing_browser_hint
- Add TINYFISH_BROWSER_TIMEOUT to set_config_value allowed keys
- Add contributor simantak-dabhade to AUTHOR_MAP
feat(tools): add TinyFish cloud browser provider
2026-05-03 14:47:45 +05:30 · 2026-05-03 14:46:10 +05:30 · 2026-05-03 01:54:24 -07:00 · 2026-05-03 01:51:33 -07:00 · 2026-05-03 01:51:33 -07:00 · 2026-05-03 01:44:23 -07:00
74 changed files with 6286 additions and 266 deletions
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import contextvars
+import json
 import logging
 import os
 from collections import defaultdict, deque
@@ -47,6 +48,7 @@ from acp.schema import (
    TextContentBlock,
    UnstructuredCommandInput,
    Usage,
+    UsageUpdate,
    UserMessageChunk,
 )

@@ -65,6 +67,7 @@ from acp_adapter.events import (
 )
 from acp_adapter.permissions import make_approval_callback
 from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
+from acp_adapter.tools import build_tool_complete, build_tool_start

 logger = logging.getLogger(__name__)

@@ -315,6 +318,66 @@ class HermesACPAgent(acp.Agent):

        return target_provider, new_model

+    @staticmethod
+    def _build_usage_update(state: SessionState) -> UsageUpdate | None:
+        """Assemble the ACP ``usage_update`` payload for a session.
+
+        Clients such as Zed drive their circular context indicator from this
+        session update: ``size`` carries the model context window and ``used``
+        the current request pressure.  ``used`` is estimated from the system
+        prompt, the conversation history, and the tool schemas; if that
+        estimate raises, the compressor's ``last_prompt_tokens`` is used.
+
+        Returns ``None`` when the compressor reports no positive context length.
+        """
+        hermes_agent = state.agent
+        ctx_compressor = getattr(hermes_agent, "context_compressor", None)
+        window = int(getattr(ctx_compressor, "context_length", 0) or 0)
+        if window <= 0:
+            # Without a known window there is nothing meaningful to report.
+            return None
+
+        try:
+            from agent.model_metadata import estimate_request_tokens_rough
+
+            prompt_text = getattr(hermes_agent, "_cached_system_prompt", "") or ""
+            tool_schemas = getattr(hermes_agent, "tools", None) or None
+            consumed = estimate_request_tokens_rough(
+                state.history,
+                system_prompt=prompt_text,
+                tools=tool_schemas,
+            )
+        except Exception:
+            logger.debug("Could not estimate ACP native context usage", exc_info=True)
+            consumed = int(getattr(ctx_compressor, "last_prompt_tokens", 0) or 0)
+
+        # Both figures are clamped so a negative value is never sent.
+        return UsageUpdate(
+            session_update="usage_update",
+            size=max(window, 0),
+            used=max(consumed, 0),
+        )
+
+    async def _send_usage_update(self, state: SessionState) -> None:
+        """Push the session's context-usage snapshot to the ACP client.
+
+        Does nothing when there is no connection or when no usage payload can
+        be built.  Delivery failures are logged as warnings and not re-raised.
+        """
+        payload = self._build_usage_update(state) if self._conn else None
+        if payload is None:
+            return
+        try:
+            await self._conn.session_update(
+                session_id=state.session_id,
+                update=payload,
+            )
+        except Exception:
+            logger.warning(
+                "Failed to send ACP usage update for session %s",
+                state.session_id,
+                exc_info=True,
+            )
+
+    def _schedule_usage_update(self, state: SessionState) -> None:
+        """Queue a context-usage refresh on the running event loop.
+
+        The send is wrapped in a task via ``call_soon`` rather than awaited, so
+        the caller can return its ACP response first.  No-op without a
+        connection.
+        """
+        if self._conn:
+            # NOTE(review): the task created here is not stored anywhere, and
+            # asyncio documents that the loop keeps only weak references to
+            # tasks -- confirm this fire-and-forget is acceptable.
+            asyncio.get_running_loop().call_soon(
+                asyncio.create_task,
+                self._send_usage_update(state),
+            )
+
    async def _register_session_mcp_servers(
        self,
        state: SessionState,
@@ -485,37 +548,99 @@ class HermesACPAgent(acp.Agent):
            )
        return None

+    @staticmethod
+    def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
+        """Pull the tool name and argument dict out of an OpenAI-style tool_call.
+
+        The nested ``function`` object is consulted first, then the top-level
+        ``name`` / ``arguments`` / ``args`` keys.  String arguments are decoded
+        as JSON; undecodable strings are returned as ``{"raw": <string>}``.
+        Anything that does not end up as a dict becomes ``{}``, and a missing
+        name becomes ``"unknown_tool"``.
+        """
+        fn_payload = tool_call.get("function")
+        if not isinstance(fn_payload, dict):
+            fn_payload = {}
+
+        tool_name = str(fn_payload.get("name") or tool_call.get("name") or "unknown_tool")
+        arguments = (
+            fn_payload.get("arguments")
+            or tool_call.get("arguments")
+            or tool_call.get("args")
+            or {}
+        )
+        if isinstance(arguments, str):
+            try:
+                arguments = json.loads(arguments)
+            except Exception:
+                arguments = {"raw": arguments}
+        return tool_name, (arguments if isinstance(arguments, dict) else {})
+
+    @staticmethod
+    def _history_tool_call_id(tool_call: dict[str, Any]) -> str:
+        """Return the tool call id used when replaying ACP history.
+
+        The first truthy value among ``id``, ``call_id`` and ``tool_call_id``
+        is stringified and stripped; ``""`` is returned when none is set.
+        """
+        for key in ("id", "call_id", "tool_call_id"):
+            candidate = tool_call.get(key)
+            if candidate:
+                return str(candidate).strip()
+        return ""
+
    async def _replay_session_history(self, state: SessionState) -> None:
        """Send persisted user/assistant history to clients during session/load.

        Zed's ACP history UI calls ``session/load`` after the user picks an item
        from the Agents sidebar. The agent must then replay the full conversation
-        as ``user_message_chunk`` / ``agent_message_chunk`` notifications; merely
-        restoring server-side state makes Hermes remember context, but leaves the
-        editor looking like a clean thread.
+        as user/assistant chunks plus reconstructed tool-call start/completion
+        notifications; merely restoring server-side state makes Hermes remember
+        context, but leaves the editor looking like a clean thread.
        """
        if not self._conn or not state.history:
            return

-        for message in state.history:
-            role = str(message.get("role") or "")
-            if role not in {"user", "assistant"}:
-                continue
-            text = self._history_message_text(message)
-            if not text:
-                continue
-            update = self._history_message_update(role=role, text=text)
-            if update is None:
-                continue
+        active_tool_calls: dict[str, tuple[str, dict[str, Any]]] = {}
+
+        async def _send(update: Any) -> bool:
            try:
                await self._conn.session_update(session_id=state.session_id, update=update)
+                return True
            except Exception:
                logger.warning(
                    "Failed to replay ACP history for session %s",
                    state.session_id,
                    exc_info=True,
                )
-                return
+                return False
+
+        for message in state.history:
+            role = str(message.get("role") or "")
+
+            if role in {"user", "assistant"}:
+                text = self._history_message_text(message)
+                if text:
+                    update = self._history_message_update(role=role, text=text)
+                    if update is not None and not await _send(update):
+                        return
+
+            if role == "assistant" and isinstance(message.get("tool_calls"), list):
+                for tool_call in message["tool_calls"]:
+                    if not isinstance(tool_call, dict):
+                        continue
+                    tool_call_id = self._history_tool_call_id(tool_call)
+                    if not tool_call_id:
+                        continue
+                    tool_name, args = self._history_tool_call_name_args(tool_call)
+                    active_tool_calls[tool_call_id] = (tool_name, args)
+                    if not await _send(build_tool_start(tool_call_id, tool_name, args)):
+                        return
+                continue
+
+            if role == "tool":
+                tool_call_id = str(message.get("tool_call_id") or "").strip()
+                tool_name = str(message.get("tool_name") or "").strip()
+                function_args: dict[str, Any] | None = None
+                if tool_call_id in active_tool_calls:
+                    tool_name, function_args = active_tool_calls.pop(tool_call_id)
+                if not tool_call_id or not tool_name:
+                    continue
+                result = message.get("content")
+                if not await _send(
+                    build_tool_complete(
+                        tool_call_id,
+                        tool_name,
+                        result=result if isinstance(result, str) else None,
+                        function_args=function_args,
+                    )
+                ):
+                    return

    async def new_session(
        self,
@@ -527,11 +652,24 @@ class HermesACPAgent(acp.Agent):
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("New session %s (cwd=%s)", state.session_id, cwd)
        self._schedule_available_commands_update(state.session_id)
+        self._schedule_usage_update(state)
        return NewSessionResponse(
            session_id=state.session_id,
            models=self._build_model_state(state),
        )

+    def _schedule_history_replay(self, state: SessionState) -> None:
+        """Queue the history replay so it runs after session/load or session/resume returns.
+
+        Zed only attaches streamed transcript/tool updates once the load/resume
+        response has completed.  Replay notifications sent while that request
+        is still in flight can look correct in server logs while the editor
+        drops them or fails to attach the tool-call history, so the replay is
+        handed to the event loop instead of being awaited inline.
+        """
+        asyncio.get_running_loop().call_soon(
+            asyncio.create_task,
+            self._replay_session_history(state),
+        )
+
    async def load_session(
        self,
        cwd: str,
@@ -545,8 +683,9 @@ class HermesACPAgent(acp.Agent):
            return None
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Loaded session %s", session_id)
-        await self._replay_session_history(state)
+        self._schedule_history_replay(state)
        self._schedule_available_commands_update(session_id)
+        self._schedule_usage_update(state)
        return LoadSessionResponse(models=self._build_model_state(state))

    async def resume_session(
@@ -562,8 +701,9 @@ class HermesACPAgent(acp.Agent):
            state = self.session_manager.create_session(cwd=cwd)
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Resumed session %s", state.session_id)
-        await self._replay_session_history(state)
+        self._schedule_history_replay(state)
        self._schedule_available_commands_update(state.session_id)
+        self._schedule_usage_update(state)
        return ResumeSessionResponse(models=self._build_model_state(state))

    async def cancel(self, session_id: str, **kwargs: Any) -> None:
@@ -712,6 +852,7 @@ class HermesACPAgent(acp.Agent):
                if self._conn:
                    update = acp.update_agent_message_text(response_text)
                    await self._conn.session_update(session_id, update)
+                    await self._send_usage_update(state)
                return PromptResponse(stop_reason="end_turn")

        # If Zed sends another regular prompt while the same ACP session is
@@ -744,24 +885,37 @@ class HermesACPAgent(acp.Agent):
        tool_call_meta: dict[str, dict[str, Any]] = {}
        previous_approval_cb = None

+        streamed_message = False
+
        if conn:
            tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
-            thinking_cb = make_thinking_cb(conn, session_id, loop)
+            reasoning_cb = make_thinking_cb(conn, session_id, loop)
            step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
            message_cb = make_message_cb(conn, session_id, loop)
+
+            def stream_delta_cb(text: str) -> None:
+                nonlocal streamed_message
+                if text:
+                    streamed_message = True
+                message_cb(text)
+
            approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
        else:
            tool_progress_cb = None
-            thinking_cb = None
+            reasoning_cb = None
            step_cb = None
-            message_cb = None
+            stream_delta_cb = None
            approval_cb = None

        agent = state.agent
        agent.tool_progress_callback = tool_progress_cb
-        agent.thinking_callback = thinking_cb
+        # ACP thought panes should not receive Hermes' local kawaii waiting/status
+        # updates. Route provider/model reasoning deltas instead; if the provider
+        # emits no reasoning, Zed should not get a fake "thinking" accordion.
+        agent.thinking_callback = None
+        agent.reasoning_callback = reasoning_cb
        agent.step_callback = step_cb
-        agent.message_callback = message_cb
+        agent.stream_delta_callback = stream_delta_cb

        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
        # Set it INSIDE _run_agent so the TLS write happens in the executor
@@ -867,7 +1021,7 @@ class HermesACPAgent(acp.Agent):
                )
            except Exception:
                logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
-        if final_response and conn:
+        if final_response and conn and not streamed_message:
            update = acp.update_agent_message_text(final_response)
            await conn.session_update(session_id, update)

@@ -903,6 +1057,8 @@ class HermesACPAgent(acp.Agent):
                cached_read_tokens=result.get("cache_read_tokens"),
            )

+        await self._send_usage_update(state)
+
        stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
        return PromptResponse(stop_reason=stop_reason, usage=usage)

@@ -1035,22 +1191,84 @@ class HermesACPAgent(acp.Agent):
            return f"Could not list tools: {e}"

    def _cmd_context(self, args: str, state: SessionState) -> str:
+        """Show ACP session context pressure and compression guidance."""
        n_messages = len(state.history)
-        if n_messages == 0:
-            return "Conversation is empty (no messages yet)."
-        # Count by role
+
+        # Count by role.
        roles: dict[str, int] = {}
        for msg in state.history:
            role = msg.get("role", "unknown")
            roles[role] = roles.get(role, 0) + 1
+
+        agent = state.agent
+        model = state.model or getattr(agent, "model", "")
+        provider = getattr(agent, "provider", None) or "auto"
+        compressor = getattr(agent, "context_compressor", None)
+        context_length = int(getattr(compressor, "context_length", 0) or 0)
+        threshold_tokens = int(getattr(compressor, "threshold_tokens", 0) or 0)
+
+        try:
+            from agent.model_metadata import estimate_request_tokens_rough
+
+            system_prompt = getattr(agent, "_cached_system_prompt", "") or ""
+            tools = getattr(agent, "tools", None) or None
+            approx_tokens = estimate_request_tokens_rough(
+                state.history,
+                system_prompt=system_prompt,
+                tools=tools,
+            )
+        except Exception:
+            logger.debug("Could not estimate ACP context usage", exc_info=True)
+            approx_tokens = 0
+
+        if threshold_tokens <= 0 and context_length > 0:
+            threshold_tokens = int(context_length * 0.80)
+
        lines = [
-            f"Conversation: {n_messages} messages",
+            f"Conversation: {n_messages} messages"
+            if n_messages
+            else "Conversation is empty (no messages yet).",
            f"  user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
            f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
        ]
-        model = state.model or getattr(state.agent, "model", "")
        if model:
            lines.append(f"Model: {model}")
+        lines.append(f"Provider: {provider}")
+
+        if approx_tokens > 0:
+            if context_length > 0:
+                usage_pct = (approx_tokens / context_length) * 100
+                lines.append(
+                    f"Context usage: ~{approx_tokens:,} / {context_length:,} tokens ({usage_pct:.1f}%)"
+                )
+            else:
+                lines.append(f"Context usage: ~{approx_tokens:,} tokens")
+
+        if threshold_tokens > 0:
+            if approx_tokens > 0:
+                threshold_pct = (threshold_tokens / context_length) * 100 if context_length > 0 else 0
+                remaining = max(threshold_tokens - approx_tokens, 0)
+                if approx_tokens >= threshold_tokens:
+                    lines.append(
+                        f"Compression: due now (threshold ~{threshold_tokens:,}"
+                        + (f", {threshold_pct:.0f}%" if threshold_pct else "")
+                        + "). Run /compact."
+                    )
+                else:
+                    lines.append(
+                        f"Compression: ~{remaining:,} tokens until threshold "
+                        f"(~{threshold_tokens:,}"
+                        + (f", {threshold_pct:.0f}%" if threshold_pct else "")
+                        + ")."
+                    )
+            else:
+                lines.append(f"Compression threshold: ~{threshold_tokens:,} tokens")
+
+        if getattr(agent, "compression_enabled", True) is False:
+            lines.append("Compression is disabled for this agent.")
+        else:
+            lines.append("Tip: run /compact to compress manually before the threshold.")
+
        return "\n".join(lines)

    def _cmd_reset(self, args: str, state: SessionState) -> str:
@@ -28,6 +28,11 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
    "terminal": "execute",
    "process": "execute",
    "execute_code": "execute",
+    # Session/meta tools
+    "todo": "other",
+    "skill_view": "read",
+    "skills_list": "read",
+    "skill_manage": "edit",
    # Web / fetch
    "web_search": "fetch",
    "web_extract": "fetch",
@@ -51,6 +56,28 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
 }


+# Set of Hermes tool names, grouped by area in the inline comments below.
+# NOTE(review): the consumer of this set is not visible in this hunk;
+# presumably these are the tools that have dedicated ACP title/result
+# rendering -- confirm before relying on that.
+_POLISHED_TOOLS = {
+    # Core operator loop
+    "todo", "memory", "session_search", "delegate_task",
+    # Files / execution
+    "read_file", "write_file", "patch", "search_files", "terminal", "process", "execute_code",
+    # Skills / web / browser / media
+    "skill_view", "skills_list", "skill_manage", "web_search", "web_extract",
+    "browser_navigate", "browser_click", "browser_type", "browser_press", "browser_scroll",
+    "browser_back", "browser_snapshot", "browser_console", "browser_get_images", "browser_vision",
+    "vision_analyze", "image_generate", "text_to_speech",
+    # Schedulers / platform integrations
+    "cronjob", "send_message", "clarify", "discord", "discord_admin",
+    "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
+    "feishu_doc_read", "feishu_drive_list_comments", "feishu_drive_list_comment_replies",
+    "feishu_drive_reply_comment", "feishu_drive_add_comment",
+    "kanban_create", "kanban_show", "kanban_comment", "kanban_complete",
+    "kanban_block", "kanban_link", "kanban_heartbeat",
+    "yb_query_group_info", "yb_query_group_members", "yb_search_sticker",
+    "yb_send_dm", "yb_send_sticker", "mixture_of_agents",
+}
+
+
 def get_tool_kind(tool_name: str) -> ToolKind:
    """Return the ACP ToolKind for a hermes tool, defaulting to 'other'."""
    return TOOL_KIND_MAP.get(tool_name, "other")
@@ -85,18 +112,645 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
        if urls:
            return f"extract: {urls[0]}" + (f" (+{len(urls)-1})" if len(urls) > 1 else "")
        return "web extract"
+    if tool_name == "process":
+        action = str(args.get("action") or "").strip() or "manage"
+        sid = str(args.get("session_id") or "").strip()
+        return f"process {action}: {sid}" if sid else f"process {action}"
    if tool_name == "delegate_task":
+        tasks = args.get("tasks")
+        if isinstance(tasks, list) and tasks:
+            return f"delegate batch ({len(tasks)} tasks)"
        goal = args.get("goal", "")
        if goal and len(goal) > 60:
            goal = goal[:57] + "..."
        return f"delegate: {goal}" if goal else "delegate task"
+    if tool_name == "session_search":
+        query = str(args.get("query") or "").strip()
+        return f"session search: {query}" if query else "recent sessions"
+    if tool_name == "memory":
+        action = str(args.get("action") or "manage").strip() or "manage"
+        target = str(args.get("target") or "memory").strip() or "memory"
+        return f"memory {action}: {target}"
    if tool_name == "execute_code":
-        return "execute code"
+        code = str(args.get("code") or "").strip()
+        first_line = next((line.strip() for line in code.splitlines() if line.strip()), "")
+        if first_line:
+            if len(first_line) > 70:
+                first_line = first_line[:67] + "..."
+            return f"python: {first_line}"
+        return "python code"
+    if tool_name == "todo":
+        items = args.get("todos")
+        if isinstance(items, list):
+            return f"todo ({len(items)} item{'s' if len(items) != 1 else ''})"
+        return "todo"
+    if tool_name == "skill_view":
+        name = str(args.get("name") or "?").strip() or "?"
+        file_path = str(args.get("file_path") or "").strip()
+        suffix = f"/{file_path}" if file_path else ""
+        return f"skill view ({name}{suffix})"
+    if tool_name == "skills_list":
+        category = str(args.get("category") or "").strip()
+        return f"skills list ({category})" if category else "skills list"
+    if tool_name == "skill_manage":
+        action = str(args.get("action") or "manage").strip() or "manage"
+        name = str(args.get("name") or "?").strip() or "?"
+        file_path = str(args.get("file_path") or "").strip()
+        target = f"{name}/{file_path}" if file_path else name
+        if len(target) > 64:
+            target = target[:61] + "..."
+        return f"skill {action}: {target}"
+    if tool_name == "browser_navigate":
+        return f"navigate: {args.get('url', '?')}"
+    if tool_name == "browser_snapshot":
+        return "browser snapshot"
+    if tool_name == "browser_vision":
+        return f"browser vision: {str(args.get('question', '?'))[:50]}"
+    if tool_name == "browser_get_images":
+        return "browser images"
    if tool_name == "vision_analyze":
-        return f"analyze image: {args.get('question', '?')[:50]}"
+        return f"analyze image: {str(args.get('question', '?'))[:50]}"
+    if tool_name == "image_generate":
+        prompt = str(args.get("prompt") or args.get("description") or "").strip()
+        return f"generate image: {prompt[:50]}" if prompt else "generate image"
+    if tool_name == "cronjob":
+        action = str(args.get("action") or "manage").strip() or "manage"
+        job_id = str(args.get("job_id") or args.get("id") or "").strip()
+        return f"cron {action}: {job_id}" if job_id else f"cron {action}"
    return tool_name


+def _text(content: str) -> Any:
+    """Wrap a plain string as ACP tool content holding a single text block."""
+    block = acp.text_block(content)
+    return acp.tool_content(block)
+
+
+def _json_loads_maybe(value: Optional[str]) -> Any:
+    """Best-effort JSON decode of a tool result.
+
+    Non-string input is returned unchanged.  A string is first parsed as a
+    whole JSON document; if that fails, only the leading JSON value is
+    decoded.  Returns ``None`` when neither attempt succeeds.
+    """
+    if not isinstance(value, str):
+        return value
+
+    # Some Hermes tools append a human hint after a JSON payload, e.g.
+    # ``{...}\n\n[Hint: Results truncated...]``.  The second strategy keeps the
+    # structured rendering path by decoding just the first JSON value.
+    strategies = (
+        lambda: json.loads(value),
+        lambda: json.JSONDecoder().raw_decode(value.lstrip())[0],
+    )
+    for decode in strategies:
+        try:
+            return decode()
+        except Exception:
+            continue
+    return None
+
+
+def _truncate_text(text: str, limit: int = 5000) -> str:
+    """Shorten ``text`` to roughly ``limit`` characters.
+
+    Text no longer than ``limit`` is returned unchanged.  Otherwise the first
+    ``limit - 100`` characters (never fewer than zero) are kept and a note
+    giving the original length is appended.
+    """
+    total = len(text)
+    if total > limit:
+        keep = max(0, limit - 100)
+        return f"{text[:keep]}\n... ({total} chars total, truncated)"
+    return text
+
+
+def _fenced_text(text: str, language: str = "") -> str:
+    """Return a Markdown fence that cannot be broken by backticks in text.
+
+    The fence is one backtick longer than the longest run of consecutive
+    backticks found in ``text``, and never shorter than three.
+
+    Args:
+        text: Literal content to place inside the fence.
+        language: Optional info string appended to the opening fence.
+
+    Returns:
+        ``text`` wrapped in an opening and a closing fence line.
+    """
+    # Measure runs of backticks themselves.  Splitting on "`" and measuring
+    # the pieces would give the length of the text *between* backticks, which
+    # lets a ``` line inside the content close a three-backtick fence early.
+    longest = 0
+    current = 0
+    for char in text:
+        current = current + 1 if char == "`" else 0
+        if current > longest:
+            longest = current
+    fence = "`" * max(3, longest + 1)
+    return f"{fence}{language}\n{text}\n{fence}"
+
+
+def _format_todo_result(result: Optional[str]) -> Optional[str]:
+    """Render a ``todo`` tool result as a Markdown checklist.
+
+    Returns ``None`` unless the result decodes to a dict with a ``todos``
+    list.  Each dict item with a non-empty ``content`` (or ``id``) becomes
+    one bullet prefixed by a status icon; a progress line is appended when
+    the result carries a non-empty ``summary`` dict.
+    """
+    payload = _json_loads_maybe(result)
+    if not isinstance(payload, dict):
+        return None
+    todos = payload.get("todos")
+    if not isinstance(todos, list):
+        return None
+
+    stats = payload.get("summary")
+    if not isinstance(stats, dict):
+        stats = {}
+
+    markers = {
+        "completed": "✅",
+        "in_progress": "🔄",
+        "pending": "⏳",
+        "cancelled": "✗",
+    }
+    rendered = ["**Todo list**", ""]
+    for entry in todos:
+        if not isinstance(entry, dict):
+            continue
+        label = str(entry.get("content") or entry.get("id") or "").strip()
+        if not label:
+            continue
+        state = str(entry.get("status") or "pending")
+        rendered.append(f"- {markers.get(state, '•')} {label}")
+
+    if stats:
+        progress = (
+            "**Progress:** "
+            f"{stats.get('completed', 0)} completed, "
+            f"{stats.get('in_progress', 0)} in progress, "
+            f"{stats.get('pending', 0)} pending"
+        )
+        dropped = stats.get("cancelled", 0)
+        if dropped:
+            # The cancelled count is only mentioned when it is non-zero.
+            progress += f", {dropped} cancelled"
+        rendered += ["", progress]
+    return "\n".join(rendered)
+
+
+def _format_read_file_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
+    """Render a ``read_file`` tool result as a header plus fenced file content.
+
+    Returns ``None`` when the result does not decode to a dict or carries no
+    string ``content``, and a ``Read failed: ...`` line when it reports an
+    error without content.  The path comes from ``args`` first, then from
+    the result; ``offset`` and ``limit`` come from ``args`` only.
+    """
+    payload = _json_loads_maybe(result)
+    if not isinstance(payload, dict):
+        return None
+
+    failure = payload.get("error")
+    body = payload.get("content")
+    if failure and not body:
+        return f"Read failed: {failure}"
+    if not isinstance(body, str):
+        return None
+
+    call_args = args or {}
+    target = str(call_args.get("path") or payload.get("path") or "file").strip()
+    requested = (
+        ("from line", call_args.get("offset")),
+        ("limit", call_args.get("limit")),
+    )
+    window = [f"{label} {value}" for label, value in requested if value]
+
+    title = f"Read {target}"
+    if window:
+        title += f" ({', '.join(window)})"
+    total = payload.get("total_lines")
+    if total is not None:
+        title += f" — {total} total lines"
+
+    # Hermes read_file output is line-numbered with `|`.  Sent as raw Markdown,
+    # Zed can interpret the pipes as tables and collapse the layout, so the
+    # payload is fenced to keep file lines readable and literal.
+    return _truncate_text(f"{title}\n\n{_fenced_text(body)}")
+
+
+def _format_search_files_result(result: Optional[str]) -> Optional[str]:
+    """Render a ``search_files`` tool result as a compact match list.
+
+    Returns ``None`` unless the result decodes to a dict with a ``matches``
+    list.  At most 12 matches are listed, each as ``path:line`` followed by
+    a whitespace-collapsed snippet of up to 300 characters; a hint is added
+    when the tool reported truncation or more matches exist than are shown.
+    """
+    payload = _json_loads_maybe(result)
+    if not isinstance(payload, dict):
+        return None
+    hits = payload.get("matches")
+    if not isinstance(hits, list):
+        return None
+
+    visible = hits[:12]
+    shown = len(visible)
+    total = payload.get("total_count", len(hits))
+    plural = "" if total == 1 else "es"
+    report = [
+        "Search results",
+        f"Found {total} match{plural}; showing {shown}.",
+        "",
+    ]
+
+    for hit in visible:
+        if not isinstance(hit, dict):
+            # Non-dict matches are listed verbatim.
+            report.append(f"- {hit}")
+            continue
+
+        where = str(hit.get("path") or hit.get("file") or hit.get("filename") or "?")
+        line_no = hit.get("line") or hit.get("line_number")
+        if line_no:
+            where = f"{where}:{line_no}"
+        report.append(f"- {where}")
+
+        excerpt = str(hit.get("content") or hit.get("text") or "").strip()
+        if excerpt:
+            report.append("  " + _truncate_text(" ".join(excerpt.split()), 300))
+
+    if bool(payload.get("truncated")) or len(hits) > shown:
+        report += [
+            "",
+            "Results truncated. Narrow the search, add file_glob, or use offset to page.",
+        ]
+    return _truncate_text("\n".join(report), limit=7000)
+
+
+def _format_execute_code_result(result: Optional[str]) -> Optional[str]:
+    """Render an ``execute_code`` tool result as exit code, output and error.
+
+    When the result does not decode to a dict, the raw string is returned if
+    it is non-blank, otherwise ``None``.  The rendered text is truncated to
+    the default limit.
+    """
+    payload = _json_loads_maybe(result)
+    if not isinstance(payload, dict):
+        if isinstance(result, str) and result.strip():
+            return result
+        return None
+
+    code = payload.get("exit_code")
+    report = ["Execution complete" if code is None else f"Exit code: {code}"]
+    for heading, key in (("Output:", "output"), ("Error:", "error")):
+        body = str(payload.get(key) or "")
+        if body:
+            report += ["", heading, body]
+    return _truncate_text("\n".join(report))
+
+
+def _extract_markdown_headings(content: str, limit: int = 8) -> list[str]:
+    """Collect the text of lines that start with ``#``, in document order.
+
+    Leading ``#`` characters and surrounding whitespace are removed, and
+    lines that are empty after that are skipped.  Scanning stops once
+    ``limit`` headings have been gathered.
+    """
+    collected: list[str] = []
+    for raw_line in content.splitlines():
+        candidate = raw_line.strip()
+        title = candidate.lstrip("#").strip() if candidate.startswith("#") else ""
+        if title:
+            collected.append(title)
+        # The cap is checked after every line, including non-heading ones.
+        if len(collected) >= limit:
+            break
+    return collected
+
+
+def _format_skill_view_result(result: Optional[str]) -> Optional[str]:
+    """Render a ``skill_view`` tool result as a short Markdown summary.
+
+    Returns ``None`` when the result does not decode to a dict and a
+    ``Skill view failed: ...`` line when ``success`` is ``False``.  The
+    summary lists name, file, description, content size, linked-file count
+    and the section headings found in the content; the content itself is
+    not included.
+    """
+    payload = _json_loads_maybe(result)
+    if not isinstance(payload, dict):
+        return None
+    if payload.get("success") is False:
+        return f"Skill view failed: {payload.get('error', 'unknown error')}"
+
+    skill_name = str(payload.get("name") or "skill")
+    source_file = str(payload.get("file") or payload.get("path") or "SKILL.md")
+    summary = str(payload.get("description") or "").strip()
+    body = str(payload.get("content") or "")
+    attachments = payload.get("linked_files")
+
+    report = [
+        "**Skill loaded**",
+        "",
+        f"- **Name:** `{skill_name}`",
+        f"- **File:** `{source_file}`",
+    ]
+    if summary:
+        report.append(f"- **Description:** {summary}")
+    if body:
+        report.append(f"- **Content:** {len(body):,} chars loaded into agent context")
+    if isinstance(attachments, dict) and attachments:
+        # Only list-valued entries contribute to the linked-file count.
+        total_linked = sum(len(group) for group in attachments.values() if isinstance(group, list))
+        report.append(f"- **Linked files:** {total_linked}")
+
+    sections = _extract_markdown_headings(body)
+    if sections:
+        report += ["", "**Sections**"]
+        report += [f"- {section}" for section in sections]
+
+    report += [
+        "",
+        "_Full skill content is available to the agent but hidden here to keep ACP readable._",
+    ]
+    return "\n".join(report)
+
+
+def _format_skill_manage_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
+    """Render a ``skill_manage`` tool result as a short Markdown summary.
+
+    Args:
+        result: Raw tool output; expected to decode to a JSON object.
+        args: Arguments the tool was called with, if known.  ``action`` is
+            read from here only; ``name`` and ``file_path`` are read from
+            here first and fall back to the decoded result.
+
+    Returns:
+        The Markdown summary, or ``None`` when ``result`` does not decode to
+        a dict.
+    """
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return None
+
+    call_args = args or {}
+    action = str(call_args.get("action") or "manage").strip() or "manage"
+    name = str(call_args.get("name") or data.get("name") or "skill").strip() or "skill"
+    file_path = str(call_args.get("file_path") or data.get("file_path") or "SKILL.md").strip() or "SKILL.md"
+    # Only an explicit ``success: false`` counts as failure.
+    success = data.get("success")
+    status = "✅ Skill updated" if success is not False else "✗ Skill update failed"
+
+    lines = [f"**{status}**", "", f"- **Action:** `{action}`", f"- **Skill:** `{name}`"]
+    if action not in {"delete"}:
+        lines.append(f"- **File:** `{file_path}`")
+
+    message = str(data.get("message") or data.get("error") or "").strip()
+    if message:
+        lines.append(f"- **Result:** {message}")
+
+    # Use the first key that is actually present.  Chaining the lookups with
+    # ``or`` would discard a legitimate count of 0 and hide the line.
+    replacements = data.get("replacements")
+    if replacements is None:
+        replacements = data.get("replacement_count")
+    if replacements is not None:
+        lines.append(f"- **Replacements:** {replacements}")
+
+    path = str(data.get("path") or "").strip()
+    if path:
+        lines.append(f"- **Path:** `{path}`")
+
+    return "\n".join(lines)
+
+
+def _format_web_search_result(result: Optional[str]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return None
+    web = data.get("data", {}).get("web") if isinstance(data.get("data"), dict) else data.get("web")
+    if not isinstance(web, list):
+        return None
+    lines = [f"Web results: {len(web)}"]
+    for item in web[:10]:
+        if not isinstance(item, dict):
+            continue
+        title = str(item.get("title") or item.get("url") or "result").strip()
+        url = str(item.get("url") or "").strip()
+        desc = str(item.get("description") or "").strip()
+        lines.append(f"• {title}" + (f" — {url}" if url else ""))
+        if desc:
+            lines.append(f"  {desc}")
+    return _truncate_text("\n".join(lines))
+
+
+def _format_web_extract_result(result: Optional[str]) -> Optional[str]:
+    """Return only web_extract errors for ACP; success stays compact via title."""
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return None
+    if data.get("success") is False and data.get("error"):
+        return f"Web extract failed: {data.get('error')}"
+    results = data.get("results")
+    if not isinstance(results, list):
+        return None
+
+    failures: list[str] = []
+    for item in results[:10]:
+        if not isinstance(item, dict):
+            continue
+        error = str(item.get("error") or "").strip()
+        if not error or error in {"None", "null"}:
+            continue
+        url = str(item.get("url") or "").strip()
+        title = str(item.get("title") or url or "Untitled").strip()
+        failures.append(
+            f"- {title}" + (f" — {url}" if url and url != title else "") + f"\n  Error: {_truncate_text(error, limit=500)}"
+        )
+
+    if not failures:
+        return None
+    lines = [f"Web extract failed for {len(failures)} URL{'s' if len(failures) != 1 else ''}"]
+    lines.extend(failures)
+    return "\n".join(lines)
+
+
+def _format_process_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return result if isinstance(result, str) and result.strip() else None
+    if data.get("success") is False and data.get("error"):
+        return f"Process error: {data.get('error')}"
+    action = str((args or {}).get("action") or "process").strip() or "process"
+    if isinstance(data.get("processes"), list):
+        processes = data["processes"]
+        lines = [f"Processes: {len(processes)}"]
+        for proc in processes[:20]:
+            if not isinstance(proc, dict):
+                lines.append(f"- {proc}")
+                continue
+            sid = str(proc.get("session_id") or proc.get("id") or "?")
+            status = str(proc.get("status") or ("exited" if proc.get("exited") else "running"))
+            cmd = str(proc.get("command") or "").strip()
+            pid = proc.get("pid")
+            code = proc.get("exit_code")
+            bits = [status]
+            if pid is not None:
+                bits.append(f"pid {pid}")
+            if code is not None:
+                bits.append(f"exit {code}")
+            lines.append(f"- `{sid}` — {', '.join(bits)}" + (f" — {cmd[:120]}" if cmd else ""))
+        if len(processes) > 20:
+            lines.append(f"... {len(processes) - 20} more process(es)")
+        return "\n".join(lines)
+
+    status = str(data.get("status") or data.get("state") or action).strip()
+    sid = str(data.get("session_id") or (args or {}).get("session_id") or "").strip()
+    lines = [f"Process {action}: {status}" + (f" (`{sid}`)" if sid else "")]
+    for key, label in (("command", "Command"), ("pid", "PID"), ("exit_code", "Exit code"), ("returncode", "Exit code"), ("lines", "Lines")):
+        if data.get(key) is not None:
+            lines.append(f"- **{label}:** {data.get(key)}")
+    output = data.get("output") or data.get("new_output") or data.get("log") or data.get("stdout")
+    error = data.get("error") or data.get("stderr")
+    if output:
+        lines.extend(["", "Output:", _truncate_text(str(output), limit=5000)])
+    if error:
+        lines.extend(["", "Error:", _truncate_text(str(error), limit=2000)])
+    msg = data.get("message")
+    if msg and not output and not error:
+        lines.append(str(msg))
+    return _truncate_text("\n".join(lines), limit=7000)
+
+
+def _format_delegate_result(result: Optional[str]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return None
+    if data.get("error") and not isinstance(data.get("results"), list):
+        return f"Delegation failed: {data.get('error')}"
+    results = data.get("results")
+    if not isinstance(results, list):
+        return None
+    total = data.get("total_duration_seconds")
+    lines = [f"Delegation results: {len(results)} task{'s' if len(results) != 1 else ''}" + (f" in {total}s" if total is not None else "")]
+    icon = {"completed": "✅", "failed": "✗", "error": "✗", "timeout": "⏱", "interrupted": "⚠"}
+    for item in results:
+        if not isinstance(item, dict):
+            lines.append(f"- {item}")
+            continue
+        idx = item.get("task_index")
+        status = str(item.get("status") or "unknown")
+        model = item.get("model")
+        dur = item.get("duration_seconds")
+        role = item.get("_child_role")
+        header = f"{icon.get(status, '•')} Task {idx + 1 if isinstance(idx, int) else '?'}: {status}"
+        bits = []
+        if model:
+            bits.append(str(model))
+        if role:
+            bits.append(f"role={role}")
+        if dur is not None:
+            bits.append(f"{dur}s")
+        if bits:
+            header += " (" + ", ".join(bits) + ")"
+        lines.extend(["", header])
+        summary = str(item.get("summary") or "").strip()
+        error = str(item.get("error") or "").strip()
+        if summary:
+            lines.append(_truncate_text(summary, limit=1200))
+        if error:
+            lines.append("Error: " + _truncate_text(error, limit=800))
+        trace = item.get("tool_trace")
+        if isinstance(trace, list) and trace:
+            names = [str(t.get("tool") or "?") for t in trace if isinstance(t, dict)]
+            if names:
+                lines.append("Tools: " + ", ".join(names[:12]) + (f" (+{len(names)-12})" if len(names) > 12 else ""))
+    return _truncate_text("\n".join(lines), limit=8000)
+
+
+def _format_session_search_result(result: Optional[str]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return None
+    if data.get("success") is False:
+        return f"Session search failed: {data.get('error', 'unknown error')}"
+    results = data.get("results")
+    if not isinstance(results, list):
+        return None
+    mode = data.get("mode") or "search"
+    query = data.get("query")
+    lines = ["Recent sessions" if mode == "recent" else f"Session search results" + (f" for `{query}`" if query else "")]
+    if not results:
+        lines.append(str(data.get("message") or "No matching sessions found."))
+        return "\n".join(lines)
+    for item in results:
+        if not isinstance(item, dict):
+            continue
+        sid = str(item.get("session_id") or "?")
+        title = str(item.get("title") or item.get("when") or "Untitled session").strip()
+        when = str(item.get("last_active") or item.get("started_at") or item.get("when") or "").strip()
+        count = item.get("message_count")
+        source = str(item.get("source") or "").strip()
+        meta = ", ".join(str(x) for x in [when, source, f"{count} msgs" if count is not None else ""] if x)
+        lines.append(f"- **{title}** (`{sid}`)" + (f" — {meta}" if meta else ""))
+        summary = str(item.get("summary") or item.get("preview") or "").strip()
+        if summary:
+            lines.append("  " + _truncate_text(" ".join(summary.split()), limit=500))
+    return _truncate_text("\n".join(lines), limit=7000)
+
+
+def _format_memory_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return None
+    action = str((args or {}).get("action") or "memory").strip() or "memory"
+    target = str(data.get("target") or (args or {}).get("target") or "memory")
+    if data.get("success") is False:
+        lines = [f"✗ Memory {action} failed ({target})", str(data.get("error") or "unknown error")]
+        matches = data.get("matches")
+        if isinstance(matches, list) and matches:
+            lines.append("Matches:")
+            lines.extend(f"- {_truncate_text(str(m), 160)}" for m in matches[:5])
+        return "\n".join(lines)
+    lines = [f"✅ Memory {action} saved ({target})"]
+    if data.get("message"):
+        lines.append(str(data.get("message")))
+    if data.get("entry_count") is not None:
+        lines.append(f"Entries: {data.get('entry_count')}")
+    if data.get("usage"):
+        lines.append(f"Usage: {data.get('usage')}")
+    # Avoid dumping all memory entries into ACP UI; show only the explicit new value preview.
+    preview = str((args or {}).get("content") or (args or {}).get("old_text") or "").strip()
+    if preview:
+        lines.append("Preview: " + _truncate_text(preview, limit=300))
+    return "\n".join(lines)
+
+
+def _format_edit_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
+    """Summarize a file-edit tool result: failure line, success summary, or the truncated raw text."""
+    data = _json_loads_maybe(result)
+    path = str((args or {}).get("path") or "file").strip()
+    if isinstance(data, dict):
+        if data.get("success") is False or data.get("error"):
+            return f"{tool_name} failed for {path}: {data.get('error') or 'unknown error'}"
+        message = str(data.get("message") or "").strip()
+        replacements = data["replacements"] if data.get("replacements") is not None else data.get("replacement_count")  # keep a real 0
+        lines = [f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")]
+        if message:
+            lines.append(message)
+        if replacements is not None:
+            lines.append(f"Replacements: {replacements}")
+        files = data.get("files_modified")
+        if files and isinstance(files, list):
+            lines.append("Files: " + ", ".join(f"`{f}`" for f in files[:8]))
+        return "\n".join(lines)
+    if isinstance(result, str) and result.strip():
+        return _truncate_text(result, limit=3000)
+    return f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")
+
+
+def _format_browser_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return result if isinstance(result, str) and result.strip() else None
+    if data.get("success") is False or data.get("error"):
+        return f"{tool_name} failed: {data.get('error', 'unknown error')}"
+    if tool_name == "browser_get_images":
+        images = data.get("images") or data.get("data")
+        if isinstance(images, list):
+            lines = [f"Images found: {len(images)}"]
+            for img in images[:12]:
+                if isinstance(img, dict):
+                    alt = str(img.get("alt") or "").strip()
+                    url = str(img.get("url") or img.get("src") or "").strip()
+                    lines.append(f"- {alt or 'image'}" + (f" — {url}" if url else ""))
+            return _truncate_text("\n".join(lines), limit=5000)
+    title = str(data.get("title") or data.get("url") or data.get("status") or tool_name)
+    text = str(data.get("text") or data.get("content") or data.get("snapshot") or data.get("analysis") or data.get("message") or "").strip()
+    lines = [title]
+    if data.get("url") and data.get("url") != title:
+        lines.append(str(data.get("url")))
+    if text:
+        lines.extend(["", _truncate_text(text, limit=5000)])
+    return _truncate_text("\n".join(lines), limit=7000)
+
+
+def _format_media_or_cron_result(tool_name: str, result: Optional[str]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, dict):
+        return result if isinstance(result, str) and result.strip() else None
+    if data.get("success") is False or data.get("error"):
+        return f"{tool_name} failed: {data.get('error', 'unknown error')}"
+    lines = [f"✅ {tool_name} completed"]
+    for key in ("file_path", "path", "url", "image_url", "job_id", "id", "status", "message", "next_run"):
+        if data.get(key):
+            lines.append(f"- **{key}:** {data.get(key)}")
+    return "\n".join(lines)
+
+
+def _format_generic_structured_result(tool_name: str, result: Optional[str]) -> Optional[str]:
+    data = _json_loads_maybe(result)
+    if not isinstance(data, (dict, list)):
+        return result if isinstance(result, str) and result.strip() else None
+    if isinstance(data, list):
+        lines = [f"{tool_name}: {len(data)} item{'s' if len(data) != 1 else ''}"]
+        for item in data[:12]:
+            lines.append(f"- {_truncate_text(str(item), limit=240)}")
+        return _truncate_text("\n".join(lines), limit=5000)
+
+    if data.get("success") is False or data.get("error"):
+        return f"{tool_name} failed: {data.get('error', 'unknown error')}"
+
+    lines = [f"✅ {tool_name} completed" if data.get("success") is True else f"{tool_name} result"]
+    priority_keys = (
+        "message", "status", "id", "task_id", "issue_id", "title", "name", "entity_id",
+        "state", "service", "url", "path", "file_path", "count", "total", "next_run",
+    )
+    seen = set()
+    for key in priority_keys:
+        value = data.get(key)
+        if value in (None, "", [], {}):
+            continue
+        seen.add(key)
+        lines.append(f"- **{key}:** {_truncate_text(str(value), limit=500)}")
+
+    for key, value in data.items():
+        if key in seen or key in {"success", "raw", "content", "entries"}:
+            continue
+        if value in (None, "", [], {}):
+            continue
+        if isinstance(value, (dict, list)):
+            preview = json.dumps(value, ensure_ascii=False, default=str)
+        else:
+            preview = str(value)
+        lines.append(f"- **{key}:** {_truncate_text(preview, limit=500)}")
+        if len(lines) >= 14:
+            break
+
+    content = data.get("content")
+    if isinstance(content, str) and content.strip():
+        lines.extend(["", _truncate_text(content.strip(), limit=1500)])
+    return _truncate_text("\n".join(lines), limit=7000)
+
+
+def _build_polished_completion_content(
+    tool_name: str,
+    result: Optional[str],
+    function_args: Optional[Dict[str, Any]],
+) -> Optional[List[Any]]:
+    formatter = {
+        "todo": lambda: _format_todo_result(result),
+        "read_file": lambda: _format_read_file_result(result, function_args),
+        "write_file": lambda: _format_edit_result(tool_name, result, function_args),
+        "patch": lambda: _format_edit_result(tool_name, result, function_args),
+        "search_files": lambda: _format_search_files_result(result),
+        "execute_code": lambda: _format_execute_code_result(result),
+        "process": lambda: _format_process_result(result, function_args),
+        "delegate_task": lambda: _format_delegate_result(result),
+        "session_search": lambda: _format_session_search_result(result),
+        "memory": lambda: _format_memory_result(result, function_args),
+        "skill_view": lambda: _format_skill_view_result(result),
+        "skill_manage": lambda: _format_skill_manage_result(result, function_args),
+        "web_search": lambda: _format_web_search_result(result),
+        "web_extract": lambda: _format_web_extract_result(result),
+        "browser_navigate": lambda: _format_browser_result(tool_name, result, function_args),
+        "browser_snapshot": lambda: _format_browser_result(tool_name, result, function_args),
+        "browser_vision": lambda: _format_browser_result(tool_name, result, function_args),
+        "browser_get_images": lambda: _format_browser_result(tool_name, result, function_args),
+        "vision_analyze": lambda: _format_media_or_cron_result(tool_name, result),
+        "image_generate": lambda: _format_media_or_cron_result(tool_name, result),
+        "cronjob": lambda: _format_media_or_cron_result(tool_name, result),
+    }.get(tool_name)
+    if formatter is None and tool_name in _POLISHED_TOOLS:
+        formatter = lambda: _format_generic_structured_result(tool_name, result)
+    if formatter is None:
+        return None
+    text = formatter()
+    if not text:
+        return None
+    return [_text(text)]
+
+
 def _build_patch_mode_content(patch_text: str) -> List[Any]:
    """Parse V4A patch mode input into ACP diff blocks when possible."""
    if not patch_text:
@@ -258,7 +912,11 @@ def _build_tool_complete_content(
        except Exception:
            pass

-    return [acp.tool_content(acp.text_block(display_result))]
+    polished_content = _build_polished_completion_content(tool_name, result, function_args)
+    if polished_content:
+        return polished_content
+
+    return [_text(display_result)]


 # ---------------------------------------------------------------------------
@@ -288,7 +946,6 @@ def build_tool_start(
            content = _build_patch_mode_content(patch_text)
        return acp.start_tool_call(
            tool_call_id, title, kind=kind, content=content, locations=locations,
-            raw_input=arguments,
        )

    if tool_name == "write_file":
@@ -297,32 +954,172 @@ def build_tool_start(
        content = [acp.tool_diff_content(path=path, new_text=file_content)]
        return acp.start_tool_call(
            tool_call_id, title, kind=kind, content=content, locations=locations,
-            raw_input=arguments,
        )

    if tool_name == "terminal":
        command = arguments.get("command", "")
-        content = [acp.tool_content(acp.text_block(f"$ {command}"))]
+        content = [_text(f"$ {command}")]
        return acp.start_tool_call(
            tool_call_id, title, kind=kind, content=content, locations=locations,
-            raw_input=arguments,
        )

    if tool_name == "read_file":
-        path = arguments.get("path", "")
-        content = [acp.tool_content(acp.text_block(f"Reading {path}"))]
+        # The title and location already identify the file. Sending a synthetic
+        # "Reading ..." content block makes Zed render an unhelpful Output
+        # section before the real file contents arrive on completion.
        return acp.start_tool_call(
-            tool_call_id, title, kind=kind, content=content, locations=locations,
-            raw_input=arguments,
+            tool_call_id, title, kind=kind, content=None, locations=locations,
        )

    if tool_name == "search_files":
        pattern = arguments.get("pattern", "")
        target = arguments.get("target", "content")
-        content = [acp.tool_content(acp.text_block(f"Searching for '{pattern}' ({target})"))]
+        search_path = arguments.get("path")
+        where = f" in {search_path}" if search_path else ""
+        content = [_text(f"Searching for '{pattern}' ({target}){where}")]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "todo":
+        items = arguments.get("todos")
+        if isinstance(items, list):
+            preview_lines = ["Updating todo list", ""]
+            for item in items[:8]:
+                if isinstance(item, dict):
+                    preview_lines.append(f"- {item.get('status', 'pending')}: {item.get('content', item.get('id', ''))}")
+            if len(items) > 8:
+                preview_lines.append(f"... {len(items) - 8} more")
+            content = [_text("\n".join(preview_lines))]
+        else:
+            content = [_text("Reading todo list")]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "skill_view":
+        name = str(arguments.get("name") or "?").strip() or "?"
+        file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
+        content = [_text(f"Loading skill '{name}' ({file_path})")]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "skill_manage":
+        action = str(arguments.get("action") or "manage").strip() or "manage"
+        name = str(arguments.get("name") or "?").strip() or "?"
+        file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
+        path = f"skills/{name}/{file_path}" if file_path else f"skills/{name}"
+
+        if action == "patch":
+            old = str(arguments.get("old_string") or "")
+            new = str(arguments.get("new_string") or "")
+            content = [acp.tool_diff_content(path=path, old_text=old or None, new_text=new)]
+        elif action in {"edit", "create"}:
+            content = [
+                acp.tool_diff_content(
+                    path=path,
+                    new_text=str(arguments.get("content") or ""),
+                )
+            ]
+        elif action == "write_file":
+            target = str(arguments.get("file_path") or "file")
+            content = [
+                acp.tool_diff_content(
+                    path=f"skills/{name}/{target}",
+                    new_text=str(arguments.get("file_content") or ""),
+                )
+            ]
+        elif action in {"delete", "remove_file"}:
+            target = str(arguments.get("file_path") or file_path or name)
+            content = [_text(f"Removing {target} from skill '{name}'")]
+        else:
+            content = [_text(f"Running skill_manage action '{action}' on skill '{name}' ({file_path})")]
+
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "execute_code":
+        code = str(arguments.get("code") or "").strip()
+        preview = code[:1200] + (f"\n... ({len(code)} chars total, truncated)" if len(code) > 1200 else "")
+        content = [_text(f"Running Python helper script:\n\n```python\n{preview}\n```" if preview else "Running Python helper script")]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "web_search":
+        query = str(arguments.get("query") or "").strip()
+        content = [_text(f"Searching the web for: {query}" if query else "Searching the web")]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "web_extract":
+        # The title identifies the URL(s). Avoid a duplicate content block so
+        # Zed renders this like read_file: compact start, concise completion.
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=None, locations=locations,
+        )
+
+    if tool_name == "process":
+        action = str(arguments.get("action") or "").strip() or "manage"
+        sid = str(arguments.get("session_id") or "").strip()
+        data_preview = str(arguments.get("data") or "").strip()
+        text = f"Process action: {action}" + (f"\nSession: {sid}" if sid else "")
+        if data_preview:
+            text += "\nInput: " + _truncate_text(data_preview, limit=500)
+        content = [_text(text)]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "delegate_task":
+        tasks = arguments.get("tasks")
+        if isinstance(tasks, list) and tasks:
+            lines = [f"Delegating {len(tasks)} tasks", ""]
+            for i, task in enumerate(tasks[:8], 1):
+                if isinstance(task, dict):
+                    goal = str(task.get("goal") or "").strip()
+                    role = str(task.get("role") or "").strip()
+                    lines.append(f"{i}. " + _truncate_text(goal, limit=160) + (f" ({role})" if role else ""))
+            if len(tasks) > 8:
+                lines.append(f"... {len(tasks) - 8} more")
+            content = [_text("\n".join(lines))]
+        else:
+            goal = str(arguments.get("goal") or "").strip()
+            content = [_text("Delegating task" + (f":\n{_truncate_text(goal, limit=800)}" if goal else ""))]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "session_search":
+        query = str(arguments.get("query") or "").strip()
+        content = [_text(f"Searching past sessions for: {query}" if query else "Loading recent sessions")]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name == "memory":
+        action = str(arguments.get("action") or "manage").strip() or "manage"
+        target = str(arguments.get("target") or "memory").strip() or "memory"
+        preview = str(arguments.get("content") or arguments.get("old_text") or "").strip()
+        text = f"Memory {action} ({target})"
+        if preview:
+            text += "\nPreview: " + _truncate_text(preview, limit=500)
+        content = [_text(text)]
+        return acp.start_tool_call(
+            tool_call_id, title, kind=kind, content=content, locations=locations,
+        )
+
+    if tool_name in _POLISHED_TOOLS:
+        try:
+            args_text = json.dumps(arguments, indent=2, default=str)
+        except (TypeError, ValueError):
+            args_text = str(arguments)
+        content = [_text(_truncate_text(args_text, limit=1200))]
        return acp.start_tool_call(
            tool_call_id, title, kind=kind, content=content, locations=locations,
-            raw_input=arguments,
        )

    # Generic fallback
@@ -334,7 +1131,7 @@ def build_tool_start(
    content = [acp.tool_content(acp.text_block(args_text))]
    return acp.start_tool_call(
        tool_call_id, title, kind=kind, content=content, locations=locations,
-        raw_input=arguments,
+        raw_input=None if tool_name in _POLISHED_TOOLS else arguments,
    )


@@ -347,18 +1144,22 @@ def build_tool_complete(
 ) -> ToolCallProgress:
    """Create a ToolCallUpdate (progress) event for a completed tool call."""
    kind = get_tool_kind(tool_name)
-    content = _build_tool_complete_content(
-        tool_name,
-        result,
-        function_args=function_args,
-        snapshot=snapshot,
-    )
+    if tool_name == "web_extract":
+        error_text = _format_web_extract_result(result)
+        content = [_text(error_text)] if error_text else None
+    else:
+        content = _build_tool_complete_content(
+            tool_name,
+            result,
+            function_args=function_args,
+            snapshot=snapshot,
+        )
    return acp.update_tool_call(
        tool_call_id,
        kind=kind,
        status="completed",
        content=content,
-        raw_output=result,
+        raw_output=None if tool_name in _POLISHED_TOOLS else result,
    )


@@ -1241,10 +1241,24 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    if not tools:
        return []
    result = []
+    seen_names: set = set()
    for t in tools:
        fn = t.get("function", {})
+        name = fn.get("name", "")
+        # Defensive dedup: Anthropic rejects requests with duplicate tool
+        # names.  Upstream injection paths already dedup, but this guard
+        # converts a hard API failure into a warning.  See: #18478
+        if name and name in seen_names:
+            logger.warning(
+                "convert_tools_to_anthropic: duplicate tool name '%s' "
+                "— dropping second occurrence",
+                name,
+            )
+            continue
+        if name:
+            seen_names.add(name)
        result.append({
-            "name": fn.get("name", ""),
+            "name": name,
            "description": fn.get("description", ""),
            "input_schema": _normalize_tool_input_schema(
                fn.get("parameters", {"type": "object", "properties": {}})
@@ -259,13 +259,68 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
    "kimi-coding-cn",
 })

-# OpenRouter app attribution headers
-_OR_HEADERS = {
+# OpenRouter app attribution headers (base — always sent)
+_OR_HEADERS_BASE = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
    "X-OpenRouter-Title": "Hermes Agent",
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

+# Truthy values for boolean env-var parsing.
+_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})
+
+
+def build_or_headers(or_config: dict | None = None) -> dict:
+    """Build OpenRouter headers, optionally including response-cache headers.
+
+    Precedence for response cache: env var > ``response_cache`` in *or_config*;
+    when neither is set, caching is off and no cache headers are added.
+
+    Environment variables:
+        ``HERMES_OPENROUTER_CACHE`` — truthy (``1``/``true``/``yes``/``on``)
+            enables caching; any other non-empty value disables it.
+            Overrides ``openrouter.response_cache`` in config.yaml.
+        ``HERMES_OPENROUTER_CACHE_TTL`` — integer seconds (1-86400).
+            Overrides ``openrouter.response_cache_ttl`` in config.yaml.
+
+    *or_config* is the ``openrouter`` section from config.yaml.  When *None*,
+    falls back to reading config from disk via ``load_config()``.
+    """
+    headers = dict(_OR_HEADERS_BASE)
+    if or_config is None:
+        try:
+            from hermes_cli.config import load_config
+            or_config = load_config().get("openrouter")
+        except Exception:
+            or_config = None  # best-effort: an unreadable config means "no overrides"
+    # An empty ``openrouter:`` YAML key parses to None; never call .get() on a non-dict.
+    if not isinstance(or_config, dict):
+        or_config = {}
+
+    # Determine cache enabled: env var overrides config.
+    env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower()
+    if env_cache:
+        cache_enabled = env_cache in _TRUTHY_ENV_VALUES
+    else:
+        cache_enabled = or_config.get("response_cache", False)
+    if not cache_enabled:
+        return headers
+    headers["X-OpenRouter-Cache"] = "true"
+
+    # Determine TTL: env var overrides config.
+    env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip()
+    if env_ttl:
+        if env_ttl.isdecimal():  # not isdigit(): that accepts e.g. "²", which int() rejects
+            ttl = int(env_ttl)
+            if 1 <= ttl <= 86400:
+                headers["X-OpenRouter-Cache-TTL"] = str(ttl)
+    else:
+        ttl = or_config.get("response_cache_ttl", 300)
+        # bool is an int subclass; reject it so ``true`` is not sent as a 1-second TTL.
+        if isinstance(ttl, (int, float)) and not isinstance(ttl, bool) and 1 <= ttl <= 86400:
+            headers["X-OpenRouter-Cache-TTL"] = str(int(ttl))
+    return headers
+
 # Vercel AI Gateway app attribution headers. HTTP-Referer maps to
 # referrerUrl and X-Title maps to appName in the gateway's analytics.
 from hermes_cli import __version__ as _HERMES_VERSION
@@ -1149,23 +1204,23 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:



-def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
+def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    pool_present, entry = _select_pool_entry("openrouter")
    if pool_present:
-        or_key = _pool_runtime_api_key(entry)
+        or_key = explicit_api_key or _pool_runtime_api_key(entry)
        if not or_key:
            return None, None
        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
        logger.debug("Auxiliary client: OpenRouter via pool")
        return OpenAI(api_key=or_key, base_url=base_url,
-                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+                       default_headers=build_or_headers()), _OPENROUTER_MODEL

-    or_key = os.getenv("OPENROUTER_API_KEY")
+    or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
    if not or_key:
        return None, None
    logger.debug("Auxiliary client: OpenRouter")
    return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
-                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+                   default_headers=build_or_headers()), _OPENROUTER_MODEL


 def _describe_openrouter_unavailable() -> str:
@@ -1911,7 +1966,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    }
    sync_base_url = str(sync_client.base_url)
    if base_url_host_matches(sync_base_url, "openrouter.ai"):
-        async_kwargs["default_headers"] = dict(_OR_HEADERS)
+        async_kwargs["default_headers"] = build_or_headers()
    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
        from hermes_cli.copilot_auth import copilot_request_headers

@@ -2053,9 +2108,9 @@ def resolve_provider_client(
        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

-    # ── OpenRouter ───────────────────────────────────────────────────
+    # ── OpenRouter ───────────────────────────────────────────
    if provider == "openrouter":
-        client, default = _try_openrouter()
+        client, default = _try_openrouter(explicit_api_key=explicit_api_key)
        if client is None:
            logger.warning(
                "resolve_provider_client: openrouter requested but %s",
@@ -3237,7 +3292,26 @@ def _build_call_kwargs(
            kwargs["max_tokens"] = max_tokens

    if tools:
-        kwargs["tools"] = tools
+        # Defensive dedup: providers like Google Vertex, Azure, and Bedrock
+        # reject requests with duplicate tool names (HTTP 400).  The upstream
+        # injection paths (run_agent.py) already dedup, but this guard
+        # converts a hard API failure into a warning if an upstream regression
+        # reintroduces duplicates.  See: #18478
+        _seen: set = set()
+        _deduped: list = []
+        for _t in tools:
+            _tname = (_t.get("function") or {}).get("name", "")
+            if _tname and _tname in _seen:
+                logger.warning(
+                    "_build_call_kwargs: duplicate tool name '%s' removed "
+                    "(provider=%s model=%s)",
+                    _tname, provider, model,
+                )
+                continue
+            if _tname:
+                _seen.add(_tname)
+            _deduped.append(_t)
+        kwargs["tools"] = _deduped

    # Provider-specific extra_body
    merged_extra = dict(extra_body or {})
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import logging
+import os
 import random
 import threading
 import time
@@ -13,7 +14,7 @@ from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
-from hermes_cli.config import get_env_value
+from hermes_cli.config import get_env_value, load_env
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -1380,6 +1381,16 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
    changed = False
    active_sources: Set[str] = set()
+
+    # Prefer ~/.hermes/.env over os.environ — the user's config file is the
+    # authoritative source for Hermes credentials. Stale env vars from parent
+    # processes (Codex CLI, test scripts, etc.) should not override deliberate
+    # changes to the .env file.
+    def _get_env_prefer_dotenv(key: str) -> str:
+        # Strip each source before choosing, so a blank ~/.hermes/.env entry
+        # cannot shadow a usable value inherited from os.environ.
+        return (load_env().get(key) or "").strip() or (os.environ.get(key) or "").strip()
+
    # Honour user suppression — `hermes auth remove <provider> <N>` for an
    # env-seeded credential marks the env:<VAR> source as suppressed so it
    # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
@@ -1391,8 +1402,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
            return False
    if provider == "openrouter":
-        # Check both os.environ and ~/.hermes/.env file
-        token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
+        # Prefer ~/.hermes/.env over os.environ
+        token = _get_env_prefer_dotenv("OPENROUTER_API_KEY")
        if token:
            source = "env:OPENROUTER_API_KEY"
            if _is_source_suppressed(provider, source):
@@ -1418,7 +1429,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool

    env_url = ""
    if pconfig.base_url_env_var:
-        env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
+        env_url = _get_env_prefer_dotenv(pconfig.base_url_env_var).rstrip("/")

    env_vars = list(pconfig.api_key_env_vars)
    if provider == "anthropic":
@@ -1429,8 +1440,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        ]

    for env_var in env_vars:
-        # Check both os.environ and ~/.hermes/.env file
-        token = (get_env_value(env_var) or "").strip()
+        # Prefer ~/.hermes/.env over os.environ
+        token = _get_env_prefer_dotenv(env_var)
        if not token:
            continue
        source = f"env:{env_var}"
@@ -387,6 +387,11 @@ CURATOR_REVIEW_PROMPT = (
    "  - skill_manage action=write_file — add a references/, templates/, "
    "or scripts/ file under an existing skill (the skill must already "
    "exist)\n"
+    "  - skill_manage action=delete     — archive a skill. MUST pass "
+    "`absorbed_into=<umbrella>` when you've merged its content into another "
+    "skill, or `absorbed_into=\"\"` when you're truly pruning with no "
+    "forwarding target. This drives cron-job skill-reference migration — "
+    "guessing from your YAML summary after the fact is fragile.\n"
    "  - terminal                       — mv a sibling into the archive "
    "OR move its content into a support subfile\n\n"
    "'keep' is a legitimate decision ONLY when the skill is already a "
@@ -637,15 +642,76 @@ def _parse_structured_summary(
    return out


+def _extract_absorbed_into_declarations(
+    tool_calls: List[Dict[str, Any]],
+) -> Dict[str, Dict[str, Any]]:
+    """Collect the ``absorbed_into`` targets declared on this run's deletes.
+
+    The curator prompt asks every ``skill_manage(action='delete')`` call to
+    carry ``absorbed_into=<umbrella>`` for a consolidation, or
+    ``absorbed_into=""`` for a genuine prune. That argument is the model's
+    own statement of intent at deletion time, so it is treated as the
+    authoritative classification signal, ahead of post-hoc YAML summary
+    parsing and substring heuristics over other tool calls.
+
+    Returns ``{skill_name: {"into": "<umbrella>" | "", "declared": True}}``;
+    ``into == ""`` marks an explicit pruning. A skill is absent from the
+    result when it has no ``skill_manage`` delete call, or when that call
+    carries no string ``absorbed_into``. The caller falls back to the
+    existing heuristic/YAML logic for those (backward compat with older
+    curator runs and any callers that don't populate the arg).
+    """
+    declarations: Dict[str, Dict[str, Any]] = {}
+    for call in tool_calls or []:
+        # Only well-formed skill_manage calls are of interest.
+        if not isinstance(call, dict):
+            continue
+        if call.get("name") != "skill_manage":
+            continue
+        payload = call.get("arguments") or ""
+        if isinstance(payload, str):
+            # Arguments may arrive JSON-encoded; undecodable ones are skipped.
+            try:
+                payload = json.loads(payload)
+            except Exception:
+                continue
+        if not isinstance(payload, dict):
+            continue
+        # Anything other than a delete carries no absorption declaration.
+        if payload.get("action") != "delete":
+            continue
+        skill = payload.get("name")
+        if not isinstance(skill, str) or not skill.strip():
+            continue
+        # A missing, null or non-string ``absorbed_into`` means the model
+        # declared no intent; the empty string is meaningful ("pruned").
+        absorbed = payload.get("absorbed_into")
+        if not isinstance(absorbed, str):
+            continue
+        # Names and targets are stored whitespace-trimmed.
+        declarations[skill.strip()] = {
+            "into": absorbed.strip(),
+            "declared": True,
+        }
+    return declarations
+
+
 def _reconcile_classification(
    removed: List[str],
    heuristic: Dict[str, List[Dict[str, Any]]],
    model_block: Dict[str, List[Dict[str, str]]],
    destinations: Set[str],
+    absorbed_declarations: Optional[Dict[str, Dict[str, Any]]] = None,
 ) -> Dict[str, List[Dict[str, Any]]]:
    """Merge heuristic (tool-call evidence) with the model's structured block.

-    Rules:
+    Rules (evaluated in order; first match wins):
+    - **Model-declared `absorbed_into` at delete time is authoritative.** Any
+      entry in ``absorbed_declarations`` beats every other signal. This is
+      the model telling us directly, at the moment of deletion, what it did.
+      ``into != ""`` and target exists → consolidated. ``into == ""`` →
+      pruned. ``into != ""`` but target doesn't exist → hallucination; fall
+      through to the usual signals.
    - Model-declared consolidation wins when its ``into`` target exists
      in ``destinations`` (survived or newly-created). This gives the
      model authority over intent + rationale.
@@ -666,6 +732,8 @@ def _reconcile_classification(
    model_cons = {e["from"]: e for e in model_block.get("consolidations", [])}
    model_pruned = {e["name"]: e for e in model_block.get("prunings", [])}

+    declared = absorbed_declarations or {}
+
    consolidated: List[Dict[str, Any]] = []
    pruned: List[Dict[str, Any]] = []

@@ -673,6 +741,36 @@ def _reconcile_classification(
        mc = model_cons.get(name)
        mp = model_pruned.get(name)
        hc = heur_cons.get(name)
+        dec = declared.get(name)
+
+        # Authoritative: model declared `absorbed_into` at the delete call.
+        if dec is not None:
+            into_claim = dec.get("into", "")
+            if into_claim and into_claim in destinations:
+                entry: Dict[str, Any] = {
+                    "name": name,
+                    "into": into_claim,
+                    "source": "absorbed_into (model-declared at delete)",
+                    "reason": (mc.get("reason") or "") if mc else "",
+                }
+                if hc and hc.get("evidence"):
+                    entry["evidence"] = hc["evidence"]
+                consolidated.append(entry)
+                continue
+            if into_claim == "":
+                # Explicit prune declaration
+                pruned.append({
+                    "name": name,
+                    "source": "absorbed_into=\"\" (model-declared prune)",
+                    "reason": (mp.get("reason") or "") if mp else "",
+                })
+                continue
+            # into_claim is non-empty but target doesn't exist: the model
+            # named a nonexistent umbrella at delete time. The tool already
+            # rejects this at the skill_manage layer, so we shouldn't see it
+            # in practice — but if it slips through (e.g. the umbrella was
+            # deleted LATER in the same run), fall through to the usual
+            # signals rather than trusting a broken reference.

        # Model says consolidated — trust it if the destination is real.
        if mc and mc.get("into") in destinations:
@@ -808,11 +906,20 @@ def _write_run_report(
    )
    model_block = _parse_structured_summary(llm_meta.get("final", "") or "")
    destinations = set(after_names) | set(added or [])
+    # Authoritative signal: extract per-delete `absorbed_into` declarations
+    # from this run's tool calls. These beat both the YAML summary block and
+    # the substring heuristic — the model is telling us directly, at the
+    # moment of deletion, whether each archived skill was consolidated
+    # (into=<umbrella>) or pruned (into="").
+    absorbed_declarations = _extract_absorbed_into_declarations(
+        llm_meta.get("tool_calls", []) or []
+    )
    classification = _reconcile_classification(
        removed=removed,
        heuristic=heuristic,
        model_block=model_block,
        destinations=destinations,
+        absorbed_declarations=absorbed_declarations,
    )
    consolidated = classification["consolidated"]
    pruned = classification["pruned"]
@@ -21,6 +21,18 @@ It DOES include:
    pointer — otherwise the curator would immediately re-fire on the next
    tick)
  - ``.bundled_manifest`` (so protection markers stay consistent)
+
+Alongside the skills tarball, each snapshot also captures a copy of
+``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
+jobs reference skills by name in their ``skills``/``skill`` fields; the
+curator's consolidation pass rewrites those in place via
+``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
+rolling back the skills tree would leave cron jobs pointing at the
+umbrella skills even though the narrow skills they were originally
+configured with have been restored. We store the whole jobs.json for
+fidelity but rollback only touches the ``skills``/``skill`` fields — the
+rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
+we leave it alone.
 """

 from __future__ import annotations
@@ -63,6 +75,60 @@ def _skills_dir() -> Path:
    return get_hermes_home() / "skills"


+def _cron_jobs_file() -> Path:
+    """Return the path of the live cron job store (``~/.hermes/cron/jobs.json``)."""
+    return get_hermes_home().joinpath("cron", "jobs.json")
+
+
+CRON_JOBS_FILENAME = "cron-jobs.json"
+
+
+def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
+    """Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.
+
+    Returns a small dict describing what was captured so the caller can
+    fold it into the manifest. Never raises — if the cron file is missing
+    or unreadable (I/O or UTF-8 decode error), the return dict has
+    ``backed_up=False`` and the reason, and the snapshot proceeds without
+    cron data (the snapshot is still useful for rolling back skills).
+    """
+    src = _cron_jobs_file()
+    info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
+    if not src.exists():
+        info["reason"] = "no cron/jobs.json present"
+        return info
+    try:
+        raw = src.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError) as e:
+        # Undecodable bytes raise UnicodeDecodeError (a ValueError, not OSError).
+        logger.debug("Failed to read cron/jobs.json for backup: %s", e)
+        info["reason"] = f"read error: {e}"
+        return info
+    # Count jobs as a diagnostic only; an unparseable file is still stored
+    # verbatim and left for rollback to deal with. jobs.json wraps the list
+    # as `{"jobs": [...], "updated_at": ...}`; a bare list is also accepted
+    # in case the format ever changes.
+    try:
+        parsed = json.loads(raw)
+        if isinstance(parsed, dict):
+            inner = parsed.get("jobs")
+            if isinstance(inner, list):
+                info["jobs_count"] = len(inner)
+        elif isinstance(parsed, list):
+            info["jobs_count"] = len(parsed)
+    except (json.JSONDecodeError, TypeError):
+        info["jobs_count"] = 0
+        info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
+    try:
+        (dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
+    except OSError as e:
+        logger.debug("Failed to write cron backup file: %s", e)
+        info["reason"] = f"write error: {e}"
+        return info
+    info["backed_up"] = True
+    return info
+
+
 def _utc_id(now: Optional[datetime] = None) -> str:
    """UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
    if now is None:
@@ -116,7 +182,8 @@ def _count_skill_files(base: Path) -> int:


 def _write_manifest(dest: Path, reason: str, archive_path: Path,
-                    skills_counted: int) -> None:
+                    skills_counted: int,
+                    cron_info: Optional[Dict[str, Any]] = None) -> None:
    manifest = {
        "id": dest.name,
        "reason": reason,
@@ -125,6 +192,15 @@ def _write_manifest(dest: Path, reason: str, archive_path: Path,
        "archive_bytes": archive_path.stat().st_size,
        "skill_files": skills_counted,
    }
+    if cron_info is not None:
+        manifest["cron_jobs"] = {
+            "backed_up": bool(cron_info.get("backed_up", False)),
+            "jobs_count": int(cron_info.get("jobs_count", 0)),
+        }
+        if not cron_info.get("backed_up"):
+            manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
+        if cron_info.get("parse_warning"):
+            manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
    (dest / "manifest.json").write_text(
        json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
    )
@@ -181,7 +257,14 @@ def snapshot_skills(reason: str = "manual") -> Optional[Path]:
                # arcname: store paths relative to skills/ so extraction
                # drops cleanly back into the skills dir.
                tf.add(str(entry), arcname=entry.name, recursive=True)
-        _write_manifest(dest, reason, archive, _count_skill_files(skills))
+        # Capture cron/jobs.json alongside the tarball. Never fails the
+        # snapshot — the skills side is the core guarantee; cron is
+        # additive. We still record in the manifest whether it was
+        # captured so rollback can surface "no cron data in this snapshot".
+        cron_info = _backup_cron_jobs_into(dest)
+        _write_manifest(dest, reason, archive,
+                        _count_skill_files(skills),
+                        cron_info=cron_info)
    except (OSError, tarfile.TarError) as e:
        logger.debug("Curator snapshot failed: %s", e, exc_info=True)
        # Clean up partial snapshot
@@ -298,6 +381,149 @@ def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
    return candidates[0] if candidates else None


+def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
+    """Reconcile backed-up cron skill links into the live ``cron/jobs.json``.
+
+    We do NOT overwrite the whole cron file. Only the ``skills`` and
+    ``skill`` fields are restored, and only on jobs that still exist in the
+    current file (matched by ``id``). Everything else about the job —
+    schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
+    is live state that the user/scheduler has modified since the snapshot;
+    overwriting it would regress unrelated cron activity.
+
+    Rules:
+    - Jobs present in backup AND live, with differing skills → skills restored.
+    - Jobs present in backup AND live, with matching skills → no-op.
+    - Jobs present in backup but gone from live (user deleted the job
+      after the snapshot) → skipped, noted in the return report.
+    - Jobs present in live but not in backup (user created a new cron
+      job after the snapshot) → left untouched.
+
+    Never raises; failures are captured in the return dict. Writes through
+    ``cron.jobs`` to pick up the same lock + atomic-write path that tick()
+    uses, so we don't race the scheduler.
+    """
+    report: Dict[str, Any] = {
+        "attempted": False,
+        "restored": [],
+        "skipped_missing": [],
+        "unchanged": 0,
+        "error": None,
+    }
+    backup_file = snapshot_dir / CRON_JOBS_FILENAME
+    if not backup_file.exists():
+        report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
+        return report
+
+    try:
+        # ValueError covers JSONDecodeError and UnicodeDecodeError alike.
+        backup_parsed = json.loads(backup_file.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as e:
+        report["error"] = f"failed to load backed-up jobs: {e}"
+        return report
+    # jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
+    # that shape and a bare list for forward compat.
+    if isinstance(backup_parsed, dict):
+        backup_jobs = backup_parsed.get("jobs")
+    elif isinstance(backup_parsed, list):
+        backup_jobs = backup_parsed
+    else:
+        backup_jobs = None
+    if not isinstance(backup_jobs, list):
+        report["error"] = "backed-up cron-jobs.json has no jobs list"
+        return report
+
+    # Build a lookup of the backed-up skill state keyed by job id.
+    # We only need the two skill-ish fields (legacy single and modern list).
+    backup_by_id: Dict[str, Dict[str, Any]] = {}
+    for job in backup_jobs:
+        if not isinstance(job, dict):
+            continue
+        jid = job.get("id")
+        if not isinstance(jid, str) or not jid:
+            continue
+        backup_by_id[jid] = {
+            "skills": job.get("skills"),
+            "skill": job.get("skill"),
+            "name": job.get("name") or jid,
+        }
+
+    if not backup_by_id:
+        report["attempted"] = True  # we tried but there was nothing to do
+        return report
+
+    # Load and rewrite the live jobs under the scheduler's lock.
+    try:
+        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
+    except ImportError as e:
+        report["error"] = f"cron module unavailable: {e}"
+        return report
+
+    report["attempted"] = True
+    try:
+        with _jobs_file_lock:
+            live_jobs = load_jobs()
+            changed = False
+
+            live_ids = set()
+            for live in live_jobs:
+                if not isinstance(live, dict):
+                    continue
+                jid = live.get("id")
+                if not isinstance(jid, str) or not jid:
+                    continue
+                live_ids.add(jid)
+
+                backup = backup_by_id.get(jid)
+                if backup is None:
+                    continue  # live job didn't exist at snapshot time
+
+                cur_skills = live.get("skills")
+                cur_skill = live.get("skill")
+                bkp_skills = backup.get("skills")
+                bkp_skill = backup.get("skill")
+
+                if cur_skills == bkp_skills and cur_skill == bkp_skill:
+                    report["unchanged"] += 1
+                    continue
+
+                # Restore. Preserve absence (don't force the key to appear
+                # if the backup didn't have it either).
+                if bkp_skills is None:
+                    live.pop("skills", None)
+                else:
+                    live["skills"] = bkp_skills
+                if bkp_skill is None:
+                    live.pop("skill", None)
+                else:
+                    live["skill"] = bkp_skill
+
+                report["restored"].append({
+                    "job_id": jid,
+                    "job_name": backup.get("name") or jid,
+                    "from": {"skills": cur_skills, "skill": cur_skill},
+                    "to": {"skills": bkp_skills, "skill": bkp_skill},
+                })
+                changed = True
+
+            # Jobs in backup but not in live = user deleted them after snapshot
+            for jid, backup in backup_by_id.items():
+                if jid not in live_ids:
+                    report["skipped_missing"].append({
+                        "job_id": jid,
+                        "job_name": backup.get("name") or jid,
+                    })
+
+            if changed:
+                save_jobs(live_jobs)
+    except Exception as e:  # noqa: BLE001 — rollback must not die mid-restore
+        logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
+        report["error"] = f"restore failed mid-flight: {e}"
+
+    return report
+
+
+
 def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
    """Restore ``~/.hermes/skills/`` from a snapshot.

@@ -408,8 +634,35 @@ def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]
    except OSError:
        pass

-    logger.info("Curator rollback: restored from %s", target.name)
-    return (True, f"restored from snapshot {target.name}", target)
+    # Reconcile cron skill-links. Surgical: only the skills/skill fields
+    # on jobs matched by id. Everything else in jobs.json is live state
+    # (schedule, next_run_at, enabled, prompt, etc.) and we leave it
+    # alone. Failures here don't fail the overall rollback — the skills
+    # tree is already restored, which is the main guarantee.
+    cron_report = _restore_cron_skill_links(target)
+
+    summary_bits = [f"restored from snapshot {target.name}"]
+    if cron_report.get("attempted"):
+        restored_n = len(cron_report.get("restored") or [])
+        skipped_n = len(cron_report.get("skipped_missing") or [])
+        if cron_report.get("error"):
+            summary_bits.append(f"cron links: error — {cron_report['error']}")
+        elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
+            # Attempted but nothing matched — empty snapshot or no overlapping ids.
+            pass
+        else:
+            parts = []
+            if restored_n:
+                parts.append(f"{restored_n} job(s) had skill links restored")
+            if skipped_n:
+                parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
+            if cron_report.get("unchanged"):
+                parts.append(f"{cron_report['unchanged']} already matched")
+            summary_bits.append("cron links: " + ", ".join(parts))
+
+    logger.info("Curator rollback: restored from %s (cron_report=%s)",
+                target.name, cron_report)
+    return (True, "; ".join(summary_bits), target)


 # ---------------------------------------------------------------------------
@@ -6,6 +6,7 @@ can invoke skills via /skill-name commands.

 import json
 import logging
+import os
 import re
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -20,10 +21,35 @@ from agent.skill_preprocessing import (
 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
+_skill_commands_platform: Optional[str] = None
 # Patterns for sanitizing skill names into clean hyphen-separated slugs.
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

+
+def _resolve_skill_commands_platform() -> Optional[str]:
+    """Identify the platform scope that disabled-skill filtering applies to.
+
+    :func:`get_skill_commands` compares this value with the one recorded at
+    scan time so it can discard a cache built for a different platform's
+    ``skills.platform_disabled`` view (#14536).
+
+    The ``HERMES_PLATFORM`` env var takes precedence; otherwise
+    ``HERMES_SESSION_PLATFORM`` is read from the gateway session context.
+    If the session context cannot be imported or queried, only the env var
+    is consulted. Returns ``None`` when no platform scope is active (e.g.
+    classic CLI, RL rollouts, standalone scripts).
+    """
+    env_platform = os.getenv("HERMES_PLATFORM")
+    # Best effort: any failure in the session lookup falls back to the env var.
+    try:
+        from gateway.session_context import get_session_env
+
+        platform = env_platform or get_session_env("HERMES_SESSION_PLATFORM")
+    except Exception:
+        platform = env_platform
+    return platform or None
+
 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
    """Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
    raw_identifier = (skill_identifier or "").strip()
@@ -218,7 +244,8 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    Returns:
        Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
    """
-    global _skill_commands
+    global _skill_commands, _skill_commands_platform
+    _skill_commands_platform = _resolve_skill_commands_platform()
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
@@ -278,8 +305,16 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:


 def get_skill_commands() -> Dict[str, Dict[str, Any]]:
-    """Return the current skill commands mapping (scan first if empty)."""
-    if not _skill_commands:
+    """Return the current skill commands mapping (scan first if empty).
+
+    Rescans when the active platform scope changes (e.g. a gateway
+    process serving Telegram and Discord concurrently) so each platform
+    sees its own ``skills.platform_disabled`` view (#14536).
+    """
+    if (
+        not _skill_commands
+        or _skill_commands_platform != _resolve_skill_commands_platform()
+    ):
        scan_skill_commands()
    return _skill_commands

@@ -121,6 +121,18 @@ model:
 #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 #   # data_collection: "deny"

+# =============================================================================
+# OpenRouter Response Caching (only applies when using OpenRouter)
+# =============================================================================
+# Cache identical API responses at the OpenRouter edge for free instant replays.
+# When enabled, identical requests (same model, messages, parameters) return
+# cached responses with zero billing. Separate from Anthropic prompt caching.
+# See: https://openrouter.ai/docs/guides/features/response-caching
+#
+# openrouter:
+#   response_cache: true         # Enable response caching (default: true)
+#   response_cache_ttl: 300      # Cache TTL in seconds, 1-86400 (default: 300)
+
 # =============================================================================
 # Git Worktree Isolation
 # =============================================================================
@@ -2928,7 +2928,14 @@ class HermesCLI:

        def _expand_ref(match):
            path = Path(match.group(1))
-            return path.read_text(encoding="utf-8") if path.exists() else match.group(0)
+            # Use try/except instead of path.exists() to avoid TOCTOU race:
+            # the paste file may be deleted between check and read, causing
+            # the input to be silently dropped (#17666).
+            try:
+                return path.read_text(encoding="utf-8")
+            except (OSError, IOError):
+                logger.warning("Paste file gone or unreadable, returning placeholder: %s", path)
+                return match.group(0)

        return paste_ref_re.sub(_expand_ref, text)

@@ -11584,7 +11591,7 @@ class HermesCLI:
                            pass  # Non-fatal — don't break the main loop

                except Exception as e:
-                    print(f"Error: {e}")
+                    logger.warning("process_loop unhandled error (msg may be lost): %s", e)
        
        # Start processing thread
        process_thread = threading.Thread(target=process_loop, daemon=True)
@@ -0,0 +1,84 @@
+"""Shared HTTP client factory for long-lived platform adapters.
+
+Gateway messaging platforms (QQ Bot, Feishu, WeCom, DingTalk, Signal,
+BlueBubbles, WeCom-callback) keep a persistent ``httpx.AsyncClient``
+alive for the adapter's lifetime.  That amortises TLS/connection setup
+across many API calls, but it also means the process's file-descriptor
+pressure is sensitive to how aggressively the pool recycles idle
+keep-alive connections.
+
+httpx's default ``keepalive_expiry`` is 5 seconds.  On macOS behind
+Cloudflare Warp (and other transparent proxies), peer-initiated FIN can
+sit in ``CLOSE_WAIT`` longer than that before the local socket actually
+drains — which, multiplied across 7 long-lived adapters plus the LLM
+client and MCP clients, walks straight into the default 256 fd limit.
+See #18451.
+
+``platform_httpx_limits()`` returns a tighter ``httpx.Limits`` the
+adapter factories use instead of the httpx default.  The values chosen:
+
+* ``max_keepalive_connections=10`` — plenty for any single adapter;
+  platform APIs rarely parallelise beyond this.
+* ``keepalive_expiry=2.0`` — close idle sockets aggressively so a
+  proxy's lingering CLOSE_WAIT window can't starve the process.
+
+Override via ``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` /
+``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` env vars when tuning under load.
+"""
+
+from __future__ import annotations
+
+import os
+
+try:
+    import httpx
+except ImportError:  # pragma: no cover — optional dep
+    httpx = None  # type: ignore[assignment]
+
+
+_DEFAULT_KEEPALIVE_EXPIRY_S = 2.0  # seconds; used when HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY is unset/invalid
+_DEFAULT_MAX_KEEPALIVE = 10  # connections; used when HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE is unset/invalid
+
+
+def platform_httpx_limits() -> "httpx.Limits | None":
+    """Return ``httpx.Limits`` tuned for persistent platform-adapter clients.
+
+    Returns ``None`` when httpx isn't importable, so callers can fall
+    back to httpx's built-in default without a hard dependency on this
+    helper being reachable.
+    """
+    if httpx is None:  # the optional import at module top failed
+        return None
+
+    def _env_float(name: str, default: float) -> float:  # env var -> positive float, else default
+        raw = os.environ.get(name, "").strip()
+        if not raw:  # unset, empty, or whitespace-only
+            return default
+        try:
+            val = float(raw)
+        except (TypeError, ValueError):  # not parseable as a float
+            return default
+        return val if val > 0 else default  # zero/negative/NaN -> default; NOTE(review): "inf" is accepted
+
+    def _env_int(name: str, default: int) -> int:  # env var -> positive int, else default
+        raw = os.environ.get(name, "").strip()
+        if not raw:  # unset, empty, or whitespace-only
+            return default
+        try:
+            val = int(raw)  # int() rejects strings such as "10.0", which fall back to default
+        except (TypeError, ValueError):
+            return default
+        return val if val > 0 else default  # zero/negative -> default
+
+    keepalive_expiry = _env_float(
+        "HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", _DEFAULT_KEEPALIVE_EXPIRY_S
+    )
+    max_keepalive = _env_int(
+        "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", _DEFAULT_MAX_KEEPALIVE
+    )
+
+    return httpx.Limits(
+        max_keepalive_connections=max_keepalive,
+        # Leave max_connections at httpx default (100) — plenty of headroom.
+        keepalive_expiry=keepalive_expiry,
+    )
@@ -162,7 +162,9 @@ class BlueBubblesAdapter(BasePlatformAdapter):
            return False
        from aiohttp import web

-        self.client = httpx.AsyncClient(timeout=30.0)
+        # Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
+        from gateway.platforms._http_client_limits import platform_httpx_limits
+        self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
        try:
            await self._api_get("/api/v1/ping")
            info = await self._api_get("/api/v1/server/info")
@@ -228,7 +228,11 @@ class DingTalkAdapter(BasePlatformAdapter):
            return False

        try:
-            self._http_client = httpx.AsyncClient(timeout=30.0)
+            # Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
+            from gateway.platforms._http_client_limits import platform_httpx_limits
+            self._http_client = httpx.AsyncClient(
+                timeout=30.0, limits=platform_httpx_limits(),
+            )

            credential = dingtalk_stream.Credential(
                self._client_id, self._client_secret
@@ -613,6 +613,21 @@ class DiscordAdapter(BasePlatformAdapter):
            # so LLM output or echoed user content can't ping the whole
            # server; override per DISCORD_ALLOW_MENTION_* env vars or the
            # discord.allow_mentions.* block in config.yaml.
+
+            # Close any existing client to prevent zombie websocket connections
+            # on reconnect (see #18187). Without this, the old client remains
+            # connected to Discord gateway and both fire on_message, causing
+            # double responses.
+            if self._client is not None:
+                try:
+                    if not self._client.is_closed():
+                        await self._client.close()
+                except Exception:
+                    logger.debug("[%s] Failed to close previous Discord client", self.name)
+                finally:
+                    self._client = None
+                    self._ready_event.clear()
+
            self._client = commands.Bot(
                command_prefix="!",  # Not really used, we handle raw messages
                intents=intents,
@@ -2584,40 +2599,32 @@ class DiscordAdapter(BasePlatformAdapter):
        hidden skills. The slash picker also becomes more discoverable —
        Discord live-filters by the user's typed prefix against both the
        skill name and its description.
+
+        The entries list and lookup dict are stored on ``self`` rather
+        than captured in closure variables so :meth:`refresh_skill_group`
+        can repopulate them when the user runs ``/reload-skills`` without
+        needing to touch the Discord slash-command tree or trigger a
+        ``tree.sync()`` call.
        """
        try:
-            from hermes_cli.commands import discord_skill_commands_by_category
-
            existing_names = set()
            try:
                existing_names = {cmd.name for cmd in tree.get_commands()}
            except Exception:
                pass

-            # Reuse the existing collector for consistent filtering
-            # (per-platform disabled, hub-excluded, name clamping), then
-            # flatten — the category grouping was only useful for the
-            # nested layout.
-            categories, uncategorized, hidden = discord_skill_commands_by_category(
-                reserved_names=existing_names,
-            )
-            entries: list[tuple[str, str, str]] = list(uncategorized)
-            for cat_skills in categories.values():
-                entries.extend(cat_skills)
+            # Populate the instance-level entries/lookup so the
+            # autocomplete + handler callbacks below always read the
+            # freshest state. refresh_skill_group() re-runs the same
+            # collector and mutates these two attributes in place.
+            self._skill_entries: list[tuple[str, str, str]] = []
+            self._skill_lookup: dict[str, tuple[str, str]] = {}
+            self._skill_group_reserved_names: set[str] = set(existing_names)
+            self._refresh_skill_catalog_state()

-            if not entries:
+            if not self._skill_entries:
                return

-            # Stable alphabetical order so the autocomplete suggestion
-            # list is predictable across restarts.
-            entries.sort(key=lambda t: t[0])
-
-            # name -> (description, cmd_key) — used by both the autocomplete
-            # callback and the handler for O(1) dispatch.
-            skill_lookup: dict[str, tuple[str, str]] = {
-                n: (d, k) for n, d, k in entries
-            }
-
            async def _autocomplete_name(
                interaction: "discord.Interaction", current: str,
            ) -> list:
@@ -2627,10 +2634,13 @@ class DiscordAdapter(BasePlatformAdapter):
                "/skill pdf" surfaces skills whose description mentions
                PDFs even if the name doesn't. Discord caps this list at
                25 entries per query.
+
+                Reads ``self._skill_entries`` so a ``/reload-skills`` run
+                since process start shows up on the very next keystroke.
                """
                q = (current or "").strip().lower()
                choices: list = []
-                for name, desc, _key in entries:
+                for name, desc, _key in self._skill_entries:
                    if not q or q in name.lower() or (desc and q in desc.lower()):
                        if desc:
                            label = f"{name} — {desc}"
@@ -2654,7 +2664,7 @@ class DiscordAdapter(BasePlatformAdapter):
            async def _skill_handler(
                interaction: "discord.Interaction", name: str, args: str = "",
            ):
-                entry = skill_lookup.get(name)
+                entry = self._skill_lookup.get(name)
                if not entry:
                    await interaction.response.send_message(
                        f"Unknown skill: `{name}`. Start typing for "
@@ -2676,16 +2686,74 @@ class DiscordAdapter(BasePlatformAdapter):

            logger.info(
                "[%s] Registered /skill command with %d skill(s) via autocomplete",
-                self.name, len(entries),
+                self.name, len(self._skill_entries),
            )
-            if hidden:
+            if self._skill_group_hidden_count:
                logger.info(
                    "[%s] %d skill(s) filtered out of /skill (name clamp / reserved)",
-                    self.name, hidden,
+                    self.name, self._skill_group_hidden_count,
                )
        except Exception as exc:
            logger.warning("[%s] Failed to register /skill command: %s", self.name, exc)

+    def _refresh_skill_catalog_state(self) -> None:
+        """Re-scan disk for skills and repopulate ``self._skill_entries``.
+
+        Called once from :meth:`_register_skill_group` at startup and
+        again from :meth:`refresh_skill_group` whenever the user runs
+        ``/reload-skills``. No Discord API calls are made — autocomplete
+        and the handler both read from these instance attributes
+        directly, so an in-place mutation is sufficient.
+        """
+        from hermes_cli.commands import discord_skill_commands_by_category  # imported at call time
+
+        reserved = getattr(self, "_skill_group_reserved_names", set())  # empty set when the attribute is not set
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names=set(reserved),  # hand the collector a copy, not the stored set
+        )
+        entries: list[tuple[str, str, str]] = list(uncategorized)  # flatten: uncategorized first, then each category
+        for cat_skills in categories.values():
+            entries.extend(cat_skills)
+        # Stable alphabetical order so the autocomplete suggestion
+        # list is predictable across restarts.
+        entries.sort(key=lambda t: t[0])
+
+        self._skill_entries = entries  # rebinds the attribute; callbacks read it through self on every call
+        self._skill_lookup = {n: (d, k) for n, d, k in entries}  # name -> (description, cmd_key)
+        self._skill_group_hidden_count = hidden  # reported by callers as the "filtered" count
+
+    def refresh_skill_group(self) -> tuple[int, int]:
+        """Rescan skills and update the live ``/skill`` autocomplete state.
+
+        Invoked by :meth:`gateway.run.GatewayOrchestrator._handle_reload_skills_command`
+        after :func:`agent.skill_commands.reload_skills` has refreshed
+        the in-process skill-command registry. Without this call, the
+        ``/skill`` autocomplete dropdown keeps showing the list captured
+        at process start — new skills stay invisible and deleted skills
+        return an "Unknown skill" error when clicked.
+
+        Because autocomplete options are fetched dynamically by Discord,
+        we only need to mutate the entries/lookup attributes read by the
+        callbacks — no ``tree.sync()`` is required.
+
+        Returns ``(new_count, hidden_count)``.
+        """
+        try:
+            self._refresh_skill_catalog_state()
+        except Exception as exc:  # best-effort: a failed rescan is logged, never raised to the caller
+            logger.warning(
+                "[%s] Failed to refresh /skill autocomplete after reload: %s",
+                self.name, exc,
+            )
+            return (len(getattr(self, "_skill_entries", [])), 0)  # existing entry count (0 if unset); hidden reported as 0
+        logger.info(
+            "[%s] Refreshed /skill autocomplete: %d skill(s) available (%d filtered)",
+            self.name,
+            len(self._skill_entries),
+            self._skill_group_hidden_count,
+        )
+        return (len(self._skill_entries), self._skill_group_hidden_count)  # both values set by the rescan above
+
    def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
        """Build a MessageEvent from a Discord slash command interaction."""
        is_dm = isinstance(interaction.channel, discord.DMChannel)
@@ -2922,13 +2922,18 @@ class FeishuAdapter(BasePlatformAdapter):
                },
            )
            response.raise_for_status()
+            # Snapshot Content-Type and body while the client context is
+            # still active so pooled connections fully release on exit.
+            # See #18451.
+            content_type_hdr = str(response.headers.get("Content-Type", ""))
+            body = response.content
        filename = self._derive_remote_filename(
            file_url,
-            content_type=str(response.headers.get("Content-Type", "")),
+            content_type=content_type_hdr,
            default_name=preferred_name,
            default_ext=default_ext,
        )
-        cached_path = cache_document_from_bytes(response.content, filename)
+        cached_path = cache_document_from_bytes(body, filename)
        return cached_path, filename

    @staticmethod
@@ -243,10 +243,14 @@ class QQAdapter(BasePlatformAdapter):
            return False

        try:
+            # Tighter keepalive pool so idle CLOSE_WAIT sockets drain
+            # faster behind proxies like Cloudflare Warp (#18451).
+            from gateway.platforms._http_client_limits import platform_httpx_limits
            self._http_client = httpx.AsyncClient(
                timeout=30.0,
                follow_redirects=True,
                event_hooks={"response": [_ssrf_redirect_guard]},
+                limits=platform_httpx_limits(),
            )

            # 1. Get access token
@@ -248,7 +248,9 @@ class SignalAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)

-        self.client = httpx.AsyncClient(timeout=30.0)
+        # Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
+        from gateway.platforms._http_client_limits import platform_httpx_limits
+        self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
        try:
            # Health check — verify signal-cli daemon is reachable
            try:
@@ -512,6 +512,17 @@ class TelegramAdapter(BasePlatformAdapter):
                self.name, attempt,
            )
            self._polling_network_error_count = 0
+            # start_polling() returning is necessary but not sufficient:
+            # PTB's Updater can be left in a state where `running` is True
+            # but the underlying long-poll task is wedged on a stale httpx
+            # connection and never makes progress. No error_callback fires
+            # in that state, so the reconnect ladder won't advance on its
+            # own. Schedule a deferred probe to detect the wedge and
+            # re-enter the ladder if needed.
+            if not self.has_fatal_error:
+                probe = asyncio.ensure_future(self._verify_polling_after_reconnect())
+                self._background_tasks.add(probe)
+                probe.add_done_callback(self._background_tasks.discard)
        except Exception as retry_err:
            logger.warning("[%s] Telegram polling reconnect failed: %s", self.name, retry_err)
            # start_polling failed — polling is dead and no further error
@@ -523,6 +534,50 @@ class TelegramAdapter(BasePlatformAdapter):
                self._background_tasks.add(task)
                task.add_done_callback(self._background_tasks.discard)

+    async def _verify_polling_after_reconnect(self) -> None:
+        """Heartbeat probe scheduled after a successful reconnect.
+
+        PTB's Updater can survive a botched stop()+start_polling() cycle
+        with `running=True` but a wedged consumer task. No error callback
+        fires, so the reconnect ladder doesn't advance on its own. This
+        probe detects the wedge by:
+
+        1. Sleeping HEARTBEAT_PROBE_DELAY so a healthy long-poll has time
+           to complete at least one cycle.
+        2. Verifying `Updater.running` is still True.
+        3. Probing the bot endpoint with a tight asyncio timeout. A
+           wedged httpx pool fails this probe; a healthy one returns
+           well under the timeout.
+
+        On any failure, re-enter the reconnect ladder so the existing
+        MAX_NETWORK_RETRIES path can ultimately escalate to fatal-error.
+        """
+        HEARTBEAT_PROBE_DELAY = 60  # seconds to wait before probing
+        PROBE_TIMEOUT = 10  # seconds allowed for the get_me() round trip
+
+        await asyncio.sleep(HEARTBEAT_PROBE_DELAY)
+
+        if self.has_fatal_error:  # a fatal error is already recorded; skip the probe
+            return
+        if not (self._app and self._app.updater and self._app.updater.running):  # app/updater missing or stopped
+            logger.warning(
+                "[%s] Updater not running %ds after reconnect — treating as wedged",
+                self.name, HEARTBEAT_PROBE_DELAY,
+            )
+            await self._handle_polling_network_error(
+                RuntimeError("Updater not running after reconnect heartbeat")
+            )
+            return
+
+        try:
+            await asyncio.wait_for(self._app.bot.get_me(), PROBE_TIMEOUT)  # result is discarded; only success matters
+        except Exception as probe_err:  # covers the wait_for timeout as well as request errors
+            logger.warning(
+                "[%s] Polling heartbeat probe failed %ds after reconnect: %s",
+                self.name, HEARTBEAT_PROBE_DELAY, probe_err,
+            )
+            await self._handle_polling_network_error(probe_err)  # hand the failure to the network-error handler
+
    async def _handle_polling_conflict(self, error: Exception) -> None:
        if self.has_fatal_error and self.fatal_error_code == "telegram_polling_conflict":
            return
@@ -206,7 +206,11 @@ class WeComAdapter(BasePlatformAdapter):
            return False

        try:
-            self._http_client = httpx.AsyncClient(timeout=30.0, follow_redirects=True)
+            # Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
+            from gateway.platforms._http_client_limits import platform_httpx_limits
+            self._http_client = httpx.AsyncClient(
+                timeout=30.0, follow_redirects=True, limits=platform_httpx_limits(),
+            )
            await self._open_connection()
            self._mark_connected()
            self._listen_task = asyncio.create_task(self._listen_loop())
@@ -119,7 +119,9 @@ class WecomCallbackAdapter(BasePlatformAdapter):
            pass

        try:
-            self._http_client = httpx.AsyncClient(timeout=20.0)
+            # Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
+            from gateway.platforms._http_client_limits import platform_httpx_limits
+            self._http_client = httpx.AsyncClient(timeout=20.0, limits=platform_httpx_limits())
            self._app = web.Application()
            self._app.router.add_get("/health", self._handle_health)
            self._app.router.add_get(self._path, self._handle_verify)
@@ -2030,7 +2030,9 @@ async def send_weixin_direct(

    live_adapter = _LIVE_ADAPTERS.get(resolved_token)
    send_session = getattr(live_adapter, '_send_session', None)
-    if live_adapter is not None and send_session is not None and not send_session.closed:
+    if (live_adapter is not None and send_session is not None
+            and not send_session.closed
+            and send_session._loop is asyncio.get_running_loop()):
        last_result: Optional[SendResult] = None
        cleaned = live_adapter.format_message(message)
        if cleaned:
@@ -185,6 +185,13 @@ class WhatsAppAdapter(BasePlatformAdapter):
        self._bridge_log: Optional[Path] = None
        self._poll_task: Optional[asyncio.Task] = None
        self._http_session: Optional["aiohttp.ClientSession"] = None
+        # Set to True by disconnect() before we SIGTERM our child bridge so
+        # _check_managed_bridge_exit() can distinguish an intentional
+        # shutdown-time exit (returncode -15 / -2 / 0) from a real crash.
+        # Without this, every graceful gateway shutdown/restart would log
+        # "Fatal whatsapp adapter error" plus dispatch a fatal-error
+        # notification before the normal "✓ whatsapp disconnected" fires.
+        self._shutting_down: bool = False

    def _whatsapp_require_mention(self) -> bool:
        configured = self.config.extra.get("require_mention")
@@ -555,6 +562,21 @@ class WhatsAppAdapter(BasePlatformAdapter):
        if returncode is None:
            return None

+        # Planned shutdown: disconnect() sets _shutting_down before it sends
+        # SIGTERM to the bridge, so a returncode of -15 (SIGTERM), -2 (SIGINT),
+        # or 0 (clean exit) at that point is expected, not a crash. Treat it
+        # as informational and skip the fatal-error path.
+        # getattr-with-default keeps tests that construct the adapter via
+        # ``WhatsAppAdapter.__new__`` (bypassing __init__) working without
+        # every _make_adapter() helper having to seed the attribute.
+        if getattr(self, "_shutting_down", False) and returncode in (0, -2, -15):
+            logger.info(
+                "[%s] Bridge exited during shutdown (code %d).",
+                self.name,
+                returncode,
+            )
+            return None
+
        message = f"WhatsApp bridge process exited unexpectedly (code {returncode})."
        if not self.has_fatal_error:
            logger.error("[%s] %s", self.name, message)
@@ -565,6 +587,10 @@ class WhatsAppAdapter(BasePlatformAdapter):

    async def disconnect(self) -> None:
        """Stop the WhatsApp bridge and clean up any orphaned processes."""
+        # Flip the shutdown flag BEFORE signalling the child so the exit-check
+        # path (which runs from other tasks like send() and the poll loop)
+        # doesn't race us and report the intentional termination as fatal.
+        self._shutting_down = True
        if self._bridge_process:
            try:
                try:
@@ -876,11 +902,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
        try:
            import aiohttp

-            await self._http_session.post(
+            # Must wrap in `async with` — a bare `await session.post(...)`
+            # leaves the response object alive until GC, holding its TCP
+            # socket in CLOSE_WAIT. See #18451.
+            async with self._http_session.post(
                f"http://127.0.0.1:{self._bridge_port}/typing",
                json={"chatId": chat_id},
                timeout=aiohttp.ClientTimeout(total=5)
-            )
+            ):
+                pass
        except Exception:
            pass  # Ignore typing indicator failures
    
@@ -15,6 +15,7 @@ Usage:

 import asyncio
 import dataclasses
+import inspect
 import json
 import logging
 import os
@@ -406,37 +407,37 @@ if _config_path.exists():
                    os.environ[_env_map["base_url"]] = _base_url
                if _api_key:
                    os.environ[_env_map["api_key"]] = _api_key
+        # config.yaml is the documented, authoritative source for these
+        # settings — it unconditionally wins over .env values. Previously
+        # the guards below read `if X not in os.environ` and let stale
+        # .env entries (e.g. HERMES_MAX_ITERATIONS=60 written by an old
+        # `hermes setup` run) silently shadow the user's current config.
+        # See PR #18413 / the 60-vs-500 max_turns incident.
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
                os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
-            # Bridge agent.gateway_timeout → HERMES_AGENT_TIMEOUT env var.
-            # Env var from .env takes precedence (already in os.environ).
-            if "gateway_timeout" in _agent_cfg and "HERMES_AGENT_TIMEOUT" not in os.environ:
+            if "gateway_timeout" in _agent_cfg:
                os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
-            if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
+            if "gateway_timeout_warning" in _agent_cfg:
                os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
-            if "gateway_notify_interval" in _agent_cfg and "HERMES_AGENT_NOTIFY_INTERVAL" not in os.environ:
+            if "gateway_notify_interval" in _agent_cfg:
                os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
-            if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
+            if "restart_drain_timeout" in _agent_cfg:
                os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
-            if (
-                "gateway_auto_continue_freshness" in _agent_cfg
-                and "HERMES_AUTO_CONTINUE_FRESHNESS" not in os.environ
-            ):
+            if "gateway_auto_continue_freshness" in _agent_cfg:
                os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str(
                    _agent_cfg["gateway_auto_continue_freshness"]
                )
        _display_cfg = _cfg.get("display", {})
        if _display_cfg and isinstance(_display_cfg, dict):
-            if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
+            if "busy_input_mode" in _display_cfg:
                os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
-            if "busy_ack_enabled" in _display_cfg and "HERMES_GATEWAY_BUSY_ACK_ENABLED" not in os.environ:
+            if "busy_ack_enabled" in _display_cfg:
                os.environ["HERMES_GATEWAY_BUSY_ACK_ENABLED"] = str(_display_cfg["busy_ack_enabled"])
        # Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
-        # HERMES_TIMEZONE from .env takes precedence (already in os.environ).
        _tz_cfg = _cfg.get("timezone", "")
-        if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
+        if _tz_cfg and isinstance(_tz_cfg, str):
            os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
        # Security settings
        _security_cfg = _cfg.get("security", {})
@@ -444,8 +445,24 @@ if _config_path.exists():
            _redact = _security_cfg.get("redact_secrets")
            if _redact is not None:
                os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
-    except Exception:
-        pass  # Non-fatal; gateway can still run with .env values
+    except Exception as _bridge_err:
+        # Previously this was silent (`except Exception: pass`), which
+        # hid partial bridge failures and let .env defaults shadow
+        # config.yaml values — users observed max_turns=500 in config
+        # but a 60-iteration cap in practice. Surface the failure to
+        # stderr so operators see it even though `logger` is not yet
+        # initialized at module-import time (logger is defined further
+        # down this module).
+        print(
+            f"  Warning: config.yaml → env bridge failed: "
+            f"{type(_bridge_err).__name__}: {_bridge_err}",
+            file=sys.stderr,
+        )
+        print(
+            "  Gateway will fall back to .env values, which may not match "
+            "your current config.yaml. Run `hermes doctor` to investigate.",
+            file=sys.stderr,
+        )

 # Apply IPv4 preference if configured (before any HTTP clients are created).
 try:
@@ -673,11 +690,69 @@ def _is_control_interrupt_message(message: Optional[str]) -> bool:
    return normalized in _CONTROL_INTERRUPT_MESSAGES


+def _skill_slug_from_frontmatter(skill_md: Path) -> tuple[str | None, str | None]:
+    """Derive the /command slug and declared frontmatter name from a SKILL.md.
+
+    Matches the exact normalization used by
+    :func:`agent.skill_commands.scan_skill_commands` so the slug here is the
+    same string a user types after the leading ``/`` (e.g. a skill with
+    frontmatter ``name: Stable Diffusion Image Generation`` resolves to
+    ``stable-diffusion-image-generation`` — NOT the parent directory name,
+    which is commonly shorter/different, e.g. ``stable-diffusion``).
+
+    Using the directory name silently broke :func:`_check_unavailable_skill`
+    for every skill whose directory name drifted from its frontmatter name
+    (19 such skills on a standard install as of 2026-05), causing a generic
+    "unknown command" response where a "disabled — enable with …" or
+    "not installed — install with …" hint was expected.
+
+    Returns ``(slug, declared_name)`` or ``(None, None)`` when the file
+    can't be read or lacks a ``name:`` in its frontmatter.
+    """
+    try:
+        content = skill_md.read_text(encoding="utf-8", errors="replace")  # undecodable bytes are replaced, not raised
+    except Exception:  # unreadable file: treated the same as "no usable frontmatter"
+        return None, None
+    if not content.startswith("---"):  # file does not open with a frontmatter delimiter
+        return None, None
+    end = content.find("\n---", 3)  # closing delimiter, searched after the opening "---"
+    if end < 0:  # frontmatter never closed
+        return None, None
+    declared_name: str | None = None
+    for line in content[3:end].splitlines():
+        line = line.strip()  # NOTE(review): stripping means an indented/nested "name:" key also matches — confirm
+        if line.startswith("name:"):
+            raw = line.split(":", 1)[1].strip()  # everything after the first colon
+            # Strip YAML quote wrappers if present
+            if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
+                raw = raw[1:-1]
+            declared_name = raw.strip()
+            break  # first matching line wins
+    if not declared_name:  # no "name:" line, or its value was empty
+        return None, None
+    slug = declared_name.lower().replace(" ", "-").replace("_", "-")  # lowercase; spaces/underscores -> hyphens
+    # Mirror _SKILL_INVALID_CHARS and _SKILL_MULTI_HYPHEN from skill_commands
+    import re as _re
+    slug = _re.sub(r"[^a-z0-9-]", "", slug)  # drop everything except a-z, 0-9 and "-"
+    slug = _re.sub(r"-{2,}", "-", slug).strip("-")  # collapse hyphen runs, trim edge hyphens
+    if not slug:  # name consisted only of characters that were stripped
+        return None, declared_name  # NOTE(review): the one path that returns a name without a slug
+    return slug, declared_name
+
+
 def _check_unavailable_skill(command_name: str) -> str | None:
    """Check if a command matches a known-but-inactive skill.

    Returns a helpful message if the skill exists but is disabled or only
    available as an optional install. Returns None if no match found.
+
+    The slug for each on-disk skill is derived from its frontmatter ``name:``
+    (via :func:`_skill_slug_from_frontmatter`), NOT from its containing
+    directory name — because the two can differ (e.g. directory
+    ``stable-diffusion`` + frontmatter ``Stable Diffusion Image Generation``
+    yields slug ``stable-diffusion-image-generation``). Matching on
+    directory name would miss that slug entirely and fall through to the
+    generic "unknown command" path.
    """
    # Normalize: command uses hyphens, skill names may use hyphens or underscores
    normalized = command_name.lower().replace("_", "-")
@@ -693,8 +768,12 @@ def _check_unavailable_skill(command_name: str) -> str | None:
            for skill_md in skills_dir.rglob("SKILL.md"):
                if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
                    continue
-                name = skill_md.parent.name.lower().replace("_", "-")
-                if name == normalized and name in disabled:
+                slug, declared_name = _skill_slug_from_frontmatter(skill_md)
+                if not slug or not declared_name:
+                    continue
+                # disabled is keyed by the declared frontmatter name (what
+                # skills.disabled / skills.platform_disabled store).
+                if slug == normalized and declared_name in disabled:
                    return (
                        f"The **{command_name}** skill is installed but disabled.\n"
                        f"Enable it with: `hermes skills config`"
@@ -706,8 +785,10 @@ def _check_unavailable_skill(command_name: str) -> str | None:
        optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
        if optional_dir.exists():
            for skill_md in optional_dir.rglob("SKILL.md"):
-                name = skill_md.parent.name.lower().replace("_", "-")
-                if name == normalized:
+                slug, _declared = _skill_slug_from_frontmatter(skill_md)
+                if not slug:
+                    continue
+                if slug == normalized:
                    # Build install path: official/<category>/<name>
                    rel = skill_md.parent.relative_to(optional_dir)
                    parts = list(rel.parts)
@@ -2519,6 +2600,18 @@ class GatewayRunner:
        """
        logger.info("Starting Hermes Gateway...")
        logger.info("Session storage: %s", self.config.sessions_dir)
+        # Log the resolved max_iterations budget so operators can verify the
+        # config.yaml → env bridge did the right thing at a glance (instead
+        # of silently running at a stale .env value for weeks).
+        try:
+            _effective_max_iter = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+            logger.info(
+                "Agent budget: max_iterations=%d (agent.max_turns from config.yaml, "
+                "or HERMES_MAX_ITERATIONS from .env, or default 90)",
+                _effective_max_iter,
+            )
+        except Exception:
+            pass
        try:
            from hermes_cli.profiles import get_active_profile_name
            _profile = get_active_profile_name()
@@ -9623,6 +9716,28 @@ class GatewayRunner:
            removed = result.get("removed", [])  # [{"name", "description"}, ...]
            total = result.get("total", 0)

+            # Let each connected adapter refresh any platform-side state
+            # that cached the skill list at startup. Today that's the
+            # Discord /skill autocomplete (registered once per connect);
+            # without this call, new skills stay invisible in the
+            # dropdown and deleted skills error out when clicked. Other
+            # adapters that don't override refresh_skill_group (Telegram's
+            # BotCommand menu, Slack subcommand map, etc.) are silently
+            # skipped — the in-process reload above is enough for them.
+            for adapter in list(self.adapters.values()):
+                refresh = getattr(adapter, "refresh_skill_group", None)
+                if not callable(refresh):
+                    continue
+                try:
+                    maybe = refresh()
+                    if inspect.isawaitable(maybe):
+                        await maybe
+                except Exception as exc:
+                    logger.warning(
+                        "Adapter %s refresh_skill_group raised: %s",
+                        getattr(adapter, "name", adapter), exc,
+                    )
+
            lines = ["🔄 **Skills Reloaded**\n"]
            if not added and not removed:
                lines.append("No new skills detected.")
@@ -10366,16 +10481,28 @@ class GatewayRunner:
                return

            metadata = {"thread_id": thread_id} if thread_id else None
-            await adapter.send(
+            result = await adapter.send(
                chat_id,
                "♻ Gateway restarted successfully. Your session continues.",
                metadata=metadata,
            )
-            logger.info(
-                "Sent restart notification to %s:%s",
-                platform_str,
-                chat_id,
-            )
+            # adapter.send() catches provider errors (e.g. "Chat not found")
+            # and returns SendResult(success=False) rather than raising, so
+            # we must inspect the result before claiming success — otherwise
+            # the log line is misleading and hides real delivery failures.
+            if getattr(result, "success", False):
+                logger.info(
+                    "Sent restart notification to %s:%s",
+                    platform_str,
+                    chat_id,
+                )
+            else:
+                logger.warning(
+                    "Restart notification to %s:%s was not delivered: %s",
+                    platform_str,
+                    chat_id,
+                    getattr(result, "error", "unknown error"),
+                )
        except Exception as e:
            logger.warning("Restart notification failed: %s", e)
        finally:
@@ -10,6 +10,7 @@ To add an alias: set ``aliases=("short",)`` on the existing ``CommandDef``.

 from __future__ import annotations

+import logging
 import os
 import re
 import shutil
@@ -21,6 +22,8 @@ from typing import Any

 from utils import is_truthy_value

+logger = logging.getLogger(__name__)
+
 # prompt_toolkit is an optional CLI dependency — only needed for
 # SlashCommandCompleter and SlashCommandAutoSuggest.  Gateway and test
 # environments that lack it must still be able to import this module
@@ -611,13 +614,26 @@ def _collect_gateway_skill_entries(
    try:
        from agent.skill_commands import get_skill_commands
        from tools.skills_tool import SKILLS_DIR
+        from agent.skill_utils import get_external_skills_dirs
        _skills_dir = str(SKILLS_DIR.resolve())
-        _hub_dir = str((SKILLS_DIR / ".hub").resolve())
+        _hub_dir = str((SKILLS_DIR / ".hub").resolve()).rstrip("/") + "/"
+        # Build set of allowed directory prefixes: local skills dir + any
+        # user-configured ``skills.external_dirs``. Ensure each prefix ends
+        # with ``/`` so ``/my-skills`` does not also match ``/my-skills-extra``.
+        # Without this widening, external skills are visible in
+        # ``hermes skills list`` and the agent's ``/skill-name`` dispatch but
+        # silently excluded from gateway slash menus (#8110).
+        _allowed_prefixes = [_skills_dir.rstrip("/") + "/"]
+        _allowed_prefixes.extend(
+            str(d).rstrip("/") + "/" for d in get_external_skills_dirs()
+        )
        skill_cmds = get_skill_commands()
        for cmd_key in sorted(skill_cmds):
            info = skill_cmds[cmd_key]
            skill_path = info.get("skill_md_path", "")
-            if not skill_path.startswith(_skills_dir):
+            if not skill_path:
+                continue
+            if not any(skill_path.startswith(prefix) for prefix in _allowed_prefixes):
                continue
            if skill_path.startswith(_hub_dir):
                continue
@@ -721,24 +737,40 @@ def discord_skill_commands(
 def discord_skill_commands_by_category(
    reserved_names: set[str],
 ) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]:
-    """Return skill entries organized by category for Discord ``/skill`` subcommand groups.
+    """Return skill entries organized by category for Discord ``/skill`` autocomplete.

-    Skills whose directory is nested at least 2 levels under ``SKILLS_DIR``
+    Skills whose directory is nested at least 2 levels under a scan root
    (e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level
    category.  Root-level skills (e.g. ``dogfood/SKILL.md``) are returned as
-    *uncategorized* — the caller should register them as direct subcommands
-    of the ``/skill`` group.
+    *uncategorized*.

-    The same filtering as :func:`discord_skill_commands` is applied: hub
-    skills excluded, per-platform disabled excluded, names clamped.
+    Scan roots include the local ``SKILLS_DIR`` **and** any configured
+    ``skills.external_dirs`` — matching the widened filter applied to the
+    flat ``discord_skill_commands()`` collector in #18741. Without this
+    parity, external-dir skills are visible via ``hermes skills list`` and
+    the agent's ``/skill-name`` dispatch but silently absent from Discord's
+    ``/skill`` autocomplete.
+
+    Filtering mirrors :func:`discord_skill_commands`: hub skills excluded,
+    per-platform disabled excluded, names clamped to 32 chars, descriptions
+    clamped to 100 chars.
+
+    The legacy 25-group × 25-subcommand caps (from the old nested
+    ``/skill <cat> <name>`` layout) are **not** applied — the live caller
+    (``_register_skill_group`` in ``gateway/platforms/discord.py``, refactored
+    in PR #11580) flattens these results and feeds them into a single
+    autocomplete callback, which scales to thousands of entries without any
+    per-command payload concerns. ``hidden_count`` is retained in the return
+    tuple for backward compatibility and still reports skills dropped for
+    other reasons (32-char clamp collision with a reserved or skill name).

    Returns:
        ``(categories, uncategorized, hidden_count)``

        - *categories*: ``{category_name: [(name, description, cmd_key), ...]}``
        - *uncategorized*: ``[(name, description, cmd_key), ...]``
-        - *hidden_count*: skills dropped due to Discord group limits
-          (25 subcommand groups, 25 subcommands per group)
+        - *hidden_count*: skills dropped due to name clamp collisions
+          against already-registered command names.
    """
    from pathlib import Path as _P

@@ -752,14 +784,33 @@ def discord_skill_commands_by_category(
    # Collect raw skill data --------------------------------------------------
    categories: dict[str, list[tuple[str, str, str]]] = {}
    uncategorized: list[tuple[str, str, str]] = []
-    _names_used: set[str] = set(reserved_names)
+    # Map clamped-32-char-name → what it came from, so we can emit an
+    # actionable warning on collision. Reserved (gateway-builtin) command
+    # names are marked with a sentinel so the warning distinguishes
+    # "skill collided with a reserved command" from "two skills collided
+    # on the 32-char clamp" — the latter is the rename-worthy case.
+    _names_used: dict[str, str] = {n: "<reserved>" for n in reserved_names}
    hidden = 0

    try:
        from agent.skill_commands import get_skill_commands
+        from agent.skill_utils import get_external_skills_dirs
        from tools.skills_tool import SKILLS_DIR
+
        _skills_dir = SKILLS_DIR.resolve()
        _hub_dir = (SKILLS_DIR / ".hub").resolve()
+        # Build the list of resolved scan roots, local dir first. Each external
+        # dir becomes its own scan root for category derivation — a skill at
+        # ``<external>/mlops/foo/SKILL.md`` is still categorized as "mlops".
+        _scan_roots: list[_P] = [_skills_dir]
+        try:
+            for ext in get_external_skills_dirs():
+                try:
+                    _scan_roots.append(_P(ext).resolve())
+                except Exception:
+                    continue
+        except Exception:
+            pass
        skill_cmds = get_skill_commands()

        for cmd_key in sorted(skill_cmds):
@@ -768,33 +819,72 @@ def discord_skill_commands_by_category(
            if not skill_path:
                continue
            sp = _P(skill_path).resolve()
-            # Skip skills outside SKILLS_DIR or from the hub
-            if not str(sp).startswith(str(_skills_dir)):
-                continue
+            # Hub skills are loaded via the skill hub, not surfaced as
+            # slash commands.
            if str(sp).startswith(str(_hub_dir)):
                continue
+            # Accept skill if it lives under any scan root; record the
+            # matching root so we can derive the category correctly.
+            matched_root: _P | None = None
+            for root in _scan_roots:
+                try:
+                    sp.relative_to(root)
+                except ValueError:
+                    continue
+                matched_root = root
+                break
+            if matched_root is None:
+                continue

            skill_name = info.get("name", "")
            if skill_name in _platform_disabled:
                continue

            raw_name = cmd_key.lstrip("/")
-            # Clamp to 32 chars (Discord limit)
+            # Clamp to 32 chars (Discord per-command name limit)
            discord_name = raw_name[:32]
            if discord_name in _names_used:
+                # The clamped name is already taken, by a reserved gateway
+                # command or by an earlier skill (earlier = alphabetically
+                # first, since this loop iterates ``sorted(skill_cmds)``).
+                # The later skill is dropped from Discord's /skill autocomplete.
+                #
+                # Silently counting this as ``hidden`` (the old behavior)
+                # meant skill authors had no way to discover the drop —
+                # their skill just didn't appear in the picker. Emit a
+                # WARNING naming both sides so the author can rename the
+                # losing skill's frontmatter name to something with a
+                # distinct 32-char prefix.
+                prior = _names_used[discord_name]
+                if prior == "<reserved>":
+                    logger.warning(
+                        "Discord /skill: %r (from %r) collides on its 32-char "
+                        "clamp with a reserved gateway command name %r — the "
+                        "skill will not appear in the /skill autocomplete. "
+                        "Rename the skill's frontmatter ``name:`` to differ "
+                        "in its first 32 chars.",
+                        discord_name, cmd_key, discord_name,
+                    )
+                else:
+                    logger.warning(
+                        "Discord /skill: %r and %r both clamp to %r on "
+                        "Discord's 32-char command-name limit — only %r "
+                        "will appear in the /skill autocomplete. Rename "
+                        "one skill's frontmatter ``name:`` to differ in "
+                        "its first 32 chars.",
+                        prior, cmd_key, discord_name, prior,
+                    )
+                hidden += 1
                continue
-            _names_used.add(discord_name)
+            _names_used[discord_name] = cmd_key

            desc = info.get("description", "")
            if len(desc) > 100:
                desc = desc[:97] + "..."

-            # Determine category from the relative path within SKILLS_DIR.
-            # e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art")
-            try:
-                rel = sp.parent.relative_to(_skills_dir)
-            except ValueError:
-                continue
+            # Determine category from the relative path within the matched
+            # scan root. e.g. creative/ascii-art/SKILL.md → ("creative", ...)
+            rel = sp.parent.relative_to(matched_root)
            parts = rel.parts
            if len(parts) >= 2:
                cat = parts[0]
@@ -804,28 +894,7 @@ def discord_skill_commands_by_category(
    except Exception:
        pass

-    # Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------
-    _MAX_GROUPS = 25
-    _MAX_PER_GROUP = 25
-
-    trimmed_categories: dict[str, list[tuple[str, str, str]]] = {}
-    group_count = 0
-    for cat in sorted(categories):
-        if group_count >= _MAX_GROUPS:
-            hidden += len(categories[cat])
-            continue
-        entries = categories[cat][:_MAX_PER_GROUP]
-        hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP)
-        trimmed_categories[cat] = entries
-        group_count += 1
-
-    # Uncategorized skills also count against the 25 top-level limit
-    remaining_slots = _MAX_GROUPS - group_count
-    if len(uncategorized) > remaining_slots:
-        hidden += len(uncategorized) - remaining_slots
-        uncategorized = uncategorized[:remaining_slots]
-
-    return trimmed_categories, uncategorized, hidden
+    return categories, uncategorized, hidden


 # ---------------------------------------------------------------------------
@@ -400,7 +400,12 @@ DEFAULT_CONFIG = {
        # The gateway stops accepting new work, waits for running agents
        # to finish, then interrupts any remaining runs after the timeout.
        # 0 = no drain, interrupt immediately.
-        "restart_drain_timeout": 60,
+        #
+        # 180s is calibrated for realistic in-flight agent turns: a typical
+        # coding conversation mid-reasoning runs 60–150s per call, so a 60s
+        # budget routinely interrupted legitimate work on /restart. Raise
+        # further in config.yaml if you run very-long-reasoning models.
+        "restart_drain_timeout": 180,
        # Max app-level retry attempts for API errors (connection drops,
        # provider timeouts, 5xx, etc.) before the agent surfaces the
        # failure.  The OpenAI SDK already does its own low-level retries
@@ -639,6 +644,18 @@ DEFAULT_CONFIG = {
        "cache_ttl": "5m",
    },

+    # OpenRouter-specific settings.
+    # response_cache: enable OpenRouter response caching (X-OpenRouter-Cache header).
+    #   When enabled, identical requests return cached responses for free (zero billing).
+    #   This is separate from Anthropic prompt caching and works alongside it.
+    #   See: https://openrouter.ai/docs/guides/features/response-caching
+    # response_cache_ttl: how long cached responses remain valid, in seconds (1-86400).
+    #   Default 300 (5 minutes). Only used when response_cache is enabled.
+    "openrouter": {
+        "response_cache": True,
+        "response_cache_ttl": 300,
+    },
+
    # AWS Bedrock provider configuration.
    # Only used when model.provider is "bedrock".
    "bedrock": {
@@ -825,7 +842,7 @@ DEFAULT_CONFIG = {
            # Voices: alloy, echo, fable, onyx, nova, shimmer
        },
        "xai": {
-            "voice_id": "eve",
+            "voice_id": "eve",  # or custom voice ID — see https://docs.x.ai/developers/model-capabilities/audio/custom-voices
            "language": "en",
            "sample_rate": 24000,
            "bit_rate": 128000,
@@ -1801,6 +1818,29 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "tool",
    },
+    "TINYFISH_API_KEY": {
+        "description": "TinyFish API key for cloud browser, search, fetch, and agent",
+        "prompt": "TinyFish API key",
+        "url": "https://agent.tinyfish.ai/api-keys",
+        "tools": ["browser_navigate", "browser_click"],
+        "password": True,
+        "category": "tool",
+    },
+    "TINYFISH_API_URL": {
+        "description": "TinyFish browser API URL override (optional, for staging/dev)",
+        "prompt": "TinyFish API URL (leave empty for default)",
+        "url": None,
+        "tools": ["browser_navigate", "browser_click"],
+        "password": False,
+        "category": "tool",
+    },
+    "TINYFISH_BROWSER_TIMEOUT": {
+        "description": "TinyFish browser session inactivity timeout in seconds (optional, default 300)",
+        "prompt": "Browser session timeout (seconds)",
+        "tools": ["browser_navigate", "browser_click"],
+        "password": False,
+        "category": "tool",
+    },
    "CAMOFOX_URL": {
        "description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
        "prompt": "Camofox server URL",
@@ -4416,6 +4456,7 @@ def show_config():
        ("TAVILY_API_KEY", "Tavily"),
        ("BROWSERBASE_API_KEY", "Browserbase"),
        ("BROWSER_USE_API_KEY", "Browser Use"),
+        ("TINYFISH_API_KEY", "TinyFish"),
        ("FAL_KEY", "FAL"),
    ]
    
@@ -4600,6 +4641,7 @@ def set_config_value(key: str, value: str):
        'FIRECRAWL_GATEWAY_URL', 'TOOL_GATEWAY_DOMAIN', 'TOOL_GATEWAY_SCHEME',
        'TOOL_GATEWAY_USER_TOKEN', 'TAVILY_API_KEY',
        'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
+        'TINYFISH_API_KEY', 'TINYFISH_API_URL', 'TINYFISH_BROWSER_TIMEOUT',
        'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
        'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
        'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN',
@@ -302,9 +302,21 @@ def _cmd_rollback(args) -> int:
        print(f"  reason:      {manifest.get('reason', '?')}")
        print(f"  created_at:  {manifest.get('created_at', '?')}")
        print(f"  skill files: {manifest.get('skill_files', '?')}")
+        cron = manifest.get("cron_jobs") or {}
+        if isinstance(cron, dict):
+            if cron.get("backed_up"):
+                print(
+                    f"  cron jobs:   {cron.get('jobs_count', 0)} "
+                    f"(will be restored for skill-link fields only)"
+                )
+            else:
+                reason = cron.get("reason", "not captured")
+                print(f"  cron jobs:   not in snapshot ({reason})")
    print(
        "\nThis will replace the current ~/.hermes/skills/ tree (a safety "
-        "snapshot of the current state is taken first so this is undoable)."
+        "snapshot of the current state is taken first so this is undoable). "
+        "Cron jobs that still exist will have their skills/skill fields "
+        "restored from the snapshot; all other cron fields are left alone."
    )

    if not getattr(args, "yes", False):
@@ -263,8 +263,11 @@ def run_doctor(args):
    if env_path.exists():
        check_ok(f"{_DHH}/.env file exists")
        
-        # Check for common issues
-        content = env_path.read_text()
+        # Check for common issues. Pin encoding to UTF-8 because .env files are
+        # written as UTF-8 everywhere in the codebase, while Path.read_text()
+        # defaults to the system locale — which crashes on non-UTF-8 Windows
+        # locales (e.g. GBK) as soon as the file contains any non-ASCII byte.
+        content = env_path.read_text(encoding="utf-8")
        if _has_provider_env_config(content):
            check_ok("API key or custom endpoint configured")
        else:
@@ -188,7 +188,7 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:

    SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
    which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
-    seconds), then exits with code 75.  Both systemd (``Restart=on-failure``
+    seconds), then exits with code 75.  Both systemd (``Restart=always``
    + ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
    = false``) relaunch the process after the graceful exit.

@@ -1655,8 +1655,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
 Description={SERVICE_DESCRIPTION}
 After=network-online.target
 Wants=network-online.target
-StartLimitIntervalSec=600
-StartLimitBurst=5
+StartLimitIntervalSec=0

 [Service]
 Type=simple
@@ -1670,8 +1669,10 @@ Environment="LOGNAME={username}"
 Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
-Restart=on-failure
-RestartSec=30
+Restart=always
+RestartSec=60
+RestartMaxDelaySec=300
+RestartSteps=5
 RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
 KillMode=mixed
 KillSignal=SIGTERM
@@ -1691,9 +1692,9 @@ WantedBy=multi-user.target
    sane_path = ":".join(path_entries)
    return f"""[Unit]
 Description={SERVICE_DESCRIPTION}
-After=network.target
-StartLimitIntervalSec=600
-StartLimitBurst=5
+After=network-online.target
+Wants=network-online.target
+StartLimitIntervalSec=0

 [Service]
 Type=simple
@@ -1702,8 +1703,10 @@ WorkingDirectory={working_dir}
 Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
-Restart=on-failure
-RestartSec=30
+Restart=always
+RestartSec=60
+RestartMaxDelaySec=300
+RestartSteps=5
 RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
 KillMode=mixed
 KillSignal=SIGTERM
@@ -2451,7 +2454,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
    print()
    
    # Exit with code 1 if gateway fails to connect any platform,
-    # so systemd Restart=on-failure will retry on transient errors
+    # so systemd Restart=always will retry on transient errors
    verbosity = None if quiet else verbose
    try:
        success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
@@ -289,7 +289,7 @@ def _has_any_provider_configured() -> bool:
    env_file = get_env_path()
    if env_file.exists():
        try:
-            for line in env_file.read_text().splitlines():
+            for line in env_file.read_text(encoding="utf-8").splitlines():
                line = line.strip()
                if line.startswith("#") or "=" not in line:
                    continue
@@ -361,7 +361,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:

    existing_lines = []
    if env_path.exists():
-        existing_lines = env_path.read_text().splitlines()
+        existing_lines = env_path.read_text(encoding="utf-8").splitlines()

    updated_keys = set()
    new_lines = []
@@ -1057,6 +1057,45 @@ def list_authenticated_providers(
        if normed:
            _builtin_endpoints.add(normed)

+    def _has_fast_aws_sdk_signal() -> bool:
+        """Return True when explicit AWS auth config is present.
+
+        This intentionally avoids botocore's full credential chain. Provider
+        picker/model-switch discovery can run for non-Bedrock providers, and
+        botocore may otherwise probe EC2 IMDS (169.254.169.254) on local
+        machines before returning no credentials.
+        """
+        if os.environ.get("AWS_BEARER_TOKEN_BEDROCK", "").strip():
+            return True
+        if (
+            os.environ.get("AWS_ACCESS_KEY_ID", "").strip()
+            and os.environ.get("AWS_SECRET_ACCESS_KEY", "").strip()
+        ):
+            return True
+        return any(
+            os.environ.get(name, "").strip()
+            for name in (
+                "AWS_PROFILE",
+                "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
+                "AWS_CONTAINER_CREDENTIALS_FULL_URI",
+                "AWS_WEB_IDENTITY_TOKEN_FILE",
+            )
+        )
+
+    def _has_aws_sdk_creds_for_listing(slug: str) -> bool:
+        """Credential check for AWS SDK providers in non-runtime discovery."""
+        slug_norm = str(slug or "").strip().lower()
+        current_norm = str(current_provider or "").strip().lower()
+        if _has_fast_aws_sdk_signal():
+            return True
+        if slug_norm != current_norm:
+            return False
+        try:
+            from agent.bedrock_adapter import has_aws_credentials
+            return bool(has_aws_credentials())
+        except Exception:
+            return False
+
    data = fetch_models_dev()

    # Build curated model lists keyed by hermes provider ID
@@ -1184,7 +1223,9 @@ def list_authenticated_providers(

        # Check if credentials exist
        has_creds = False
-        if overlay.extra_env_vars:
+        if overlay.auth_type == "aws_sdk":
+            has_creds = _has_aws_sdk_creds_for_listing(hermes_slug)
+        elif overlay.extra_env_vars:
            has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
        # Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
        if not has_creds and overlay.auth_type == "api_key":
@@ -1324,11 +1365,7 @@ def list_authenticated_providers(
        # credentials come from the boto3 credential chain (env vars,
        # ~/.aws/credentials, instance roles, etc.)
        if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
-            try:
-                from agent.bedrock_adapter import has_aws_credentials
-                _cp_has_creds = has_aws_credentials()
-            except Exception:
-                pass
+            _cp_has_creds = _has_aws_sdk_creds_for_listing(_cp.slug)

        if not _cp_has_creds:
            continue
@@ -141,6 +141,7 @@ def _browser_label(current_provider: str) -> str:
        "browserbase": "Browserbase",
        "browser-use": "Browser Use",
        "firecrawl": "Firecrawl",
+        "tinyfish": "TinyFish",
        "camofox": "Camofox",
        "local": "Local browser",
    }
@@ -169,6 +170,7 @@ def _resolve_browser_feature_state(
    direct_browserbase: bool,
    direct_browser_use: bool,
    direct_firecrawl: bool,
+    direct_tinyfish: bool,
    managed_browser_available: bool,
 ) -> tuple[str, bool, bool, bool]:
    """Resolve browser availability using the same precedence as runtime."""
@@ -196,6 +198,10 @@ def _resolve_browser_feature_state(
            available = bool(browser_local_available and direct_firecrawl)
            active = bool(browser_tool_enabled and available)
            return current_provider, available, active, False
+        if current_provider == "tinyfish":
+            available = bool(browser_local_available and direct_tinyfish)
+            active = bool(browser_tool_enabled and available)
+            return current_provider, available, active, False
        if current_provider == "camofox":
            return current_provider, False, False, False

@@ -286,6 +292,7 @@ def get_nous_subscription_features(
    direct_camofox = bool(get_env_value("CAMOFOX_URL"))
    direct_browserbase = bool(get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
    direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
+    direct_tinyfish = bool(get_env_value("TINYFISH_API_KEY"))
    direct_modal = has_direct_modal_credentials()

    # When use_gateway is set, suppress direct credentials for managed detection
@@ -363,6 +370,7 @@ def get_nous_subscription_features(
        direct_browserbase=direct_browserbase,
        direct_browser_use=direct_browser_use,
        direct_firecrawl=direct_firecrawl,
+        direct_tinyfish=direct_tinyfish,
        managed_browser_available=managed_browser_available,
    )

@@ -384,7 +384,7 @@ def _print_setup_summary(config: dict, hermes_home):
    else:
        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY"))

-    # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl)
+    # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, Firecrawl, or TinyFish)
    browser_provider = subscription_features.browser.current_provider
    if subscription_features.browser.managed_by_nous:
        tool_status.append(("Browser Automation (Nous Browser Use)", True, None))
@@ -406,6 +406,10 @@ def _print_setup_summary(config: dict, hermes_home):
            )
        elif browser_provider == "Camofox":
            missing_browser_hint = "CAMOFOX_URL"
+        elif browser_provider == "TinyFish":
+            missing_browser_hint = (
+                "npm install -g agent-browser and set TINYFISH_API_KEY"
+            )
        elif browser_provider == "Local browser":
            missing_browser_hint = "npm install -g agent-browser"
        tool_status.append(
@@ -1190,6 +1194,13 @@ def _setup_tts_provider(config: dict):
                    "Falling back to Edge TTS."
                )
                selected = "edge"
+        if selected == "xai":
+            print()
+            voice_id = prompt("xAI voice_id (Enter for 'eve', or paste a custom voice ID)")
+            if voice_id and voice_id.strip():
+                config.setdefault("tts", {}).setdefault("xai", {})["voice_id"] = voice_id.strip()
+                print_success(f"xAI voice_id set to: {voice_id.strip()}")
+

    elif selected == "minimax":
        existing = get_env_value("MINIMAX_API_KEY")
@@ -1643,7 +1654,11 @@ def setup_terminal_backend(config: dict):
 def _apply_default_agent_settings(config: dict):
    """Apply recommended defaults for all agent settings without prompting."""
    config.setdefault("agent", {})["max_turns"] = 90
-    save_env_value("HERMES_MAX_ITERATIONS", "90")
+    # config.yaml is the authoritative source for max_turns; the gateway
+    # bridges it into HERMES_MAX_ITERATIONS at startup. We no longer write
+    # to .env to avoid the dual-source inconsistency that caused the
+    # 60-vs-500 bug (stale .env entry silently shadowing config.yaml).
+    remove_env_value("HERMES_MAX_ITERATIONS")

    config.setdefault("display", {})["tool_progress"] = "all"

@@ -1673,9 +1688,10 @@ def setup_agent_settings(config: dict):
    print()

    # ── Max Iterations ──
-    current_max = get_env_value("HERMES_MAX_ITERATIONS") or str(
-        cfg_get(config, "agent", "max_turns", default=90)
-    )
+    # config.yaml is authoritative; read from there. If a legacy .env
+    # entry is still around (from pre-PR#18413 setups), prefer the
+    # config value so we don't surface a stale number to the user.
+    current_max = str(cfg_get(config, "agent", "max_turns", default=90))
    print_info("Maximum tool-calling iterations per conversation.")
    print_info("Higher = more complex tasks, but costs more tokens.")
    print_info(
@@ -1686,9 +1702,13 @@ def setup_agent_settings(config: dict):
    try:
        max_iter = int(max_iter_str)
        if max_iter > 0:
-            save_env_value("HERMES_MAX_ITERATIONS", str(max_iter))
+            # Write to config.yaml (authoritative) only. Also clean up any
+            # stale .env entry from earlier setup runs — the gateway's
+            # bridge in gateway/run.py now unconditionally derives
+            # HERMES_MAX_ITERATIONS from agent.max_turns at startup.
            config.setdefault("agent", {})["max_turns"] = max_iter
            config.pop("max_turns", None)
+            remove_env_value("HERMES_MAX_ITERATIONS")
            print_success(f"Max iterations set to {max_iter}")
    except ValueError:
        print_warning("Invalid number, keeping current value")
@@ -379,6 +379,15 @@ TOOL_CATEGORIES = {
                "browser_provider": "firecrawl",
                "post_setup": "agent_browser",
            },
+            {
+                "name": "TinyFish",
+                "tag": "Low latency browser with stealth & proxies",
+                "env_vars": [
+                    {"key": "TINYFISH_API_KEY", "prompt": "TinyFish API key", "url": "https://agent.tinyfish.ai/api-keys"},
+                ],
+                "browser_provider": "tinyfish",
+                "post_setup": "agent_browser",
+            },
            {
                "name": "Camofox",
                "badge": "free · local",
@@ -1822,7 +1831,7 @@ def _reconfigure_tool(config: dict):
        cat = TOOL_CATEGORIES.get(ts_key)
        reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
        if cat or reqs:
-            if _toolset_has_keys(ts_key, config):
+            if _toolset_has_keys(ts_key, config) or _toolset_enabled_for_reconfigure(ts_key, config):
                configurable.append((ts_key, ts_label))

    if not configurable:
@@ -1848,6 +1857,28 @@ def _reconfigure_tool(config: dict):
    save_config(config)


+def _toolset_enabled_for_reconfigure(ts_key: str, config: dict) -> bool:
+    """Return True if a configurable toolset is enabled anywhere.
+
+    Reconfigure must include enabled-but-unconfigured categories so users can
+    finish provider/API-key setup without disabling and re-enabling the toolset.
+    """
+    for platform in PLATFORMS:
+        if not _toolset_allowed_for_platform(ts_key, platform):
+            continue
+        try:
+            enabled = _get_platform_tools(
+                config,
+                platform,
+                include_default_mcp_servers=False,
+            )
+        except Exception:
+            continue
+        if ts_key in enabled:
+            return True
+    return False
+
+
 def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
    """Reconfigure a tool category - provider selection + API key update."""
    icon = cat.get("icon", "")
@@ -2882,6 +2882,25 @@ _VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
 # loopback so tests don't need to rewrite request scope.
 _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})

+
+def _is_public_bind() -> bool:
+    """True when bound to all-interfaces (operator used --insecure)."""
+    return getattr(app.state, "bound_host", "") in ("0.0.0.0", "::")
+
+
+def _ws_client_is_allowed(ws: "WebSocket") -> bool:
+    """Check if the WebSocket client IP is acceptable.
+
+    Allows loopback (and clients with no reported address) always; allows any
+    IP when bound to all-interfaces (--insecure mode, guarded by session token auth).
+    """
+    if _is_public_bind():
+        return True
+    client_host = ws.client.host if ws.client else ""
+    if not client_host:
+        return True
+    return client_host in _LOOPBACK_HOSTS
+
 # Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
 # and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
 # the chat tab generates on mount; entries auto-evict when the last subscriber
@@ -2972,8 +2991,7 @@ async def pty_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

@@ -3080,8 +3098,7 @@ async def gateway_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

@@ -3113,8 +3130,7 @@ async def pub_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

@@ -3143,8 +3159,7 @@ async def events_ws(ws: WebSocket) -> None:
        await ws.close(code=4401)
        return

-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
+    if not _ws_client_is_allowed(ws):
        await ws.close(code=4403)
        return

@@ -8,14 +8,64 @@ import os
 from pathlib import Path


+_profile_fallback_warned: bool = False
+
+
 def get_hermes_home() -> Path:
    """Return the Hermes home directory (default: ~/.hermes).

    Reads HERMES_HOME env var, falls back to ~/.hermes.
    This is the single source of truth — all other copies should import this.
+
+    When ``HERMES_HOME`` is unset but an ``active_profile`` file indicates
+    a non-default profile is active, writes a loud one-shot warning to
+    stderr so cross-profile data corruption is diagnosable instead
+    of silent.  Behavior is unchanged otherwise — we still return
+    ``~/.hermes`` — because raising here would brick 30+ module-level
+    callers that import this at load time.  Subprocess spawners are
+    expected to propagate ``HERMES_HOME`` explicitly (see the systemd
+    template in ``hermes_cli/gateway.py`` and the kanban dispatcher in
+    ``hermes_cli/kanban_db.py``).  See https://github.com/NousResearch/hermes-agent/issues/18594.
    """
    val = os.environ.get("HERMES_HOME", "").strip()
-    return Path(val) if val else Path.home() / ".hermes"
+    if val:
+        return Path(val)
+
+    # Guard: if a non-default profile is sticky-active, warn once that
+    # the fallback to the default profile is almost certainly wrong.
+    global _profile_fallback_warned
+    if not _profile_fallback_warned:
+        try:
+            # Inline the default-root resolution from get_default_hermes_root()
+            # to stay import-safe (this function is called from module scope
+            # in 30+ files; we cannot afford to trigger logging setup here).
+            active_path = (Path.home() / ".hermes" / "active_profile")
+            active = active_path.read_text().strip() if active_path.exists() else ""
+        except (UnicodeDecodeError, OSError):
+            active = ""
+        if active and active != "default":
+            _profile_fallback_warned = True
+            # Write directly to stderr.  We intentionally do NOT route this
+            # through ``logging`` because (a) this function is called at
+            # module-import time from 30+ sites, often before logging is
+            # configured, and (b) root-logger propagation would double-emit
+            # on consoles where a StreamHandler is already attached.
+            import sys
+            msg = (
+                f"[HERMES_HOME fallback] HERMES_HOME is unset but active "
+                f"profile is {active!r}. Falling back to ~/.hermes, which "
+                f"is the DEFAULT profile — not {active!r}. Any data this "
+                f"process writes will land in the wrong profile. The "
+                f"subprocess spawner should pass HERMES_HOME explicitly "
+                f"(see issue #18594)."
+            )
+            try:
+                sys.stderr.write(msg + "\n")
+                sys.stderr.flush()
+            except Exception:
+                pass
+
+    return Path.home() / ".hermes"


 def get_default_hermes_root() -> Path:
@@ -1258,6 +1258,10 @@ class AIAgent:
        # after each API call.  Accessed by /usage slash command.
        self._rate_limit_state: Optional["RateLimitState"] = None

+        # OpenRouter response cache hit counter — incremented when
+        # X-OpenRouter-Cache-Status: HIT is seen in streaming response headers.
+        self._or_cache_hits: int = 0
+
        # Centralized logging — agent.log (INFO+) and errors.log (WARNING+)
        # both live under ~/.hermes/logs/.  Idempotent, so gateway mode
        # (which creates a new AIAgent per message) won't duplicate handlers.
@@ -1421,11 +1425,8 @@ class AIAgent:
                    client_kwargs["args"] = self.acp_args
                effective_base = base_url
                if base_url_host_matches(effective_base, "openrouter.ai"):
-                    client_kwargs["default_headers"] = {
-                        "HTTP-Referer": "https://hermes-agent.nousresearch.com",
-                        "X-OpenRouter-Title": "Hermes Agent",
-                        "X-OpenRouter-Categories": "productivity,cli-agent",
-                    }
+                    from agent.auxiliary_client import build_or_headers
+                    client_kwargs["default_headers"] = build_or_headers()
                elif base_url_host_matches(effective_base, "api.routermint.com"):
                    client_kwargs["default_headers"] = _routermint_headers()
                elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
@@ -1473,17 +1474,49 @@ class AIAgent:
                                _env_hint = _pcfg.api_key_env_vars[0]
                        except Exception:
                            pass
+                        # --- Init-time fallback (#17929) ---
+                        _fb_entries = []
+                        if isinstance(fallback_model, list):
+                            _fb_entries = [
+                                f for f in fallback_model
+                                if isinstance(f, dict) and f.get("provider") and f.get("model")
+                            ]
+                        elif isinstance(fallback_model, dict) and fallback_model.get("provider") and fallback_model.get("model"):
+                            _fb_entries = [fallback_model]
+                        _fb_resolved = False
+                        for _fb in _fb_entries:
+                            _fb_client, _fb_model = resolve_provider_client(
+                                _fb["provider"], model=_fb["model"], raw_codex=True,
+                                explicit_base_url=_fb.get("base_url"),
+                                explicit_api_key=_fb.get("api_key"),
+                            )
+                            if _fb_client is not None:
+                                self.provider = _fb["provider"]
+                                self.model = _fb_model or _fb["model"]
+                                self._fallback_activated = True
+                                client_kwargs = {
+                                    "api_key": _fb_client.api_key,
+                                    "base_url": str(_fb_client.base_url),
+                                }
+                                if _provider_timeout is not None:
+                                    client_kwargs["timeout"] = _provider_timeout
+                                if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers:
+                                    client_kwargs["default_headers"] = dict(_fb_client._default_headers)
+                                _fb_resolved = True
+                                break
+                        if not _fb_resolved:
+                            raise RuntimeError(
+                                f"Provider '{_explicit}' is set in config.yaml but no API key "
+                                f"was found. Set the {_env_hint} environment "
+                                f"variable, or switch to a different provider with `hermes model`."
+                            )
+                    if not getattr(self, "_fallback_activated", False):
+                        # No provider configured — reject with a clear message.
                        raise RuntimeError(
-                            f"Provider '{_explicit}' is set in config.yaml but no API key "
-                            f"was found. Set the {_env_hint} environment "
-                            f"variable, or switch to a different provider with `hermes model`."
+                            "No LLM provider configured. Run `hermes model` to "
+                            "select a provider, or run `hermes setup` for first-time "
+                            "configuration."
                        )
-                    # No provider configured — reject with a clear message.
-                    raise RuntimeError(
-                        "No LLM provider configured. Run `hermes model` to "
-                        "select a provider, or run `hermes setup` for first-time "
-                        "configuration."
-                    )
            
            self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt

@@ -1536,7 +1569,7 @@ class AIAgent:
        else:
            self._fallback_chain = []
        self._fallback_index = 0
-        self._fallback_activated = False
+        self._fallback_activated = getattr(self, "_fallback_activated", False)
        # Legacy attribute kept for backward compat (tests, external callers)
        self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None
        if self._fallback_chain and not self.quiet_mode:
@@ -4548,6 +4581,28 @@ class AIAgent:
        """Return the last captured RateLimitState, or None."""
        return self._rate_limit_state

+    def _check_openrouter_cache_status(self, http_response: Any) -> None:
+        """Read X-OpenRouter-Cache-Status from response headers and log it.
+
+        Increments ``_or_cache_hits`` on HIT so callers can report savings.
+        """
+        if http_response is None:
+            return
+        headers = getattr(http_response, "headers", None)
+        if not headers:
+            return
+        try:
+            status = headers.get("x-openrouter-cache-status")
+            if not status:
+                return
+            if status.upper() == "HIT":
+                self._or_cache_hits += 1
+                logger.info("OpenRouter response cache HIT (total: %d)", self._or_cache_hits)
+            else:
+                logger.debug("OpenRouter response cache %s", status.upper())
+        except Exception:
+            pass  # Never let header parsing break the agent loop
+
    def get_activity_summary(self) -> dict:
        """Return a snapshot of the agent's current activity for diagnostics.

@@ -6125,10 +6180,10 @@ class AIAgent:
        return True

    def _apply_client_headers_for_base_url(self, base_url: str) -> None:
-        from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS
+        from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers

        if base_url_host_matches(base_url, "openrouter.ai"):
-            self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
+            self._client_kwargs["default_headers"] = build_or_headers()
        elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"):
            self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
        elif base_url_host_matches(base_url, "api.routermint.com"):
@@ -6748,6 +6803,9 @@ class AIAgent:
            # response via .response before any chunks are consumed.
            self._capture_rate_limits(getattr(stream, "response", None))

+            # Log OpenRouter response cache status when present.
+            self._check_openrouter_cache_status(getattr(stream, "response", None))
+
            content_parts: list = []
            tool_calls_acc: dict = {}
            tool_gen_notified: set = set()
@@ -46,6 +46,7 @@ AUTHOR_MAP = {
    "leone.parise@gmail.com": "leoneparise",
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
+    "159539633+MottledShadow@users.noreply.github.com": "MottledShadow",
    "aludwin+gh@gmail.com": "adamludwin",
    "2093036+exiao@users.noreply.github.com": "exiao",
    "rylen.anil@gmail.com": "rylena",
@@ -370,6 +371,10 @@ AUTHOR_MAP = {
    "xowiekk@gmail.com": "Xowiek",
    "1243352777@qq.com": "zons-zhaozhy",
    "e.silacandmr@gmail.com": "Es1la",
+    "h3057183414@gmail.com": "CoreyNoDream",
+    "franksong2702@gmail.com": "franksong2702",
+    "673088860@qq.com": "ambition0802",
+    "beibei1988@proton.me": "beibi9966",
    # ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
    #    crossref, and GH contributor list matching (April 2026 audit) ──
    "1115117931@qq.com": "aaronagent",
@@ -668,6 +673,7 @@ AUTHOR_MAP = {
    "web3blind@gmail.com": "web3blind",
    "ztzheng@163.com": "chengoak",  # PR #17467
    "24110240104@m.fudan.edu.cn": "YuShu",  # co-author only
+    "simantak@mac.local": "simantak-dabhade",  # PR #6329
 }


@@ -178,9 +178,10 @@ class TestMcpRegistrationE2E:
        complete_event = completions[0]
        assert isinstance(complete_event, ToolCallProgress)
        assert complete_event.status == "completed"
-        # rawOutput should contain the tool result string
-        assert complete_event.raw_output is not None
-        assert "hello" in str(complete_event.raw_output)
+        # Completion should contain human-readable output rather than forcing raw JSON panes.
+        assert complete_event.content
+        assert "hello" in complete_event.content[0].content.text
+        assert complete_event.raw_output is None

    def test_patch_mode_tool_start_emits_diff_blocks_for_v4a_patch(self):
        update = build_tool_start(
@@ -27,7 +27,10 @@ from acp.schema import (
    SetSessionModeResponse,
    SessionInfo,
    TextContentBlock,
+    ToolCallProgress,
+    ToolCallStart,
    Usage,
+    UsageUpdate,
    UserMessageChunk,
 )
 from acp_adapter.server import HermesACPAgent, HERMES_VERSION
@@ -200,6 +203,8 @@ class TestSessionOps:
            "context",
            "reset",
            "compact",
+            "steer",
+            "queue",
            "version",
        ]
        model_cmd = next(
@@ -208,6 +213,46 @@ class TestSessionOps:
        assert model_cmd.input is not None
        assert model_cmd.input.root.hint == "model name to switch to"

+    def test_build_usage_update_for_zed_context_indicator(self, agent, mock_manager):
+        state = mock_manager.create_session(cwd="/tmp")
+        state.history = [{"role": "user", "content": "hello"}]
+        state.agent.context_compressor = MagicMock(context_length=100_000)
+        state.agent._cached_system_prompt = "system"
+        state.agent.tools = [{"type": "function", "function": {"name": "demo"}}]
+
+        with patch(
+            "agent.model_metadata.estimate_request_tokens_rough",
+            return_value=25_000,
+        ):
+            update = agent._build_usage_update(state)
+
+        assert isinstance(update, UsageUpdate)
+        assert update.session_update == "usage_update"
+        assert update.size == 100_000
+        assert update.used == 25_000
+
+    @pytest.mark.asyncio
+    async def test_send_usage_update_to_client(self, agent, mock_manager):
+        state = mock_manager.create_session(cwd="/tmp")
+        state.agent.context_compressor = MagicMock(context_length=100_000)
+        mock_conn = MagicMock(spec=acp.Client)
+        mock_conn.session_update = AsyncMock()
+        agent._conn = mock_conn
+
+        with patch(
+            "agent.model_metadata.estimate_request_tokens_rough",
+            return_value=25_000,
+        ):
+            await agent._send_usage_update(state)
+
+        mock_conn.session_update.assert_awaited_once()
+        call = mock_conn.session_update.await_args
+        assert call.kwargs["session_id"] == state.session_id
+        update = call.kwargs["update"]
+        assert isinstance(update, UsageUpdate)
+        assert update.size == 100_000
+        assert update.used == 25_000
+
    @pytest.mark.asyncio
    async def test_cancel_sets_event(self, agent):
        resp = await agent.new_session(cwd=".")
@@ -238,11 +283,31 @@ class TestSessionOps:
            {"role": "system", "content": "hidden system"},
            {"role": "user", "content": "what controls the / slash commands?"},
            {"role": "assistant", "content": "HermesACPAgent._ADVERTISED_COMMANDS controls them."},
-            {"role": "tool", "content": "tool output should not replay"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_search_1",
+                        "type": "function",
+                        "function": {
+                            "name": "search_files",
+                            "arguments": '{"pattern":"slash commands","path":"."}',
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_search_1",
+                "content": '{"total_count":1,"matches":[{"path":"cli.py","line":42,"content":"slash commands"}]}',
+            },
        ]

        mock_conn.session_update.reset_mock()
        resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)

        assert isinstance(resp, LoadSessionResponse)
        calls = mock_conn.session_update.await_args_list
@@ -257,6 +322,21 @@ class TestSessionOps:
        assert isinstance(replay_calls[1].kwargs["update"], AgentMessageChunk)
        assert replay_calls[1].kwargs["update"].content.text.startswith("HermesACPAgent")

+        tool_updates = [
+            call.kwargs["update"]
+            for call in calls
+            if getattr(call.kwargs.get("update"), "session_update", None)
+            in {"tool_call", "tool_call_update"}
+        ]
+        assert len(tool_updates) == 2
+        assert isinstance(tool_updates[0], ToolCallStart)
+        assert tool_updates[0].tool_call_id == "call_search_1"
+        assert tool_updates[0].title == "search: slash commands"
+        assert isinstance(tool_updates[1], ToolCallProgress)
+        assert tool_updates[1].tool_call_id == "call_search_1"
+        assert "Search results" in tool_updates[1].content[0].content.text
+        assert "cli.py:42" in tool_updates[1].content[0].content.text
+
    @pytest.mark.asyncio
    async def test_resume_session_replays_persisted_history_to_client(self, agent):
        mock_conn = MagicMock(spec=acp.Client)
@@ -269,6 +349,8 @@ class TestSessionOps:

        mock_conn.session_update.reset_mock()
        resp = await agent.resume_session(cwd="/tmp", session_id=new_resp.session_id)
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)

        assert isinstance(resp, ResumeSessionResponse)
        updates = [call.kwargs["update"] for call in mock_conn.session_update.await_args_list]
@@ -278,6 +360,27 @@ class TestSessionOps:
            for update in updates
        )

+    @pytest.mark.asyncio
+    async def test_load_session_schedules_history_replay_after_response(self, agent):
+        """Zed only attaches replayed updates after session/load has completed."""
+        new_resp = await agent.new_session(cwd="/tmp")
+        state = agent.session_manager.get_session(new_resp.session_id)
+        state.history = [{"role": "user", "content": "hello from history"}]
+        events = []
+
+        async def replay_after_response(_state):
+            events.append("replay")
+
+        with patch.object(agent, "_replay_session_history", side_effect=replay_after_response):
+            resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
+            events.append("returned")
+
+        assert isinstance(resp, LoadSessionResponse)
+        assert events == ["returned"]
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+        assert events == ["returned", "replay"]
+
    @pytest.mark.asyncio
    async def test_resume_session_creates_new_if_missing(self, agent):
        resume_resp = await agent.resume_session(cwd="/tmp", session_id="nonexistent")
@@ -522,6 +625,11 @@ class TestPrompt:
        assert isinstance(resp, PromptResponse)
        assert resp.stop_reason == "end_turn"
        state.agent.run_conversation.assert_called_once()
+        assert state.agent.tool_progress_callback is not None
+        assert state.agent.step_callback is not None
+        assert state.agent.stream_delta_callback is not None
+        assert state.agent.reasoning_callback is not None
+        assert state.agent.thinking_callback is None

    @pytest.mark.asyncio
    async def test_prompt_updates_history(self, agent):
@@ -565,12 +673,40 @@ class TestPrompt:
        prompt = [TextContentBlock(type="text", text="help me")]
        await agent.prompt(prompt=prompt, session_id=new_resp.session_id)

-        # session_update should have been called with the final message
+        # session_update should include the final message (usage_update may follow it)
        mock_conn.session_update.assert_called()
-        # Get the last call's update argument
-        last_call = mock_conn.session_update.call_args_list[-1]
-        update = last_call[1].get("update") or last_call[0][1]
-        assert update.session_update == "agent_message_chunk"
+        updates = [
+            call.kwargs.get("update") or call.args[1]
+            for call in mock_conn.session_update.call_args_list
+        ]
+        assert any(update.session_update == "agent_message_chunk" for update in updates)
+
+    @pytest.mark.asyncio
+    async def test_prompt_does_not_duplicate_streamed_final_message(self, agent):
+        """If ACP already streamed response chunks, final_response should not be sent again."""
+        new_resp = await agent.new_session(cwd=".")
+        state = agent.session_manager.get_session(new_resp.session_id)
+
+        def mock_run(*args, **kwargs):
+            state.agent.stream_delta_callback("streamed answer")
+            return {"final_response": "streamed answer", "messages": []}
+
+        state.agent.run_conversation = mock_run
+
+        mock_conn = MagicMock(spec=acp.Client)
+        mock_conn.session_update = AsyncMock()
+        agent._conn = mock_conn
+
+        prompt = [TextContentBlock(type="text", text="hello")]
+        await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
+
+        updates = [
+            call.kwargs.get("update") or call.args[1]
+            for call in mock_conn.session_update.call_args_list
+        ]
+        agent_chunks = [update for update in updates if update.session_update == "agent_message_chunk"]
+        assert len(agent_chunks) == 1
+        assert agent_chunks[0].content.text == "streamed answer"

    @pytest.mark.asyncio
    async def test_prompt_auto_titles_session(self, agent):
@@ -708,6 +844,43 @@ class TestSlashCommands:
        assert "2 messages" in result
        assert "user: 1" in result

+    def test_context_shows_usage_and_compression_threshold(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = [{"role": "user", "content": "hello"}]
+        state.agent.context_compressor = MagicMock(
+            context_length=100_000,
+            threshold_tokens=80_000,
+        )
+        state.agent._cached_system_prompt = "system"
+        state.agent.tools = [{"type": "function", "function": {"name": "demo"}}]
+
+        with patch(
+            "agent.model_metadata.estimate_request_tokens_rough",
+            return_value=25_000,
+        ):
+            result = agent._handle_slash_command("/context", state)
+
+        assert "Context usage: ~25,000 / 100,000 tokens (25.0%)" in result
+        assert "Compression: ~55,000 tokens until threshold (~80,000, 80%)" in result
+        assert "Tip: run /compact" in result
+
+    def test_context_says_compression_due_when_past_threshold(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = [{"role": "user", "content": "hello"}]
+        state.agent.context_compressor = MagicMock(
+            context_length=100_000,
+            threshold_tokens=80_000,
+        )
+
+        with patch(
+            "agent.model_metadata.estimate_request_tokens_rough",
+            return_value=82_000,
+        ):
+            result = agent._handle_slash_command("/context", state)
+
+        assert "Context usage: ~82,000 / 100,000 tokens (82.0%)" in result
+        assert "Compression: due now (threshold ~80,000, 80%). Run /compact." in result
+
    def test_reset_clears_history(self, agent, mock_manager):
        state = self._make_state(mock_manager)
        state.history = [{"role": "user", "content": "hello"}]
@@ -787,7 +960,12 @@ class TestSlashCommands:
        resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)

        assert resp.stop_reason == "end_turn"
-        mock_conn.session_update.assert_called_once()
+        updates = [
+            call.kwargs.get("update") or call.args[1]
+            for call in mock_conn.session_update.call_args_list
+        ]
+        assert any(update.session_update == "agent_message_chunk" for update in updates)
+        assert any(update.session_update == "usage_update" for update in updates)

    @pytest.mark.asyncio
    async def test_unknown_slash_falls_through_to_llm(self, agent, mock_manager):
@@ -52,6 +52,12 @@ class TestToolKindMap:
    def test_tool_kind_execute_code(self):
        assert get_tool_kind("execute_code") == "execute"

+    def test_tool_kind_todo(self):
+        assert get_tool_kind("todo") == "other"
+
+    def test_tool_kind_skill_view(self):
+        assert get_tool_kind("skill_view") == "read"
+
    def test_tool_kind_browser_navigate(self):
        assert get_tool_kind("browser_navigate") == "fetch"

@@ -110,6 +116,25 @@ class TestBuildToolTitle:
        title = build_tool_title("web_search", {"query": "python asyncio"})
        assert "python asyncio" in title

+    def test_skill_view_title_includes_skill_name(self):
+        title = build_tool_title("skill_view", {"name": "github-pitfalls"})
+        assert title == "skill view (github-pitfalls)"
+
+    def test_skill_view_title_includes_linked_file(self):
+        title = build_tool_title("skill_view", {"name": "github-pitfalls", "file_path": "references/api.md"})
+        assert title == "skill view (github-pitfalls/references/api.md)"
+
+    def test_execute_code_title_includes_first_code_line(self):
+        title = build_tool_title("execute_code", {"code": "\nfrom hermes_tools import terminal\nprint('done')"})
+        assert title == "python: from hermes_tools import terminal"
+
+    def test_skill_manage_title_includes_action_and_target(self):
+        title = build_tool_title(
+            "skill_manage",
+            {"action": "patch", "name": "hermes-agent-operations", "file_path": "references/acp.md"},
+        )
+        assert title == "skill patch: hermes-agent-operations/references/acp.md"
+
    def test_unknown_tool_uses_name(self):
        title = build_tool_title("some_new_tool", {"foo": "bar"})
        assert title == "some_new_tool"
@@ -164,15 +189,23 @@ class TestBuildToolStart:
        assert "ls -la /tmp" in text

    def test_build_tool_start_for_read_file(self):
-        """read_file should include the path in content."""
+        """read_file start should stay compact; completion carries file contents."""
        args = {"path": "/etc/hosts", "offset": 1, "limit": 50}
        result = build_tool_start("tc-3", "read_file", args)
        assert isinstance(result, ToolCallStart)
        assert result.kind == "read"
-        assert len(result.content) >= 1
-        content_item = result.content[0]
-        assert isinstance(content_item, ContentToolCallContent)
-        assert "/etc/hosts" in content_item.content.text
+        assert result.content is None
+        assert result.raw_input is None
+
+    def test_build_tool_start_for_web_extract_is_compact(self):
+        """web_extract start should stay compact; title identifies URLs."""
+        args = {"urls": ["https://example.com/docs"]}
+        result = build_tool_start("tc-web-start", "web_extract", args)
+        assert isinstance(result, ToolCallStart)
+        assert result.title == "extract: https://example.com/docs"
+        assert result.kind == "fetch"
+        assert result.content is None
+        assert result.raw_input is None

    def test_build_tool_start_for_search(self):
        """search_files should include pattern in content."""
@@ -181,6 +214,48 @@ class TestBuildToolStart:
        assert isinstance(result, ToolCallStart)
        assert result.kind == "search"
        assert "TODO" in result.content[0].content.text
+        assert result.raw_input is None
+
+    def test_build_tool_start_for_todo_is_human_readable(self):
+        args = {"todos": [{"id": "one", "content": "Fix ACP rendering", "status": "in_progress"}]}
+        result = build_tool_start("tc-todo", "todo", args)
+        assert result.title == "todo (1 item)"
+        assert "Fix ACP rendering" in result.content[0].content.text
+        assert result.raw_input is None
+
+    def test_build_tool_start_for_skill_view_is_human_readable(self):
+        result = build_tool_start("tc-skill", "skill_view", {"name": "github-pitfalls"})
+        assert result.title == "skill view (github-pitfalls)"
+        assert "github-pitfalls" in result.content[0].content.text
+        assert result.raw_input is None
+
+    def test_build_tool_start_for_execute_code_shows_code_preview(self):
+        result = build_tool_start("tc-code", "execute_code", {"code": "print('hello')"})
+        assert result.kind == "execute"
+        assert result.title == "python: print('hello')"
+        assert "```python" in result.content[0].content.text
+        assert "print('hello')" in result.content[0].content.text
+        assert result.raw_input is None
+
+    def test_build_tool_start_for_skill_manage_patch_shows_diff(self):
+        result = build_tool_start(
+            "tc-skill-manage",
+            "skill_manage",
+            {
+                "action": "patch",
+                "name": "hermes-agent-operations",
+                "file_path": "references/acp.md",
+                "old_string": "old advice",
+                "new_string": "new advice",
+            },
+        )
+        assert result.kind == "edit"
+        assert result.title == "skill patch: hermes-agent-operations/references/acp.md"
+        assert isinstance(result.content[0], FileEditToolCallContent)
+        assert result.content[0].path == "skills/hermes-agent-operations/references/acp.md"
+        assert result.content[0].old_text == "old advice"
+        assert result.content[0].new_text == "new advice"
+        assert result.raw_input is None

    def test_build_tool_start_generic_fallback(self):
        """Unknown tools should get a generic text representation."""
@@ -205,6 +280,158 @@ class TestBuildToolComplete:
        content_item = result.content[0]
        assert isinstance(content_item, ContentToolCallContent)
        assert "total 42" in content_item.content.text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_todo_is_checklist(self):
+        result = build_tool_complete(
+            "tc-todo",
+            "todo",
+            '{"todos":[{"id":"a","content":"Inspect ACP","status":"completed"},{"id":"b","content":"Patch renderers","status":"in_progress"}],"summary":{"total":2,"pending":0,"in_progress":1,"completed":1,"cancelled":0}}',
+        )
+        text = result.content[0].content.text
+        assert "✅ Inspect ACP" in text
+        assert "- 🔄 Patch renderers" in text
+        assert "**Progress:** 1 completed, 1 in progress, 0 pending" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_skill_view_summarizes_content_without_raw_json(self):
+        result = build_tool_complete(
+            "tc-skill",
+            "skill_view",
+            '{"success":true,"name":"github-pitfalls","description":"GitHub gotchas","content":"# GitHub Pitfalls\\nUse gh carefully.","path":"github/github-pitfalls/SKILL.md"}',
+        )
+        text = result.content[0].content.text
+        assert "**Skill loaded**" in text
+        assert "`github-pitfalls`" in text
+        assert "GitHub gotchas" in text
+        assert "GitHub Pitfalls" in text
+        assert "Use gh carefully" not in text
+        assert "Full skill content is available to the agent" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_execute_code_formats_output(self):
+        result = build_tool_complete("tc-code", "execute_code", '{"output":"hello\\n","exit_code":0}')
+        text = result.content[0].content.text
+        assert "Exit code: 0" in text
+        assert "hello" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_skill_manage_summarizes_without_raw_json(self):
+        result = build_tool_complete(
+            "tc-skill-manage",
+            "skill_manage",
+            '{"success":true,"message":"Patched references/hermes-acp-zed-rendering.md in skill \'hermes-agent-operations\' (1 replacement)."}',
+            function_args={
+                "action": "patch",
+                "name": "hermes-agent-operations",
+                "file_path": "references/hermes-acp-zed-rendering.md",
+            },
+        )
+        text = result.content[0].content.text
+        assert "**✅ Skill updated**" in text
+        assert "`patch`" in text
+        assert "`hermes-agent-operations`" in text
+        assert "references/hermes-acp-zed-rendering.md" in text
+        assert "{\"success\"" not in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_read_file_formats_content(self):
+        result = build_tool_complete(
+            "tc-read",
+            "read_file",
+            '{"content":"1|hello\\n2|world","total_lines":2}',
+            function_args={"path":"README.md","offset":1,"limit":20},
+        )
+        text = result.content[0].content.text
+        assert "Read README.md" in text
+        assert "```\n1|hello\n2|world\n```" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_search_files_formats_matches(self):
+        result = build_tool_complete(
+            "tc-search",
+            "search_files",
+            '{"total_count":2,"matches":[{"path":"README.md","line":3,"content":"TODO: fix this"},{"path":"src/app.py","line":9,"content":"needle"}],"truncated":true}\n\n[Hint: Results truncated. Use offset=12 to see more.]',
+        )
+        text = result.content[0].content.text
+        assert "Search results" in text
+        assert "Found 2 matches" in text
+        assert "README.md:3" in text
+        assert "TODO: fix this" in text
+        assert "Results truncated" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_process_list_formats_table(self):
+        result = build_tool_complete(
+            "tc-process",
+            "process",
+            '{"processes":[{"session_id":"p1","status":"running","pid":123,"command":"npm run dev"}]}',
+            function_args={"action":"list"},
+        )
+        text = result.content[0].content.text
+        assert "Processes: 1" in text
+        assert "`p1`" in text
+        assert "npm run dev" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_delegate_task_summarizes_children(self):
+        result = build_tool_complete(
+            "tc-delegate",
+            "delegate_task",
+            '{"results":[{"task_index":0,"status":"completed","summary":"Reviewed ACP rendering.","model":"gpt-5.5","duration_seconds":3.2,"tool_trace":[{"tool":"read_file"}]}],"total_duration_seconds":3.4}',
+        )
+        text = result.content[0].content.text
+        assert "Delegation results: 1 task" in text
+        assert "Reviewed ACP rendering" in text
+        assert "gpt-5.5" in text
+        assert "Tools: read_file" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_session_search_recent(self):
+        result = build_tool_complete(
+            "tc-session",
+            "session_search",
+            '{"success":true,"mode":"recent","results":[{"session_id":"s1","title":"ACP work","last_active":"2026-05-02","message_count":12,"preview":"Polished tool rendering."}],"count":1}',
+        )
+        text = result.content[0].content.text
+        assert "Recent sessions" in text
+        assert "ACP work" in text
+        assert "Polished tool rendering" in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_memory_avoids_dumping_entries(self):
+        result = build_tool_complete(
+            "tc-memory",
+            "memory",
+            '{"success":true,"target":"user","entries":["private long memory"],"usage":"1% — 19/2000 chars","entry_count":1,"message":"Entry added."}',
+            function_args={"action":"add","target":"user","content":"User likes concise ACP rendering."},
+        )
+        text = result.content[0].content.text
+        assert "Memory add saved" in text
+        assert "User likes concise ACP rendering" in text
+        assert "private long memory" not in text
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_web_extract_success_stays_compact(self):
+        result = build_tool_complete(
+            "tc-web-extract",
+            "web_extract",
+            '{"results":[{"url":"https://example.com","title":"Example","content":"# Intro\\nThis is extracted content."}]}',
+        )
+        assert result.content is None
+        assert result.raw_output is None
+
+    def test_build_tool_complete_for_web_extract_error_shows_error(self):
+        result = build_tool_complete(
+            "tc-web-extract-error",
+            "web_extract",
+            '{"results":[{"url":"https://example.com","title":"Example","error":"timeout"}]}',
+        )
+        text = result.content[0].content.text
+        assert "Web extract failed" in text
+        assert "https://example.com" in text
+        assert "timeout" in text
+        assert result.raw_output is None

    def test_build_tool_complete_truncates_large_output(self):
        """Very large outputs should be truncated."""
@@ -1836,3 +1836,55 @@ class TestResolveMessagesMaxTokens:
        result = _resolve_anthropic_messages_max_tokens(0.5, "claude-opus-4-6")
        assert result > 0
        assert result != 0
+
+
+# ---------------------------------------------------------------------------
+# convert_tools_to_anthropic — tool dedup at API boundary
+# ---------------------------------------------------------------------------
+
+class TestConvertToolsToAnthropicDedup:
+    """convert_tools_to_anthropic must deduplicate tool names.
+
+    Anthropic rejects requests with duplicate tool names.  This guard converts
+    a hard failure into a warning log.  See:
+    https://github.com/NousResearch/hermes-agent/issues/18478
+    """
+
+    def _make_openai_tool(self, name: str) -> dict:
+        return {
+            "type": "function",
+            "function": {
+                "name": name,
+                "description": f"Tool {name}",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }
+
+    def test_unique_tools_pass_through(self):
+        tools = [self._make_openai_tool("alpha"), self._make_openai_tool("beta")]
+        result = convert_tools_to_anthropic(tools)
+        assert len(result) == 2
+        names = [t["name"] for t in result]
+        assert names == ["alpha", "beta"]
+
+    def test_duplicate_tool_names_are_deduplicated(self):
+        """Repeated tool names must be collapsed so each name appears once."""
+        tools = [
+            self._make_openai_tool("lcm_grep"),
+            self._make_openai_tool("lcm_describe"),
+            self._make_openai_tool("lcm_grep"),  # duplicate
+            self._make_openai_tool("lcm_expand"),
+            self._make_openai_tool("lcm_describe"),  # duplicate
+        ]
+        result = convert_tools_to_anthropic(tools)
+        names = [t["name"] for t in result]
+        assert len(names) == len(set(names)), (
+            f"Duplicate tool names found: {names}"
+        )
+        assert len(result) == 3  # lcm_grep, lcm_describe, lcm_expand
+
+    def test_empty_tools_returns_empty(self):
+        assert convert_tools_to_anthropic([]) == []
+
+    def test_none_tools_returns_empty(self):
+        assert convert_tools_to_anthropic(None) == []
@@ -16,6 +16,7 @@ from agent.auxiliary_client import (
    auxiliary_max_tokens_param,
    call_llm,
    async_call_llm,
+    _build_call_kwargs,
    _read_codex_access_token,
    _get_provider_chain,
    _is_payment_error,
@@ -1752,3 +1753,143 @@ class TestVisionAutoSkipsKimiCoding:
            "kimi-coding",
            "kimi-coding-cn",
        })
+
+
+# ---------------------------------------------------------------------------
+# _build_call_kwargs — tool dedup at API boundary
+# ---------------------------------------------------------------------------
+
+class TestBuildCallKwargsToolDedup:
+    """_build_call_kwargs must deduplicate tool names before passing to API.
+
+    Providers like Google Vertex, Azure, and Bedrock reject requests with
+    duplicate tool names (HTTP 400).  This guard converts a hard failure into
+    a warning log so agent turns succeed even if an upstream injection path
+    regresses.  See: https://github.com/NousResearch/hermes-agent/issues/18478
+    """
+
+    def _make_tool(self, name: str) -> dict:
+        return {
+            "type": "function",
+            "function": {
+                "name": name,
+                "description": f"Tool {name}",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }
+
+    def test_unique_tools_pass_through_unchanged(self):
+        tools = [self._make_tool("alpha"), self._make_tool("beta")]
+        kwargs = _build_call_kwargs(
+            provider="openai", model="gpt-4o", messages=[], tools=tools,
+        )
+        assert len(kwargs["tools"]) == 2
+        names = [t["function"]["name"] for t in kwargs["tools"]]
+        assert names == ["alpha", "beta"]
+
+    def test_duplicate_tool_names_are_deduplicated(self):
+        """Repeated tool names must appear only once in the built ``tools`` kwarg."""
+        tools = [
+            self._make_tool("lcm_grep"),
+            self._make_tool("lcm_describe"),
+            self._make_tool("lcm_grep"),  # duplicate
+            self._make_tool("lcm_expand"),
+            self._make_tool("lcm_describe"),  # duplicate
+        ]
+        kwargs = _build_call_kwargs(
+            provider="google", model="gemini-2.5-pro", messages=[], tools=tools,
+        )
+        result_tools = kwargs["tools"]
+        names = [t["function"]["name"] for t in result_tools]
+        # Must be deduplicated — no repeated names
+        assert len(names) == len(set(names)), (
+            f"Duplicate tool names found: {names}"
+        )
+        assert len(result_tools) == 3  # lcm_grep, lcm_describe, lcm_expand
+
+    def test_empty_tools_unchanged(self):
+        kwargs = _build_call_kwargs(
+            provider="openai", model="gpt-4o", messages=[], tools=[],
+        )
+        assert kwargs.get("tools") == [] or "tools" not in kwargs
+
+    def test_none_tools_unchanged(self):
+        kwargs = _build_call_kwargs(
+            provider="openai", model="gpt-4o", messages=[], tools=None,
+        )
+        assert "tools" not in kwargs
+
+
+@pytest.fixture(autouse=True)
+def _clean_env(monkeypatch):
+    """Strip provider env vars so each test starts clean."""
+    for key in (
+        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
+    ):
+        monkeypatch.delenv(key, raising=False)
+
+
+class TestOpenRouterExplicitApiKey:
+    """Test that explicit_api_key is correctly propagated to _try_openrouter()."""
+
+    def test_resolve_provider_client_passes_explicit_api_key_to_openrouter(
+        self, monkeypatch
+    ):
+        """
+        When resolve_provider_client() is called with explicit_api_key for OpenRouter,
+        the explicit key should be passed to the OpenAI client instead of falling back
+        to OPENROUTER_API_KEY env var.
+        """
+        # Set up env var as fallback (should NOT be used when explicit_api_key is provided)
+        monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key")
+
+        # Mock OpenAI to capture the api_key used
+        mock_openai = MagicMock()
+        mock_openai.return_value = MagicMock(name="openrouter-client")
+
+        with patch("agent.auxiliary_client.OpenAI", mock_openai):
+            client, model = resolve_provider_client(
+                provider="openrouter",
+                explicit_api_key="explicit-pool-key",
+            )
+
+            # Verify a client was created
+            assert client is not None
+            # Verify the explicit key was used, not the env var fallback
+            mock_openai.assert_called_once()
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "explicit-pool-key", (
+                f"Expected explicit_api_key to be passed, got: {call_kwargs['api_key']}"
+            )
+            assert call_kwargs["api_key"] != "env-fallback-key", (
+                "Should NOT fall back to OPENROUTER_API_KEY when explicit_api_key is provided"
+            )
+
+    def test_resolve_provider_client_without_explicit_api_key_falls_back_to_env(
+        self, monkeypatch
+    ):
+        """
+        When resolve_provider_client() is called WITHOUT explicit_api_key for OpenRouter,
+        it should fall back to OPENROUTER_API_KEY env var.
+        """
+        # Set up env var as fallback (should be used when explicit_api_key is NOT provided)
+        monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key")
+
+        # Mock OpenAI to capture the api_key used
+        mock_openai = MagicMock()
+        mock_openai.return_value = MagicMock(name="openrouter-client")
+
+        with patch("agent.auxiliary_client.OpenAI", mock_openai):
+            client, model = resolve_provider_client(
+                provider="openrouter",
+                explicit_api_key=None,
+            )
+
+            # Verify a client was created
+            assert client is not None
+            # Verify the env var fallback was used
+            mock_openai.assert_called_once()
+            call_kwargs = mock_openai.call_args[1]
+            assert call_kwargs["api_key"] == "env-fallback-key", (
+                f"Expected env fallback key to be used when explicit_api_key is None, got: {call_kwargs['api_key']}"
+            )
@@ -348,6 +348,64 @@ def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
    assert entry.access_token == "sk-or-seeded"


+
+def test_load_pool_prefers_dotenv_over_stale_os_environ(tmp_path, monkeypatch):
+    """Regression for #18254: stale OPENROUTER_API_KEY in os.environ (inherited
+    from a parent shell) must NOT shadow the fresh key in ~/.hermes/.env when
+    seeding the credential pool. Before the fix, `get_env_value()` preferred
+    os.environ and silently wrote the stale value into auth.json, causing
+    persistent 401 errors after key rotation.
+    """
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Simulate the bug: parent shell exported a stale test key
+    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-STALE-from-shell")
+
+    # User edited ~/.hermes/.env with the fresh key
+    (hermes_home / ".env").write_text(
+        "OPENROUTER_API_KEY=sk-or-FRESH-from-dotenv\n"
+    )
+
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+
+    from agent.credential_pool import load_pool
+    pool = load_pool("openrouter")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.source == "env:OPENROUTER_API_KEY"
+    # The fresh key from .env must win over the stale shell export
+    assert entry.access_token == "sk-or-FRESH-from-dotenv", (
+        f"Expected .env to win, got {entry.access_token!r}"
+    )
+
+
+def test_load_pool_falls_back_to_os_environ_when_dotenv_empty(tmp_path, monkeypatch):
+    """When ~/.hermes/.env does not define OPENROUTER_API_KEY (typical Docker /
+    K8s / systemd deployment), seeding must still pick up the key from
+    os.environ. Guards against regressions that would break production
+    deployments relying on runtime-injected env vars.
+    """
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-from-runtime-env")
+
+    # .env exists but does not define OPENROUTER_API_KEY
+    (hermes_home / ".env").write_text("SOME_OTHER_VAR=unrelated\n")
+
+    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
+
+    from agent.credential_pool import load_pool
+    pool = load_pool("openrouter")
+    entry = pool.select()
+
+    assert entry is not None
+    assert entry.access_token == "sk-or-from-runtime-env"
+
+
 def test_load_pool_removes_stale_seeded_env_entry(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
@@ -314,3 +314,281 @@ def test_dry_run_skips_snapshot(backup_env, monkeypatch):
    assert not any(r.get("reason") == "pre-curator-run" for r in rows), (
        "dry-run must not create a pre-run snapshot"
    )
+
+
+# ---------------------------------------------------------------------------
+# cron-jobs backup + rollback (added by the follow-up to issue #18671)
+# ---------------------------------------------------------------------------
+
+
+def _write_cron_jobs(home: Path, jobs: list) -> Path:
+    """Write a synthetic cron/jobs.json under HERMES_HOME. Returns the path.
+    Mirrors cron.jobs.save_jobs() wrapper shape: `{"jobs": [...], "updated_at": ...}`.
+    """
+    cron_dir = home / "cron"
+    cron_dir.mkdir(parents=True, exist_ok=True)
+    path = cron_dir / "jobs.json"
+    path.write_text(
+        json.dumps({"jobs": jobs, "updated_at": "2026-05-01T00:00:00Z"}, indent=2),
+        encoding="utf-8",
+    )
+    return path
+
+
+def _reload_cron_jobs(home: Path):
+    """Reload cron.jobs so its module-level HERMES_DIR picks up the tmp HOME."""
+    import hermes_constants
+    importlib.reload(hermes_constants)
+    if "cron.jobs" in sys.modules:
+        import cron.jobs as _cj
+        importlib.reload(_cj)
+    else:
+        import cron.jobs as _cj  # noqa: F401
+    import cron.jobs as cj
+    return cj
+
+
+def test_snapshot_includes_cron_jobs(backup_env):
+    """With a cron/jobs.json present, snapshot writes cron-jobs.json and records it in manifest."""
+    cb = backup_env["cb"]
+    _write_skill(backup_env["skills"], "alpha")
+    _write_cron_jobs(backup_env["home"], [
+        {"id": "job-a", "name": "a", "schedule": "every 1h", "skills": ["alpha"]},
+        {"id": "job-b", "name": "b", "schedule": "every 2h", "skill": "alpha"},
+    ])
+
+    snap = cb.snapshot_skills(reason="test")
+    assert snap is not None
+    assert (snap / cb.CRON_JOBS_FILENAME).exists()
+
+    mf = json.loads((snap / "manifest.json").read_text(encoding="utf-8"))
+    assert mf["cron_jobs"]["backed_up"] is True
+    assert mf["cron_jobs"]["jobs_count"] == 2
+
+
+def test_snapshot_without_cron_jobs_file_still_succeeds(backup_env):
+    """No cron/jobs.json on disk → snapshot succeeds, manifest records absence."""
+    cb = backup_env["cb"]
+    _write_skill(backup_env["skills"], "alpha")
+    # Deliberately do not create ~/.hermes/cron/jobs.json
+
+    snap = cb.snapshot_skills(reason="test")
+    assert snap is not None
+    assert not (snap / cb.CRON_JOBS_FILENAME).exists()
+
+    mf = json.loads((snap / "manifest.json").read_text(encoding="utf-8"))
+    assert mf["cron_jobs"]["backed_up"] is False
+    assert "cron/jobs.json" in mf["cron_jobs"]["reason"]
+
+
+def test_snapshot_cron_jobs_malformed_json_still_captured(backup_env):
+    """Malformed jobs.json is still copied to the snapshot (fidelity over
+    validation); the manifest notes the parse warning."""
+    cb = backup_env["cb"]
+    _write_skill(backup_env["skills"], "alpha")
+    (backup_env["home"] / "cron").mkdir()
+    (backup_env["home"] / "cron" / "jobs.json").write_text("{oh no", encoding="utf-8")
+
+    snap = cb.snapshot_skills(reason="test")
+    assert snap is not None
+    # Raw file was copied even though we couldn't parse it (read back as UTF-8, matching the write)
+    assert (snap / cb.CRON_JOBS_FILENAME).read_text(encoding="utf-8") == "{oh no"
+
+    mf = json.loads((snap / "manifest.json").read_text(encoding="utf-8"))
+    assert mf["cron_jobs"]["backed_up"] is True
+    assert mf["cron_jobs"]["jobs_count"] == 0
+    assert "parse_warning" in mf["cron_jobs"]
+
+
+def test_rollback_restores_cron_skill_links(backup_env):
+    """End-to-end: snapshot with job [alpha,beta], curator-style in-place
+    rewrite to [umbrella], then rollback → skills restored to [alpha,beta]."""
+    cb = backup_env["cb"]
+    home = backup_env["home"]
+    _write_skill(backup_env["skills"], "alpha")
+    _write_skill(backup_env["skills"], "beta")
+    _write_skill(backup_env["skills"], "umbrella")
+
+    cj = _reload_cron_jobs(home)
+    cj.create_job(name="weekly", prompt="p", schedule="every 7d",
+                  skills=["alpha", "beta"])
+
+    snap = cb.snapshot_skills(reason="pre-curator-run")
+    assert snap is not None
+
+    # Simulate the curator's in-place cron rewrite after consolidation
+    cj.rewrite_skill_refs(
+        consolidated={"alpha": "umbrella", "beta": "umbrella"},
+        pruned=[],
+    )
+    live_after_curator = cj.load_jobs()
+    assert live_after_curator[0]["skills"] == ["umbrella"]
+
+    # Now roll back
+    ok, msg, _ = cb.rollback(backup_id=snap.name)
+    assert ok, msg
+    assert "cron links" in msg
+
+    live_after_rollback = cj.load_jobs()
+    # skills restored; legacy `skill` mirror follows first element
+    assert live_after_rollback[0]["skills"] == ["alpha", "beta"]
+
+
+def test_rollback_only_touches_skill_fields(backup_env):
+    """Every field other than skills/skill must remain untouched across rollback.
+    Schedule, enabled, prompt, timestamps — all live state, hands off."""
+    cb = backup_env["cb"]
+    home = backup_env["home"]
+    _write_skill(backup_env["skills"], "alpha")
+
+    # Hand-rolled jobs.json with varied fields (no real create_job — we want
+    # exact field control).
+    _write_cron_jobs(home, [{
+        "id": "stable-id",
+        "name": "original-name",
+        "prompt": "original prompt",
+        "schedule": "every 1h",
+        "skills": ["alpha"],
+        "enabled": True,
+        "last_run_at": "2026-04-01T00:00:00Z",
+    }])
+    snap = cb.snapshot_skills(reason="pre-curator-run")
+    assert snap is not None
+
+    # User/scheduler activity AFTER the snapshot: rename the job, change
+    # the schedule, update timestamps, and (curator) rewrite the skills list.
+    cj = _reload_cron_jobs(home)
+    jobs = cj.load_jobs()
+    jobs[0]["name"] = "renamed-since-snapshot"
+    jobs[0]["schedule"] = "every 30m"
+    jobs[0]["last_run_at"] = "2026-05-01T12:00:00Z"
+    jobs[0]["skills"] = ["umbrella"]  # pretend curator did this
+    cj.save_jobs(jobs)
+
+    ok, _, _ = cb.rollback(backup_id=snap.name)
+    assert ok
+
+    after = cj.load_jobs()
+    job = after[0]
+    # skills: restored
+    assert job["skills"] == ["alpha"]
+    # everything else: untouched (live state preserved)
+    assert job["name"] == "renamed-since-snapshot"
+    assert job["schedule"] == "every 30m"
+    assert job["last_run_at"] == "2026-05-01T12:00:00Z"
+    assert job["prompt"] == "original prompt"
+
+
+def test_rollback_skips_jobs_the_user_deleted(backup_env):
+    """If the user deleted a cron job after the snapshot, rollback must
+    NOT resurrect it — the user's delete is a later, explicit choice."""
+    cb = backup_env["cb"]
+    home = backup_env["home"]
+    _write_skill(backup_env["skills"], "alpha")
+
+    _write_cron_jobs(home, [
+        {"id": "keep-me", "name": "keep", "schedule": "every 1h", "skills": ["alpha"]},
+        {"id": "delete-me", "name": "gone", "schedule": "every 1h", "skills": ["alpha"]},
+    ])
+    snap = cb.snapshot_skills(reason="pre-curator-run")
+
+    # User deletes one job after the snapshot
+    cj = _reload_cron_jobs(home)
+    cj.save_jobs([j for j in cj.load_jobs() if j["id"] != "delete-me"])
+
+    ok, _, _ = cb.rollback(backup_id=snap.name)
+    assert ok
+
+    live_after = cj.load_jobs()
+    live_ids = {j["id"] for j in live_after}
+    assert "keep-me" in live_ids
+    assert "delete-me" not in live_ids  # not resurrected
+
+
+def test_rollback_leaves_new_jobs_untouched(backup_env):
+    """Jobs created AFTER the snapshot must pass through rollback unchanged."""
+    cb = backup_env["cb"]
+    home = backup_env["home"]
+    _write_skill(backup_env["skills"], "alpha")
+    _write_cron_jobs(home, [
+        {"id": "original", "name": "o", "schedule": "every 1h", "skills": ["alpha"]},
+    ])
+    snap = cb.snapshot_skills(reason="pre-curator-run")
+
+    cj = _reload_cron_jobs(home)
+    jobs = cj.load_jobs()
+    jobs.append({"id": "new-after-snapshot", "name": "new",
+                 "schedule": "every 15m", "skills": ["brand-new-skill"]})
+    cj.save_jobs(jobs)
+
+    ok, _, _ = cb.rollback(backup_id=snap.name)
+    assert ok
+
+    live = cj.load_jobs()
+    by_id = {j["id"]: j for j in live}
+    assert "new-after-snapshot" in by_id
+    # New job's fields completely preserved
+    assert by_id["new-after-snapshot"]["skills"] == ["brand-new-skill"]
+    assert by_id["new-after-snapshot"]["schedule"] == "every 15m"
+
+
+def test_rollback_with_snapshot_missing_cron_succeeds(backup_env):
+    """Older snapshots (created before this feature shipped) have no
+    cron-jobs.json. Rollback must still restore the skills tree and not
+    error out."""
+    cb = backup_env["cb"]
+    home = backup_env["home"]
+    _write_skill(backup_env["skills"], "alpha")
+
+    # No cron/jobs.json at snapshot time — simulates a pre-feature snapshot
+    snap = cb.snapshot_skills(reason="test")
+    assert snap is not None
+    assert not (snap / cb.CRON_JOBS_FILENAME).exists()
+
+    # Later the user created a cron job
+    _write_cron_jobs(home, [
+        {"id": "later-job", "name": "l", "schedule": "every 1h", "skills": ["x"]},
+    ])
+
+    ok, msg, _ = cb.rollback(backup_id=snap.name)
+    # Main rollback still succeeds; cron report notes the missing file.
+    assert ok, msg
+    # Jobs.json untouched (nothing to restore from)
+    cj = _reload_cron_jobs(home)
+    jobs = cj.load_jobs()
+    assert jobs[0]["id"] == "later-job"
+    assert jobs[0]["skills"] == ["x"]
+
+
+def test_restore_cron_skill_links_standalone(backup_env):
+    """Unit-level test on _restore_cron_skill_links without the full rollback.
+    Checks the attempted/error/unchanged/restored/skipped_missing report fields."""
+    cb = backup_env["cb"]
+    home = backup_env["home"]
+
+    # Prime a snapshot dir manually with cron-jobs.json
+    backups_dir = home / "skills" / ".curator_backups" / "fake-id"
+    backups_dir.mkdir(parents=True)
+    (backups_dir / cb.CRON_JOBS_FILENAME).write_text(json.dumps([
+        {"id": "job-1", "name": "one", "skills": ["narrow-a", "narrow-b"]},
+        {"id": "job-2", "name": "two", "skill": "legacy-single"},
+        {"id": "job-gone", "name": "deleted", "skills": ["whatever"]},
+    ]), encoding="utf-8")
+
+    # Live jobs: job-1 rewritten, job-2 unchanged, job-gone deleted, job-new only in the live file
+    _write_cron_jobs(home, [
+        {"id": "job-1", "name": "one", "skills": ["umbrella"], "schedule": "every 1h"},
+        {"id": "job-2", "name": "two", "skill": "legacy-single", "schedule": "every 1h"},
+        {"id": "job-new", "name": "new", "skills": ["x"], "schedule": "every 1h"},
+    ])
+    _reload_cron_jobs(home)
+
+    report = cb._restore_cron_skill_links(backups_dir)
+    assert report["attempted"] is True
+    assert report["error"] is None
+    assert report["unchanged"] == 1  # job-2 matched
+    assert len(report["restored"]) == 1  # job-1 got restored
+    assert report["restored"][0]["job_id"] == "job-1"
+    assert report["restored"][0]["to"]["skills"] == ["narrow-a", "narrow-b"]
+    assert len(report["skipped_missing"]) == 1
+    assert report["skipped_missing"][0]["job_id"] == "job-gone"
@@ -548,3 +548,266 @@ def test_reconcile_model_block_visible_in_full_report(curator_env):
    md = (run_dir / "REPORT.md").read_text()
    assert "duplicate content, now a subsection" in md
    assert "pre-curator junk" in md
+
+
+# ---------------------------------------------------------------------------
+# _extract_absorbed_into_declarations — authoritative signal from delete calls
+# ---------------------------------------------------------------------------
+
+
+def test_extract_absorbed_into_picks_up_consolidation(curator_env):
+    """Delete call with absorbed_into=<umbrella> yields a declaration."""
+    declarations = curator_env._extract_absorbed_into_declarations([
+        {
+            "name": "skill_manage",
+            "arguments": json.dumps({
+                "action": "delete",
+                "name": "narrow-skill",
+                "absorbed_into": "umbrella",
+            }),
+        },
+    ])
+    assert declarations == {
+        "narrow-skill": {"into": "umbrella", "declared": True},
+    }
+
+
+def test_extract_absorbed_into_empty_string_is_explicit_prune(curator_env):
+    """absorbed_into='' is recorded as an explicit prune declaration."""
+    declarations = curator_env._extract_absorbed_into_declarations([
+        {
+            "name": "skill_manage",
+            "arguments": json.dumps({
+                "action": "delete",
+                "name": "stale",
+                "absorbed_into": "",
+            }),
+        },
+    ])
+    assert declarations == {"stale": {"into": "", "declared": True}}
+
+
+def test_extract_absorbed_into_missing_arg_ignored(curator_env):
+    """Delete call without absorbed_into is skipped — fallback to heuristic."""
+    declarations = curator_env._extract_absorbed_into_declarations([
+        {
+            "name": "skill_manage",
+            "arguments": json.dumps({
+                "action": "delete",
+                "name": "legacy-skill",
+            }),
+        },
+    ])
+    assert declarations == {}
+
+
+def test_extract_absorbed_into_ignores_non_delete_actions(curator_env):
+    """Patch, create, write_file etc. must not leak into declarations."""
+    declarations = curator_env._extract_absorbed_into_declarations([
+        {
+            "name": "skill_manage",
+            "arguments": json.dumps({
+                "action": "patch",
+                "name": "umbrella",
+                "old_string": "...",
+                "new_string": "...",
+                "absorbed_into": "something",  # bogus on non-delete, must be ignored
+            }),
+        },
+    ])
+    assert declarations == {}
+
+
+def test_extract_absorbed_into_accepts_dict_arguments(curator_env):
+    """arguments can arrive as a dict (defensive path) — still works."""
+    declarations = curator_env._extract_absorbed_into_declarations([
+        {
+            "name": "skill_manage",
+            "arguments": {
+                "action": "delete",
+                "name": "narrow",
+                "absorbed_into": "umbrella",
+            },
+        },
+    ])
+    assert declarations == {"narrow": {"into": "umbrella", "declared": True}}
+
+
+def test_extract_absorbed_into_strips_whitespace(curator_env):
+    """Whitespace around the skill name and the umbrella name is stripped."""
+    delete_args = json.dumps({
+        "action": "delete",
+        "name": "  narrow  ",
+        "absorbed_into": "  umbrella  ",
+    })
+    declarations = curator_env._extract_absorbed_into_declarations([
+        {"name": "skill_manage", "arguments": delete_args},
+    ])
+    expected = {"narrow": {"into": "umbrella", "declared": True}}
+    assert declarations == expected
+
+
+def test_extract_absorbed_into_ignores_non_skill_manage_calls(curator_env):
+    """Calls to tools other than skill_manage never yield declarations."""
+    assert curator_env._extract_absorbed_into_declarations([
+        {"name": "terminal", "arguments": json.dumps({"command": "ls"})},
+        {"name": "read_file", "arguments": json.dumps({"path": "/tmp/x"})},
+    ]) == {}
+
+
+def test_extract_absorbed_into_handles_malformed_arguments(curator_env):
+    """Unparseable, None, or absent arguments are skipped without raising."""
+    broken_calls = [
+        {"name": "skill_manage", "arguments": "{not json"},
+        {"name": "skill_manage", "arguments": None},
+        {"name": "skill_manage"},  # no arguments key at all
+    ]
+    assert curator_env._extract_absorbed_into_declarations(broken_calls) == {}
+
+
+# ---------------------------------------------------------------------------
+# _reconcile_classification with absorbed_into declarations (authoritative)
+# ---------------------------------------------------------------------------
+
+
+def test_reconcile_absorbed_into_beats_everything_else(curator_env):
+    """Model declared absorbed_into at delete; YAML/heuristic disagree — declaration wins.
+
+    This is the exact #18671 regression: the model forgets to emit the YAML
+    summary block, the heuristic's substring match misses because the
+    umbrella's patch content doesn't literally contain the old skill's
+    slug. Previously this fell through to 'no-evidence fallback' prune,
+    which dropped the cron ref instead of rewriting. With absorbed_into
+    declared, the model tells us directly.
+    """
+    out = curator_env._reconcile_classification(
+        removed=["pr-review-format"],
+        heuristic={"consolidated": [], "pruned": [{"name": "pr-review-format"}]},
+        model_block={"consolidations": [], "prunings": []},  # model forgot YAML block
+        destinations={"hermes-agent-dev"},
+        absorbed_declarations={
+            "pr-review-format": {"into": "hermes-agent-dev", "declared": True},
+        },
+    )
+    assert len(out["consolidated"]) == 1
+    assert out["pruned"] == []
+    e = out["consolidated"][0]
+    assert e["name"] == "pr-review-format"
+    assert e["into"] == "hermes-agent-dev"
+    assert "absorbed_into" in e["source"]
+
+
+def test_reconcile_absorbed_into_empty_is_explicit_prune(curator_env):
+    """absorbed_into='' takes precedence and routes to pruned, not fallback."""
+    out = curator_env._reconcile_classification(
+        removed=["stale"],
+        heuristic={"consolidated": [], "pruned": [{"name": "stale"}]},
+        model_block={"consolidations": [], "prunings": []},
+        destinations=set(),
+        absorbed_declarations={
+            "stale": {"into": "", "declared": True},
+        },
+    )
+    assert out["consolidated"] == []
+    assert len(out["pruned"]) == 1
+    assert "model-declared prune" in out["pruned"][0]["source"]
+
+
+def test_reconcile_absorbed_into_nonexistent_target_falls_through(curator_env):
+    """If the declared umbrella doesn't exist in destinations, fall through to
+    heuristic/YAML logic. Shouldn't happen in practice (the tool validates at
+    delete time) but the reconciler is defensive."""
+    out = curator_env._reconcile_classification(
+        removed=["thing"],
+        heuristic={
+            "consolidated": [{"name": "thing", "into": "real-umbrella", "evidence": "..."}],
+            "pruned": [],
+        },
+        model_block={"consolidations": [], "prunings": []},
+        destinations={"real-umbrella"},
+        absorbed_declarations={
+            "thing": {"into": "ghost-umbrella", "declared": True},
+        },
+    )
+    assert len(out["consolidated"]) == 1
+    assert out["consolidated"][0]["into"] == "real-umbrella"
+    assert "tool-call audit" in out["consolidated"][0]["source"]
+
+
+def test_reconcile_declaration_preserves_yaml_reason(curator_env):
+    """When the model both declared absorbed_into AND emitted YAML with reason,
+    the reason carries through so REPORT.md still has it."""
+    out = curator_env._reconcile_classification(
+        removed=["narrow"],
+        heuristic={"consolidated": [], "pruned": []},
+        model_block={
+            "consolidations": [{
+                "from": "narrow",
+                "into": "umbrella",
+                "reason": "duplicate of umbrella's main content",
+            }],
+            "prunings": [],
+        },
+        destinations={"umbrella"},
+        absorbed_declarations={
+            "narrow": {"into": "umbrella", "declared": True},
+        },
+    )
+    assert len(out["consolidated"]) == 1
+    e = out["consolidated"][0]
+    assert e["into"] == "umbrella"
+    assert "absorbed_into" in e["source"]
+    assert e["reason"] == "duplicate of umbrella's main content"
+
+
+def test_reconcile_without_declarations_preserves_legacy_behavior(curator_env):
+    """Backward compat: no absorbed_declarations arg → all existing logic intact."""
+    out = curator_env._reconcile_classification(
+        removed=["thing"],
+        heuristic={
+            "consolidated": [{"name": "thing", "into": "umbrella", "evidence": "..."}],
+            "pruned": [],
+        },
+        model_block={"consolidations": [], "prunings": []},
+        destinations={"umbrella"},
+        # no absorbed_declarations — defaults to None → behaves identically to pre-change
+    )
+    assert len(out["consolidated"]) == 1
+    assert out["consolidated"][0]["into"] == "umbrella"
+
+
+def test_reconcile_mixed_declarations_and_legacy_calls(curator_env):
+    """Real-world run: some deletes declared absorbed_into, some didn't.
+    Declared ones use the authoritative path; others fall through to YAML/heuristic.
+    """
+    out = curator_env._reconcile_classification(
+        removed=["declared-cons", "declared-prune", "legacy-cons", "legacy-prune"],
+        heuristic={
+            "consolidated": [
+                {"name": "legacy-cons", "into": "umbrella-a", "evidence": "..."},
+            ],
+            "pruned": [{"name": "legacy-prune"}],
+        },
+        model_block={"consolidations": [], "prunings": []},
+        destinations={"umbrella-a", "umbrella-b"},
+        absorbed_declarations={
+            "declared-cons": {"into": "umbrella-b", "declared": True},
+            "declared-prune": {"into": "", "declared": True},
+        },
+    )
+    cons_by_name = {e["name"]: e for e in out["consolidated"]}
+    pruned_by_name = {e["name"]: e for e in out["pruned"]}
+
+    assert "declared-cons" in cons_by_name
+    assert cons_by_name["declared-cons"]["into"] == "umbrella-b"
+    assert "absorbed_into" in cons_by_name["declared-cons"]["source"]
+
+    assert "legacy-cons" in cons_by_name
+    assert cons_by_name["legacy-cons"]["into"] == "umbrella-a"
+    assert "tool-call audit" in cons_by_name["legacy-cons"]["source"]
+
+    assert "declared-prune" in pruned_by_name
+    assert "model-declared prune" in pruned_by_name["declared-prune"]["source"]
+
+    assert "legacy-prune" in pruned_by_name
+    assert "no-evidence fallback" in pruned_by_name["legacy-prune"]["source"]
@@ -0,0 +1,284 @@
+"""Tests for OpenRouter response caching header injection."""
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# build_or_headers
+# ---------------------------------------------------------------------------
+
+class TestBuildOrHeaders:
+    """Test the build_or_headers() helper in agent/auxiliary_client.py."""
+
+    @pytest.fixture(autouse=True)
+    def _clear_cache_env(self, monkeypatch):
+        """Unset HERMES_OPENROUTER_CACHE* so only ``or_config`` drives these tests."""
+        monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
+        monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)
+
+    def test_base_attribution_always_present(self):
+        """Attribution headers must always be included regardless of cache setting."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": False})
+        assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
+        assert headers["X-OpenRouter-Title"] == "Hermes Agent"
+        assert headers["X-OpenRouter-Categories"] == "productivity,cli-agent"
+
+    def test_cache_enabled(self):
+        """When response_cache is True, X-OpenRouter-Cache header is set."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True})
+        assert headers["X-OpenRouter-Cache"] == "true"
+
+    def test_cache_disabled(self):
+        """When response_cache is False, neither the cache nor the TTL header is sent."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": False})
+        assert headers.keys().isdisjoint({"X-OpenRouter-Cache", "X-OpenRouter-Cache-TTL"})
+
+    def test_cache_disabled_by_default_empty_config(self):
+        """Empty config dict means no cache headers (response_cache defaults to False)."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={})
+        assert "X-OpenRouter-Cache" not in headers
+
+    def test_ttl_default(self):
+        """Default TTL (300) is included when cache is enabled."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 300})
+        assert headers["X-OpenRouter-Cache-TTL"] == "300"
+
+    def test_ttl_custom(self):
+        """Custom TTL values within range are sent."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 3600})
+        assert headers["X-OpenRouter-Cache-TTL"] == "3600"
+
+    def test_ttl_max(self):
+        """Maximum TTL (86400) is accepted."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 86400})
+        assert headers["X-OpenRouter-Cache-TTL"] == "86400"
+
+    def test_ttl_out_of_range_too_high(self):
+        """TTL above 86400 is silently ignored (no TTL header sent)."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 100000})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+        assert headers["X-OpenRouter-Cache"] == "true"  # but cache is still enabled
+
+    def test_ttl_out_of_range_zero(self):
+        """TTL of 0 is below minimum — no TTL header sent."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 0})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    def test_ttl_negative(self):
+        """Negative TTL is ignored."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": -5})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    def test_ttl_not_a_number(self):
+        """Non-numeric TTL is ignored."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": "five"})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    def test_ttl_float_truncated(self):
+        """Float TTL values are truncated to int."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600.7})
+        assert headers["X-OpenRouter-Cache-TTL"] == "600"
+
+    def test_returns_fresh_dict(self):
+        """Each call returns a new dict so mutations don't leak."""
+        from agent.auxiliary_client import build_or_headers
+
+        cfg = {"response_cache": True}
+        h1, h2 = build_or_headers(or_config=cfg), build_or_headers(or_config=cfg)
+        assert h1 is not h2
+        assert h1 == h2
+
+    def test_none_config_falls_back_to_load_config(self):
+        """When or_config is None, build_or_headers reads from load_config()."""
+        from agent.auxiliary_client import build_or_headers
+
+        fake_cfg = {"openrouter": {"response_cache": True, "response_cache_ttl": 900}}
+        with patch("hermes_cli.config.load_config", return_value=fake_cfg):
+            headers = build_or_headers(or_config=None)
+        assert headers["X-OpenRouter-Cache"] == "true"
+        assert headers["X-OpenRouter-Cache-TTL"] == "900"
+
+    def test_none_config_load_config_fails_gracefully(self):
+        """When load_config() fails, base attribution is still returned and no cache header is added."""
+        from agent.auxiliary_client import build_or_headers
+
+        with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")):
+            headers = build_or_headers(or_config=None)
+        assert "HTTP-Referer" in headers
+        assert "X-OpenRouter-Cache" not in headers
+
+
+# ---------------------------------------------------------------------------
+# Environment variable overrides
+# ---------------------------------------------------------------------------
+
+class TestEnvVarOverrides:
+    """Test env var precedence over config.yaml for response caching."""
+
+    def test_env_enables_cache(self, monkeypatch):
+        """HERMES_OPENROUTER_CACHE=true enables cache even when config disables it."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
+        headers = build_or_headers(or_config={"response_cache": False})
+        assert headers["X-OpenRouter-Cache"] == "true"
+
+    def test_env_disables_cache(self, monkeypatch):
+        """HERMES_OPENROUTER_CACHE=false disables cache even when config enables it."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "false")
+        headers = build_or_headers(or_config={"response_cache": True})
+        assert "X-OpenRouter-Cache" not in headers
+
+    @pytest.mark.parametrize("value", ["1", "true", "TRUE", "yes", "Yes", "on"])
+    def test_truthy_values(self, monkeypatch, value):
+        """Various truthy strings enable caching."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
+        headers = build_or_headers(or_config={})
+        assert headers["X-OpenRouter-Cache"] == "true"
+
+    @pytest.mark.parametrize("value", ["0", "false", "no", "off", "maybe", ""])
+    def test_non_truthy_values(self, monkeypatch, value):
+        """Non-truthy strings do not enable caching (empty falls through to config)."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
+        # Empty string falls through to config; others are explicitly non-truthy
+        if value == "":
+            # Empty env var falls through to config default (False)
+            headers = build_or_headers(or_config={"response_cache": False})
+        else:
+            headers = build_or_headers(or_config={"response_cache": True})
+        assert "X-OpenRouter-Cache" not in headers
+
+    def test_env_ttl_overrides_config(self, monkeypatch):
+        """HERMES_OPENROUTER_CACHE_TTL overrides config TTL."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", "1800")
+        headers = build_or_headers(or_config={"response_cache_ttl": 300})
+        assert headers["X-OpenRouter-Cache-TTL"] == "1800"
+
+    @pytest.mark.parametrize("ttl", ["0", "86401", "abc", "-1", "12.5"])
+    def test_invalid_env_ttl_dropped(self, monkeypatch, ttl):
+        """Invalid TTL env values are ignored; cache still enabled without TTL."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "1")
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
+        headers = build_or_headers(or_config={})
+        assert headers["X-OpenRouter-Cache"] == "true"
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    @pytest.mark.parametrize("ttl", ["1", "300", "86400"])
+    def test_valid_env_ttl_boundaries(self, monkeypatch, ttl):
+        """Boundary TTL values (1, 300, 86400) are accepted."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "yes")
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
+        assert build_or_headers(or_config={})["X-OpenRouter-Cache-TTL"] == ttl
+
+    def test_no_env_vars_falls_through_to_config(self, monkeypatch):
+        """Without env vars, config.yaml controls behavior."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
+        monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600})
+        assert headers["X-OpenRouter-Cache"] == "true"
+        assert headers["X-OpenRouter-Cache-TTL"] == "600"
+
+class TestDefaultConfig:
+    """Verify the openrouter config section is in DEFAULT_CONFIG."""
+
+    def test_openrouter_section_exists(self):
+        """The shipped defaults turn response caching on with a TTL of 300."""
+        from hermes_cli.config import DEFAULT_CONFIG
+
+        assert "openrouter" in DEFAULT_CONFIG
+        assert DEFAULT_CONFIG["openrouter"]["response_cache"] is True
+        assert DEFAULT_CONFIG["openrouter"]["response_cache_ttl"] == 300
+
+
+# ---------------------------------------------------------------------------
+# _check_openrouter_cache_status
+# ---------------------------------------------------------------------------
+
+class TestCheckOpenrouterCacheStatus:
+    """Test the _check_openrouter_cache_status method on AIAgent."""
+
+    def _make_agent(self):
+        """Build an AIAgent via object.__new__ (skipping __init__) with a zeroed hit counter."""
+        from run_agent import AIAgent
+
+        agent = object.__new__(AIAgent)
+        agent._or_cache_hits = 0
+        return agent
+
+    def test_hit_increments_counter(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "HIT"})
+        agent._check_openrouter_cache_status(resp)
+        assert agent._or_cache_hits == 1
+        agent._check_openrouter_cache_status(resp)  # a second hit keeps counting
+        assert agent._or_cache_hits == 2
+
+    def test_miss_does_not_increment(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "MISS"})
+        agent._check_openrouter_cache_status(resp)
+        assert agent._or_cache_hits == 0
+
+    def test_no_header_is_noop(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={})
+        agent._check_openrouter_cache_status(resp)
+        assert agent._or_cache_hits == 0
+
+    def test_none_response_is_safe(self):
+        agent = self._make_agent()
+        agent._check_openrouter_cache_status(None)  # no crash
+        assert agent._or_cache_hits == 0
+
+    def test_no_headers_attr_is_safe(self):
+        agent = self._make_agent()
+        agent._check_openrouter_cache_status(object())  # no crash
+        assert agent._or_cache_hits == 0
+
+    def test_case_insensitive(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "hit"})
+        agent._check_openrouter_cache_status(resp)
+        assert agent._or_cache_hits == 1
@@ -125,6 +125,58 @@ class TestScanSkillCommands:
        assert "/knowledge-brain" in result
        assert result["/knowledge-brain"]["name"] == "knowledge-brain"

+    def test_get_skill_commands_rescans_when_platform_scope_changes(self, tmp_path):
+        """Platform-specific disabled-skill caches must not leak across platforms.
+
+        Regression test for #14536: a gateway process serving Telegram
+        and Discord concurrently would seed the process-global cache
+        with whichever platform scanned first, and subsequent
+        ``get_skill_commands()`` calls from the other platform silently
+        inherited that filter.
+        """
+        import agent.skill_commands as sc_mod
+        from agent.skill_commands import get_skill_commands
+
+        def _disabled_skills():
+            platform = os.getenv("HERMES_PLATFORM")
+            if platform == "telegram":
+                return {"telegram-only"}
+            if platform == "discord":
+                return {"discord-only"}
+            return set()
+
+        with (
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
+            patch("tools.skills_tool._get_disabled_skill_names", side_effect=_disabled_skills),
+            patch.object(sc_mod, "_skill_commands", {}),
+            patch.object(sc_mod, "_skill_commands_platform", None),
+        ):
+            _make_skill(tmp_path, "shared")
+            _make_skill(tmp_path, "telegram-only")
+            _make_skill(tmp_path, "discord-only")
+
+            with patch.dict(os.environ, {"HERMES_PLATFORM": "telegram"}):
+                telegram_commands = dict(get_skill_commands())
+
+            assert "/shared" in telegram_commands
+            assert "/discord-only" in telegram_commands
+            assert "/telegram-only" not in telegram_commands
+
+            with patch.dict(os.environ, {"HERMES_PLATFORM": "discord"}):
+                discord_commands = dict(get_skill_commands())
+
+            assert "/shared" in discord_commands
+            assert "/telegram-only" in discord_commands
+            assert "/discord-only" not in discord_commands
+
+            # Switching back to telegram must also rescan — not re-serve
+            # the discord view that was just cached.
+            with patch.dict(os.environ, {"HERMES_PLATFORM": "telegram"}):
+                telegram_again = dict(get_skill_commands())
+
+            assert "/telegram-only" not in telegram_again
+            assert "/discord-only" in telegram_again
+

    def test_special_chars_stripped_from_cmd_key(self, tmp_path):
        """Skill names with +, /, or other special chars produce clean cmd keys."""
@@ -0,0 +1,166 @@
+"""Regression tests for the config.yaml → env var bridge in gateway/run.py.
+
+Guards against the 60-vs-500 bug where a stale `.env HERMES_MAX_ITERATIONS=60`
+entry silently shadowed `agent.max_turns: 500` in config.yaml because the
+bridge used `if X not in os.environ` guards. After PR#18413 the bridge
+treats config.yaml as authoritative and unconditionally overwrites .env
+values for `agent.*`, `display.*`, `timezone`, and `security.*` keys.
+"""
+
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+import textwrap
+from pathlib import Path
+
+import pytest
+
+
+PROJECT_ROOT = Path(__file__).resolve().parents[2]
+
+
+def _run_gateway_import(hermes_home: Path, initial_env: dict[str, str]) -> dict[str, str]:
+    """Import gateway.run in a clean subprocess and return the post-import env.
+
+    The bridge runs at module-import time, so simply importing is enough
+    to exercise it. Running in a subprocess isolates the test from other
+    import side effects and makes the "what ends up in os.environ" check
+    deterministic.
+    """
+    script = textwrap.dedent(
+        f"""
+        import os, sys
+        sys.path.insert(0, {str(PROJECT_ROOT)!r})
+
+        try:
+            from gateway import run  # noqa: F401  — module import triggers bridge
+        except Exception as exc:
+            print(f"IMPORT_ERROR:{{type(exc).__name__}}:{{exc}}", file=sys.stderr)
+            sys.exit(2)
+
+        for k in (
+            "HERMES_MAX_ITERATIONS",
+            "HERMES_AGENT_TIMEOUT",
+            "HERMES_AGENT_TIMEOUT_WARNING",
+            "HERMES_GATEWAY_BUSY_INPUT_MODE",
+            "HERMES_TIMEZONE",
+        ):
+            v = os.environ.get(k)
+            if v is not None:
+                print(f"{{k}}={{v}}")
+        """
+    )
+    env = dict(initial_env)
+    env["HERMES_HOME"] = str(hermes_home)
+    # Inherit PATH / PYTHONPATH / VIRTUAL_ENV / HOME (unless the caller set them) so venv imports resolve.
+    for k in ("PATH", "PYTHONPATH", "VIRTUAL_ENV", "HOME"):
+        if k in os.environ and k not in env:
+            env[k] = os.environ[k]
+
+    result = subprocess.run(
+        [sys.executable, "-c", script],
+        env=env,
+        capture_output=True,
+        text=True,
+        timeout=60,
+    )
+    if result.returncode != 0:
+        pytest.fail(
+            f"gateway.run import failed (rc={result.returncode})\n"
+            f"stderr:\n{result.stderr}\nstdout:\n{result.stdout}"
+        )
+    out: dict[str, str] = {}
+    for line in result.stdout.splitlines():
+        if "=" in line:
+            k, v = line.split("=", 1)
+            out[k] = v
+    return out
+
+
+def _write_config(home: Path, agent_cfg: dict | None = None, display_cfg: dict | None = None,
+                  timezone: str | None = None) -> None:
+    """Write ``config.yaml`` under *home* containing only the sections passed in.
+
+    A section is left out only when its argument is ``None``; an empty dict is
+    written as an empty section instead of being silently dropped.
+    """
+    import yaml
+    sections = {"agent": agent_cfg, "display": display_cfg, "timezone": timezone}
+    cfg = {key: value for key, value in sections.items() if value is not None}
+    (home / "config.yaml").write_text(yaml.safe_dump(cfg), encoding="utf-8")
+
+
+def _write_env(home: Path, entries: dict[str, str]) -> None:
+    """Write *entries* to ``home/.env`` as one ``KEY=value`` line each, encoded as UTF-8."""
+    (home / ".env").write_text("".join(f"{k}={v}\n" for k, v in entries.items()), encoding="utf-8")
+
+
+@pytest.fixture
+def hermes_home(tmp_path: Path) -> Path:
+    """Return a freshly created, empty ``.hermes`` directory inside ``tmp_path``."""
+    (home := tmp_path / ".hermes").mkdir()
+    return home
+
+
+def test_config_max_turns_wins_over_stale_env(hermes_home: Path) -> None:
+    """A stale .env HERMES_MAX_ITERATIONS=60 must lose to agent.max_turns=500 in config.yaml."""
+    _write_env(hermes_home, {"HERMES_MAX_ITERATIONS": "60"})
+    _write_config(hermes_home, agent_cfg={"max_turns": 500})
+
+    bridged = _run_gateway_import(hermes_home, initial_env={})
+    max_iterations = bridged.get("HERMES_MAX_ITERATIONS")
+    assert max_iterations == "500", (
+        f"expected config.yaml max_turns=500 to win; got {max_iterations!r}. "
+        "Stale .env value is shadowing config — the bridge lost its override."
+    )
+
+
+def test_config_gateway_timeout_wins_over_stale_env(hermes_home: Path) -> None:
+    """Both gateway timeout keys under agent.* must override their stale .env values."""
+    stale_env = {"HERMES_AGENT_TIMEOUT": "60", "HERMES_AGENT_TIMEOUT_WARNING": "30"}
+    config_agent = {
+        "gateway_timeout": 1800,
+        "gateway_timeout_warning": 900,
+    }
+    _write_config(hermes_home, agent_cfg=config_agent)
+    _write_env(hermes_home, stale_env)
+
+    bridged = _run_gateway_import(hermes_home, initial_env={})
+
+    assert bridged.get("HERMES_AGENT_TIMEOUT") == "1800"
+    assert bridged.get("HERMES_AGENT_TIMEOUT_WARNING") == "900"
+
+
+def test_config_display_busy_input_mode_wins_over_stale_env(hermes_home: Path) -> None:
+    """display.busy_input_mode in config.yaml must override a stale .env value."""
+    _write_env(hermes_home, {"HERMES_GATEWAY_BUSY_INPUT_MODE": "queue"})
+    _write_config(hermes_home, display_cfg={"busy_input_mode": "interrupt"})
+
+    bridged = _run_gateway_import(hermes_home, initial_env={})
+    assert bridged.get("HERMES_GATEWAY_BUSY_INPUT_MODE") == "interrupt"
+
+
+def test_config_timezone_wins_over_stale_env(hermes_home: Path) -> None:
+    """Top-level ``timezone`` in config.yaml must override a stale .env HERMES_TIMEZONE."""
+    _write_env(hermes_home, {"HERMES_TIMEZONE": "UTC"})
+    _write_config(hermes_home, timezone="America/Los_Angeles")
+
+    bridged = _run_gateway_import(hermes_home, initial_env={})
+    assert bridged.get("HERMES_TIMEZONE") == "America/Los_Angeles"
+
+
+def test_env_value_survives_when_config_omits_key(hermes_home: Path) -> None:
+    """A .env value must pass through untouched when config.yaml omits the key.
+
+    The bridge is expected to overwrite only for keys present in config.yaml,
+    so a config without max_turns must leave HERMES_MAX_ITERATIONS alone.
+    """
+    _write_env(hermes_home, {"HERMES_MAX_ITERATIONS": "123"})
+    _write_config(hermes_home, agent_cfg={})  # no max_turns
+
+    bridged = _run_gateway_import(hermes_home, initial_env={})
+
+    assert bridged.get("HERMES_MAX_ITERATIONS") == "123"
@@ -172,6 +172,69 @@ async def test_connect_only_requests_members_intent_when_needed(monkeypatch, all
    await adapter.disconnect()


+@pytest.mark.asyncio
+async def test_reconnect_closes_previous_client_to_prevent_zombie_websocket(monkeypatch):
+    """Regression for #18187: a second connect() with no disconnect() in
+    between (e.g. an in-process reconnect attempt) has to close the existing
+    commands.Bot before building its replacement. If it does not, two
+    websockets stay alive, both fire on_message, and users get double
+    responses with different wording.
+    """
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
+
+    monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
+    monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)
+
+    stub_intents = SimpleNamespace(
+        message_content=False, dm_messages=False, guild_messages=False,
+        members=False, voice_states=False,
+    )
+    monkeypatch.setattr(discord_platform.Intents, "default", lambda: stub_intents)
+
+    class TrackedBot(FakeBot):
+        """FakeBot variant whose close() flips a flag that is_closed() reports."""
+        _closed = False
+
+        def is_closed(self):
+            return self._closed
+
+        async def close(self):
+            self._closed = True
+
+    bots: list[TrackedBot] = []
+
+    def make_tracked_bot(*, command_prefix, intents, proxy=None, allowed_mentions=None, **_):
+        tracked = TrackedBot(intents=intents, allowed_mentions=allowed_mentions)
+        bots.append(tracked)
+        return tracked
+
+    monkeypatch.setattr(discord_platform.commands, "Bot", make_tracked_bot)
+    monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock())
+
+    # Initial connect on a fresh adapter: exactly one client, still open.
+    assert await adapter.connect() is True
+    assert len(bots) == 1
+    original = bots[0]
+    assert original._closed is False, "first bot should still be open after connect()"
+
+    # Connect again WITHOUT disconnecting, as an in-process reconnect would.
+    # If connect() left the original open it would linger as a zombie and
+    # both clients would receive every Discord event.
+    assert await adapter.connect() is True
+    assert len(bots) == 2
+    replacement = bots[1]
+
+    # Re-entering connect() must have closed the original client.
+    assert original._closed is True, (
+        "First Discord client must be closed on re-entry of connect() to prevent "
+        "zombie websocket (#18187)"
+    )
+    assert replacement._closed is False, "second bot should still be open"
+    assert adapter._client is replacement
+
+    await adapter.disconnect()
+
+
@pytest.mark.asyncio
 async def test_connect_releases_token_lock_on_timeout(monkeypatch):
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
@@ -1771,6 +1771,69 @@ class TestAdapterBehavior(unittest.TestCase):
        self.assertIn("GIF downgraded to file", caption)
        self.assertIn("look", caption)

+    def test_download_remote_document_reads_response_before_httpx_client_closes(self):
+        """#18451 — Content-Type and body must be captured while the
+        httpx.AsyncClient context is still open, so pooled connections
+        can fully release on exit. Reading afterwards only works because
+        httpx buffers eagerly; a later switch to .stream() would turn it
+        into a silent read-after-close."""
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        timeline: list[str] = []
+
+        class _FakeResponse:
+            headers = {"Content-Type": "application/octet-stream"}
+
+            def raise_for_status(self) -> None:
+                timeline.append("raise_for_status")
+
+            @property
+            def content(self) -> bytes:
+                timeline.append("content_read")
+                return b"doc-bytes"
+
+        class _FakeAsyncClient:
+            def __init__(self, *_a: object, **_k: object) -> None:
+                pass
+
+            async def __aenter__(self) -> "_FakeAsyncClient":
+                timeline.append("client_enter")
+                return self
+
+            async def __aexit__(self, *exc: object) -> None:
+                timeline.append("client_exit")
+
+            async def get(self, *_a: object, **_k: object) -> _FakeResponse:
+                timeline.append("get")
+                return _FakeResponse()
+
+        with tempfile.TemporaryDirectory() as home_dir:
+            with patch.dict(os.environ, {"HERMES_HOME": home_dir}, clear=False):
+                adapter = FeishuAdapter(PlatformConfig())
+
+                async def _download() -> tuple[str, str]:
+                    with patch("tools.url_safety.is_safe_url", return_value=True):
+                        with patch("httpx.AsyncClient", _FakeAsyncClient):
+                            with patch(
+                                "gateway.platforms.feishu.cache_document_from_bytes",
+                                return_value="/tmp/cached-doc.bin",
+                            ):
+                                return await adapter._download_remote_document(
+                                    "https://example.com/doc.bin",
+                                    default_ext=".bin",
+                                    preferred_name="doc",
+                                )
+
+                cached_path, filename = asyncio.run(_download())
+
+        self.assertEqual(cached_path, "/tmp/cached-doc.bin")
+        self.assertTrue(filename)
+        # The body read has to precede the client's exit; the other order
+        # means reading after the connection pool was torn down, which
+        # only works by accident (httpx's eager buffering).
+        self.assertLess(timeline.index("content_read"), timeline.index("client_exit"))
+
    def test_dedup_state_persists_across_adapter_restart(self):
        from gateway.config import PlatformConfig
        from gateway.platforms.feishu import FeishuAdapter
@@ -0,0 +1,114 @@
+"""Tests for the shared httpx.Limits helper that all long-lived platform
+adapters use to tighten their keep-alive pool.
+
+Context: #18451 — on macOS behind Cloudflare Warp, httpx's default
+keepalive_expiry=5s let idle CLOSE_WAIT sockets accumulate across
+multiple long-lived gateway adapters (QQ Bot, Feishu, WeCom, DingTalk,
+Signal, BlueBubbles, WeCom-callback) until the process hit the default
+256 fd limit.  These tests just verify the helper returns sensibly
+tuned limits and respects env-var overrides; the actual fd-pressure
+behaviour is only observable at runtime under load.
+"""
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _clear_env(monkeypatch):
+    for var in ("HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE"):
+        monkeypatch.delenv(var, raising=False)  # every test starts without overrides
+
+
+def test_returns_none_when_httpx_unavailable(monkeypatch):
+    """With the module's ``httpx`` reference set to None (simulating a failed
+    import), ``platform_httpx_limits()`` must return None instead of raising."""
+    import gateway.platforms._http_client_limits as limits_module
+    monkeypatch.setattr(limits_module, "httpx", None)
+    assert limits_module.platform_httpx_limits() is None
+
+
+def test_default_limits_tighten_keepalive_below_httpx_default():
+    """Defaults must be an httpx.Limits with a short expiry and a bounded pool."""
+    import httpx
+    from gateway.platforms._http_client_limits import platform_httpx_limits
+
+    tuned = platform_httpx_limits()
+    assert isinstance(tuned, httpx.Limits)
+    # Must undercut httpx's own 5.0s keepalive_expiry so idle CLOSE_WAIT
+    # sockets drain promptly behind proxies like Warp.
+    assert tuned.keepalive_expiry is not None and tuned.keepalive_expiry < 5.0
+    # Pool size must be positive yet modest for a single adapter.
+    pool_size = tuned.max_keepalive_connections
+    assert pool_size is not None and 1 <= pool_size <= 50
+
+
+def test_env_override_keepalive_expiry(monkeypatch):
+    """HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY must set ``keepalive_expiry``."""
+    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", "7.5")
+    from gateway.platforms._http_client_limits import platform_httpx_limits
+    assert platform_httpx_limits().keepalive_expiry == 7.5
+
+
+def test_env_override_max_keepalive(monkeypatch):
+    """HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE must set ``max_keepalive_connections``."""
+    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", "25")
+    from gateway.platforms._http_client_limits import platform_httpx_limits
+    assert platform_httpx_limits().max_keepalive_connections == 25
+
+
+def test_env_override_rejects_garbage(monkeypatch):
+    """Unusable override values must be ignored without raising."""
+    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", "not-a-number")
+    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", "-3")
+    from gateway.platforms._http_client_limits import platform_httpx_limits
+    tuned = platform_httpx_limits()
+    # Neither the non-numeric nor the negative override may leak through.
+    expiry, pool_size = tuned.keepalive_expiry, tuned.max_keepalive_connections
+    assert expiry is not None and expiry > 0
+    assert pool_size is not None and pool_size > 0
+
+
+def test_helper_is_importable_from_every_platform_that_uses_it():
+    """Smoke-import each adapter module that relies on the limits helper, so an
+    import failure surfaces here instead of as an adapter-startup crash."""
+    # NOTE(review): the module docstring also lists Feishu, which is not
+    # imported below — confirm whether that omission is intentional.
+    import gateway.platforms.qqbot.adapter  # noqa: F401
+    import gateway.platforms.wecom  # noqa: F401
+    import gateway.platforms.dingtalk  # noqa: F401
+    import gateway.platforms.signal  # noqa: F401
+    import gateway.platforms.bluebubbles  # noqa: F401
+    import gateway.platforms.wecom_callback  # noqa: F401
+
+
+class TestWhatsappTypingLeakFix:
+    """#18451 — whatsapp.send_typing used to issue a bare
+    `await self._http_session.post(...)`, leaking the aiohttp response
+    object until GC and keeping its TCP socket in CLOSE_WAIT. The call
+    must now sit inside `async with`, so the response is released as
+    soon as the call returns.
+
+    The check reads the method's source text instead of running the
+    coroutine: running it would require a live aiohttp server, and the
+    contract under test is structural anyway.
+    """
+
+    def test_bare_await_removed(self):
+        import inspect
+        import gateway.platforms.whatsapp as whatsapp
+
+        typing_source = inspect.getsource(whatsapp.WhatsAppAdapter.send_typing)
+        # Structural requirement: post() is entered through `async with`
+        # rather than awaited on its own.
+        assert "async with self._http_session.post(" in typing_source, (
+            "send_typing must wrap self._http_session.post(...) in "
+            "`async with` to release the aiohttp response socket "
+            "(#18451). Otherwise the response sits in CLOSE_WAIT "
+            "until GC."
+        )
+        # And the previous bare-await spelling may not appear anywhere.
+        assert "await self._http_session.post(" not in typing_source
@@ -0,0 +1,244 @@
+"""Tests for `/reload-skills` resyncing the Discord ``/skill`` autocomplete.
+
+Before this change, ``_register_skill_group`` captured the skill catalog
+in closure variables (``entries`` and ``skill_lookup``) so that the one
+``tree.add_command`` call at startup owned the only live copy of the
+skill list. The closure is never re-entered after startup, so
+``/reload-skills`` (which rescans the on-disk skill dir and refreshes
+the in-process registry) had no way to propagate its results into the
+autocomplete — new skills stayed invisible in the dropdown and deleted
+skills returned an "Unknown skill" error when the stale autocomplete
+entry was clicked.
+
+The fix promotes those two variables to instance attributes
+(``_skill_entries`` / ``_skill_lookup``) and exposes a
+``refresh_skill_group()`` method that rescans and mutates them in
+place. The gateway ``_handle_reload_skills_command`` iterates its
+connected adapters and calls the method on any that expose it.
+
+No ``tree.sync()`` is required because Discord fetches autocomplete
+options dynamically on every keystroke — we only need to rebind the
+data the live callbacks already read from.
+"""
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+
+def _make_adapter():
+    """Build a bare DiscordAdapter, bypassing ``__init__`` and its token checks."""
+    from gateway.platforms.discord import DiscordAdapter
+    from gateway.platforms.base import Platform
+
+    bare = object.__new__(DiscordAdapter)
+    bare.config = MagicMock(extra={})
+    # BasePlatformAdapter.__init__ normally assigns ``platform``; since it is
+    # skipped here, set it by hand — the inherited ``.name`` property
+    # dereferences it for log formatting.
+    bare.platform = Platform.DISCORD
+    return bare
+
+
+class TestRefreshSkillGroup:
+    def test_refresh_repopulates_entries_after_catalog_change(
+        self, monkeypatch
+    ) -> None:
+        """The initial catalog is replaced wholesale on refresh.
+
+        Mirrors the observable /reload-skills case: a user adds a new
+        skill to ~/.hermes/skills/, runs /reload-skills, and expects
+        the autocomplete to surface it on the very next keystroke.
+        """
+        adapter = _make_adapter()
+
+        # Start-of-process state: ``_register_skill_group`` built the catalog
+        # from the original collector output.
+        adapter._skill_entries = [
+            ("old-skill", "Pre-existing skill", "/old-skill"),
+        ]
+        adapter._skill_lookup = {"old-skill": ("Pre-existing skill", "/old-skill")}
+        adapter._skill_group_reserved_names = set()
+        adapter._skill_group_hidden_count = 0
+
+        # User adds new-skill to disk and removes old-skill.
+        def fake_collector(*, reserved_names):
+            return (
+                {"creative": [("new-skill", "Fresh skill", "/new-skill")]},  # categories
+                [],  # uncategorized
+                0,   # hidden
+            )
+
+        monkeypatch.setattr(
+            "hermes_cli.commands.discord_skill_commands_by_category",
+            fake_collector,
+        )
+
+        new_count, hidden = adapter.refresh_skill_group()
+
+        assert new_count == 1
+        assert hidden == 0
+        # Old skill is gone, new skill is present.
+        names = [n for n, _d, _k in adapter._skill_entries]
+        assert names == ["new-skill"]
+        assert "old-skill" not in adapter._skill_lookup
+        assert adapter._skill_lookup["new-skill"] == ("Fresh skill", "/new-skill")
+
+    def test_refresh_sorts_entries_alphabetically(self, monkeypatch) -> None:
+        """Autocomplete order must be stable and predictable across refreshes."""
+        adapter = _make_adapter()
+        adapter._skill_entries = []
+        adapter._skill_lookup = {}
+        adapter._skill_group_reserved_names = set()
+        adapter._skill_group_hidden_count = 0
+
+        def fake_collector(*, reserved_names):
+            # Intentionally unsorted — refresh_skill_group() must re-sort.
+            return (
+                {"zzz": [("zebra", "", "/zebra")]},
+                [("alpha", "", "/alpha")],
+                0,
+            )
+
+        monkeypatch.setattr(
+            "hermes_cli.commands.discord_skill_commands_by_category",
+            fake_collector,
+        )
+
+        adapter.refresh_skill_group()
+
+        names = [n for n, _d, _k in adapter._skill_entries]
+        assert names == sorted(names) == ["alpha", "zebra"]
+
+    def test_refresh_handles_collector_exception_gracefully(
+        self, monkeypatch
+    ) -> None:
+        """A broken collector must not take down /reload-skills."""
+        adapter = _make_adapter()
+        adapter._skill_entries = [("keep", "kept", "/keep")]
+        adapter._skill_lookup = {"keep": ("kept", "/keep")}
+        adapter._skill_group_reserved_names = set()
+        adapter._skill_group_hidden_count = 0
+
+        def boom(*, reserved_names):
+            raise RuntimeError("simulated collector failure")
+
+        monkeypatch.setattr(
+            "hermes_cli.commands.discord_skill_commands_by_category",
+            boom,
+        )
+
+        new_count, hidden = adapter.refresh_skill_group()
+        # The count of the previously cached entries comes back, nothing
+        # raises, and the entries stay in place for the live autocomplete.
+        assert new_count == 1
+        assert hidden == 0
+        assert adapter._skill_entries == [("keep", "kept", "/keep")]
+
+
+class TestRegisterSkillGroupUsesInstanceState:
+    """The closure-based ``entries`` / ``skill_lookup`` must be gone.
+
+    If the callbacks in ``_register_skill_group`` still close over
+    local variables instead of reading from ``self``, the refresh
+    method is useless — autocomplete will keep serving the stale list.
+
+    The full slash-command registration path pulls in ``discord.app_commands``
+    decorators (``@describe`` / ``@autocomplete`` / ``Command``), which
+    are unstubbed in the hermetic test env. So instead of calling
+    ``_register_skill_group``, the test calls ``_refresh_skill_catalog_state``
+    (which, per the fix, runs before any decorator evaluation) directly and
+    asserts the data-shaped side-effects: ``_skill_entries`` and
+    ``_skill_lookup`` must be populated from the collector output.
+    """
+
+    def test_refresh_catalog_state_populates_instance_attrs(
+        self, monkeypatch
+    ) -> None:
+        adapter = _make_adapter()
+        adapter._skill_group_reserved_names = set()
+
+        def fake_collector(*, reserved_names):
+            return (
+                {"creative": [("ascii-art", "Make ASCII", "/ascii-art")]},
+                [],
+                0,
+            )
+        monkeypatch.setattr(
+            "hermes_cli.commands.discord_skill_commands_by_category",
+            fake_collector,
+        )
+
+        adapter._refresh_skill_catalog_state()
+
+        # Instance-level state populated — the autocomplete + handler
+        # callbacks both read from these, so `refresh_skill_group`
+        # mutating them in place is enough to pick up new skills.
+        assert adapter._skill_entries == [
+            ("ascii-art", "Make ASCII", "/ascii-art"),
+        ]
+        assert adapter._skill_lookup == {
+            "ascii-art": ("Make ASCII", "/ascii-art"),
+        }
+        assert adapter._skill_group_hidden_count == 0
+
+
+class TestHandleReloadSkillsCallsRefreshSkillGroup:
+    """Gateway-side integration: /reload-skills must call refresh on adapters."""
+
+    def test_orchestrator_calls_refresh_skill_group_on_every_adapter(self):
+        """Sync + async refresh_skill_group implementations both get awaited/called.
+
+        The orchestrator iterates ``self.adapters`` and calls
+        ``refresh_skill_group`` if it exists. Adapters that don't
+        implement it (today: everything except Discord) are silently
+        skipped without raising.
+        """
+        import asyncio
+        from unittest.mock import patch, MagicMock
+
+        # Import without constructing a real runner — test the method
+        # directly against an ``object.__new__`` instance.
+        from gateway.run import GatewayRunner
+        runner = object.__new__(GatewayRunner)
+
+        sync_refresh = MagicMock(return_value=(5, 0))
+        async_called = {"flag": False}
+
+        class AsyncAdapter:
+            name = "async-platform"
+            async def refresh_skill_group(self):
+                async_called["flag"] = True
+                return (3, 0)
+
+        class SyncAdapter:
+            name = "sync-platform"
+            refresh_skill_group = sync_refresh
+
+        class NoOpAdapter:
+            name = "other"
+            # No refresh_skill_group — must not crash.
+
+        runner.adapters = {
+            "discord": AsyncAdapter(),
+            "slack": SyncAdapter(),
+            "telegram": NoOpAdapter(),
+        }
+
+        # Mock reload_skills itself so no disk scan runs.
+        fake_result = {"added": [], "removed": [], "total": 7}
+        with patch(
+            "agent.skill_commands.reload_skills", return_value=fake_result
+        ):
+            event = MagicMock()
+            event.source = MagicMock()
+            # _session_key_for_source may be called — make it safe.
+            runner._session_key_for_source = lambda src: None
+            runner._pending_skills_reload_notes = {}
+            # asyncio.run() owns a fresh loop; get_event_loop() is deprecated here.
+            result = asyncio.run(
+                runner._handle_reload_skills_command(event)
+            )
+
+        assert "Skills Reloaded" in result
+        assert sync_refresh.called, "sync adapter refresh must be invoked"
+        assert async_called["flag"], "async adapter refresh must be awaited"
@@ -242,4 +242,89 @@ async def test_send_restart_notification_cleans_up_on_send_failure(

    await runner._send_restart_notification()

-    assert not notify_path.exists()  # cleaned up despite error
+    # File cleaned up even though send raised.
+    assert not notify_path.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_logs_warning_on_sendresult_failure(
+    tmp_path, monkeypatch, caplog
+):
+    """A SendResult(success=False) must surface as a WARNING, never as INFO.
+
+    adapter.send() here reports failure through its return value instead of
+    raising (the scenario modelled is Telegram's "Chat not found"). The
+    regression guarded against: the caller ignored that return value and
+    logged "Sent restart notification to ..." at INFO regardless, hiding
+    real delivery failures behind a success line.
+    """
+    from gateway.platforms.base import SendResult
+
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    marker = tmp_path / ".restart_notify.json"
+    marker.write_text(json.dumps({
+        "platform": "telegram",
+        "chat_id": "42",
+    }))
+
+    runner, adapter = make_restart_runner()
+    failed = SendResult(success=False, error="Chat not found")
+    adapter.send = AsyncMock(return_value=failed)
+
+    with caplog.at_level("DEBUG", logger="gateway.run"):
+        await runner._send_restart_notification()
+
+    logged = [(rec.levelname, rec.getMessage()) for rec in caplog.records]
+    false_successes = [
+        text for level, text in logged
+        if level == "INFO" and "Sent restart notification" in text
+    ]
+    failure_warnings = [
+        text for level, text in logged
+        if level == "WARNING"
+        and "was not delivered" in text
+        and "Chat not found" in text
+    ]
+    assert not false_successes, (
+        "Expected no INFO 'Sent restart notification' line when send failed, "
+        f"got: {false_successes}"
+    )
+    assert failure_warnings, (
+        "Expected a WARNING line mentioning the failure; "
+        f"got records: {logged}"
+    )
+    # The notification marker file is removed even on a failed delivery.
+    assert not marker.exists()
+
+
+@pytest.mark.asyncio
+async def test_send_restart_notification_logs_info_on_sendresult_success(
+    tmp_path, monkeypatch, caplog
+):
+    """A SendResult(success=True) must still produce the INFO success line."""
+    from gateway.platforms.base import SendResult
+
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+
+    marker = tmp_path / ".restart_notify.json"
+    marker.write_text(json.dumps({"platform": "telegram", "chat_id": "42"}))
+
+    runner, adapter = make_restart_runner()
+    delivered = SendResult(success=True, message_id="m-1")
+    adapter.send = AsyncMock(return_value=delivered)
+
+    with caplog.at_level("DEBUG", logger="gateway.run"):
+        await runner._send_restart_notification()
+
+    logged = [(rec.levelname, rec.getMessage()) for rec in caplog.records]
+    confirmations = [
+        text for level, text in logged
+        if level == "INFO" and "Sent restart notification" in text
+    ]
+    assert confirmations, (
+        "Expected INFO 'Sent restart notification' when send succeeded; "
+        f"got records: {logged}"
+    )
+    # The marker file must be removed once the notification has been sent.
+    assert not marker.exists()
@@ -132,6 +132,7 @@ async def test_reconnect_success_resets_error_count():

    mock_app = MagicMock()
    mock_app.updater = mock_updater
+    mock_app.bot.get_me = AsyncMock(return_value=MagicMock())  # heartbeat probe path
    adapter._app = mock_app

    with patch("asyncio.sleep", new_callable=AsyncMock):
@@ -139,6 +140,15 @@ async def test_reconnect_success_resets_error_count():

    assert adapter._polling_network_error_count == 0

+    # Clean up the heartbeat-probe task scheduled after a successful reconnect.
+    pending = [t for t in adapter._background_tasks if not t.done()]
+    for t in pending:
+        t.cancel()
+        try:
+            await t
+        except (asyncio.CancelledError, Exception):
+            pass
+

@pytest.mark.asyncio
 async def test_reconnect_triggers_fatal_after_max_retries():
@@ -284,3 +294,182 @@ async def test_drain_helper_noop_without_app():
    adapter._app = None
    # Should not raise
    await adapter._drain_polling_connections()
+
+
+# ── Heartbeat probe ──────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_heartbeat_probe_no_op_when_polling_healthy():
+    """
+    Healthy case: the updater reports running and bot.get_me() answers, so
+    the probe must finish without re-entering the reconnect ladder.
+    """
+    adapter = _make_adapter()
+
+    app = MagicMock()
+    app.updater = MagicMock()
+    app.updater.running = True
+    app.bot.get_me = AsyncMock(return_value=MagicMock())
+    adapter._app = app
+
+    # Stub the ladder entry point so any re-entry would be recorded.
+    ladder = AsyncMock()
+    adapter._handle_polling_network_error = ladder
+
+    with patch("asyncio.sleep", new_callable=AsyncMock):
+        await adapter._verify_polling_after_reconnect()
+
+    app.bot.get_me.assert_awaited_once()
+    ladder.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_heartbeat_probe_reenters_ladder_when_updater_not_running():
+    """
+    An updater whose ``running`` flag is False when the probe fires counts
+    as wedged: the probe must hand a RuntimeError to the reconnect ladder.
+    """
+    adapter = _make_adapter()
+
+    app = MagicMock()
+    app.updater = MagicMock()
+    app.updater.running = False
+    app.bot.get_me = AsyncMock()
+    adapter._app = app
+
+    ladder = AsyncMock()
+    adapter._handle_polling_network_error = ladder
+
+    with patch("asyncio.sleep", new_callable=AsyncMock):
+        await adapter._verify_polling_after_reconnect()
+
+    # The liveness call must be skipped entirely once the flag is False.
+    app.bot.get_me.assert_not_called()
+    ladder.assert_awaited_once()
+    reported = ladder.await_args.args[0]
+    assert isinstance(reported, RuntimeError)
+    assert "not running" in str(reported).lower()
+
+
+@pytest.mark.asyncio
+async def test_heartbeat_probe_reenters_ladder_when_get_me_times_out():
+    """
+    A bot.get_me() call that never answers must be reported to the reconnect
+    ladder. The timeout is forced by replacing asyncio.wait_for, not awaited.
+    """
+    adapter = _make_adapter()
+
+    async def hang_forever(*args, **kwargs):
+        await asyncio.sleep(3600)
+
+    app = MagicMock()
+    app.updater = MagicMock()
+    app.updater.running = True
+    app.bot.get_me = AsyncMock(side_effect=hang_forever)
+    adapter._app = app
+
+    ladder = AsyncMock()
+    adapter._handle_polling_network_error = ladder
+
+    async def fast_wait_for(coro, timeout):
+        if asyncio.iscoroutine(coro):
+            coro.close()
+        raise asyncio.TimeoutError()
+
+    sleep_patch = patch("asyncio.sleep", new_callable=AsyncMock)
+    wait_for_patch = patch("gateway.platforms.telegram.asyncio.wait_for", new=fast_wait_for)
+    with sleep_patch, wait_for_patch:
+        await adapter._verify_polling_after_reconnect()
+
+    ladder.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_heartbeat_probe_reenters_ladder_on_get_me_network_error():
+    """
+    When bot.get_me() raises, the probe must forward the failure to the
+    reconnect ladder; exercised here with a ConnectionError.
+    """
+    adapter = _make_adapter()
+
+    app = MagicMock()
+    app.updater = MagicMock()
+    app.updater.running = True
+    app.bot.get_me = AsyncMock(side_effect=ConnectionError("pool wedged"))
+    adapter._app = app
+
+    # Stand-in for the reconnect ladder entry point.
+    ladder = AsyncMock()
+    adapter._handle_polling_network_error = ladder
+
+    with patch("asyncio.sleep", new_callable=AsyncMock):
+        await adapter._verify_polling_after_reconnect()
+
+    ladder.assert_awaited_once()
+    # The first positional argument is the exception handed to the ladder.
+    forwarded = ladder.await_args.args[0]
+    assert isinstance(forwarded, ConnectionError)
+
+
+@pytest.mark.asyncio
+async def test_heartbeat_probe_skips_when_already_fatal():
+    """
+    An adapter already marked with a fatal error must make the probe bail:
+    no get_me() liveness call and no re-entry into the reconnect ladder.
+    """
+    adapter = _make_adapter()
+    adapter._set_fatal_error("telegram_polling_conflict", "already fatal", retryable=False)
+
+    app = MagicMock()
+    app.bot.get_me = AsyncMock()
+    adapter._app = app
+
+    ladder = AsyncMock()
+    adapter._handle_polling_network_error = ladder
+    with patch("asyncio.sleep", new_callable=AsyncMock):
+        await adapter._verify_polling_after_reconnect()
+
+    app.bot.get_me.assert_not_called()
+    ladder.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_reconnect_schedules_heartbeat_probe_on_success():
+    """
+    A reconnect whose start_polling() succeeds must leave at least one new
+    task in _background_tasks — the heartbeat probe. Without that probe a
+    wedged Updater would sit silent, since no further error_callback would
+    fire to advance the reconnect ladder.
+    """
+    adapter = _make_adapter()
+    adapter._polling_network_error_count = 1
+
+    updater = MagicMock()
+    updater.running = True
+    updater.stop = AsyncMock()
+    updater.start_polling = AsyncMock()  # succeeds
+
+    app = MagicMock()
+    app.updater = updater
+    app.bot.get_me = AsyncMock(return_value=MagicMock())
+    adapter._app = app
+
+    tasks_before = len(adapter._background_tasks)
+
+    with patch("asyncio.sleep", new_callable=AsyncMock):
+        await adapter._handle_polling_network_error(Exception("Bad Gateway"))
+
+    assert len(adapter._background_tasks) > tasks_before, (
+        "Expected a heartbeat probe task to be scheduled after a successful "
+        "reconnect's start_polling()"
+    )
+
+    # Cancel whatever is still running so no task outlives the test.
+    unfinished = [task for task in adapter._background_tasks if not task.done()]
+    for task in unfinished:
+        task.cancel()
+        try:
+            await task
+        except (asyncio.CancelledError, Exception):
+            pass
@@ -0,0 +1,185 @@
+"""Tests for gateway.run._check_unavailable_skill.
+
+Regression coverage for the dir-name-vs-frontmatter-name drift bug.
+The hint function used to compare the skill's parent-directory name
+against the typed command and the disabled list. That silently missed
+every skill whose directory name differs from its declared frontmatter
+name (~19 skills on a standard install), so users typing a real slug
+like ``/stable-diffusion-image-generation`` got a generic "unknown
+command" response instead of the intended "disabled — enable with …"
+or "not installed — install with …" hint.
+
+These tests pin the fixed behavior:
+
+* Slug is derived from the frontmatter ``name:`` (exactly matching
+  :func:`agent.skill_commands.scan_skill_commands`), so the slug differs
+  from the directory name when the declared name is multi-word.
+* ``disabled`` membership is checked by the declared name, because that
+  is what :func:`hermes_cli.skills_config.save_disabled_skills` stores.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+
+@pytest.fixture
+def tmp_skills(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    """Isolated skills dir + HERMES_HOME so the real user config is untouched."""
+    home = tmp_path / ".hermes"
+    (home / "skills").mkdir(parents=True)
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    # Path.home is a classmethod; keep it one so instance-bound calls still work.
+    monkeypatch.setattr(Path, "home", classmethod(lambda cls: tmp_path))
+    return home / "skills"
+
+
+def _write_skill(skills_dir: Path, rel: str, frontmatter_name: str) -> Path:
+    """Write ``<skills_dir>/<rel>/SKILL.md`` declaring ``frontmatter_name``; return its path."""
+    skill_dir = skills_dir / rel
+    skill_dir.mkdir(parents=True, exist_ok=True)  # ``rel`` may be nested, e.g. "mlops/x"
+    skill_md = skill_dir / "SKILL.md"
+    skill_md.write_text(
+        f"---\nname: {frontmatter_name}\ndescription: test skill\n---\nBody.\n",
+        encoding="utf-8",
+    )
+    return skill_md
+
+
+def test_frontmatter_slug_matched_even_when_dir_name_differs(
+    tmp_skills: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Directory ``stable-diffusion`` + frontmatter ``Stable Diffusion Image Generation``.
+
+    Command typed: ``stable-diffusion-image-generation`` (the slug the
+    agent actually registers). The old dir-name-based check would have
+    compared ``stable-diffusion`` to the typed command and missed.
+    """
+    from gateway import run as gateway_run
+
+    _write_skill(tmp_skills, "mlops/stable-diffusion", "Stable Diffusion Image Generation")
+
+    # Config disables by declared name (matches what `hermes skills config` writes).
+    monkeypatch.setattr(
+        "gateway.run._get_disabled_skill_names",
+        lambda: {"Stable Diffusion Image Generation"},
+        raising=False,  # do not fail if gateway.run has no such attribute
+    )
+    with patch(
+        "tools.skills_tool._get_disabled_skill_names",
+        return_value={"Stable Diffusion Image Generation"},
+    ), patch(
+        "agent.skill_utils.get_all_skills_dirs",
+        return_value=[tmp_skills],  # scan only the isolated fixture directory
+    ):
+        msg = gateway_run._check_unavailable_skill("stable-diffusion-image-generation")
+
+    assert msg is not None, (
+        "expected a 'disabled' hint for the frontmatter-derived slug; "
+        "the old code compared the dir name 'stable-diffusion' and returned None"
+    )
+    assert "disabled" in msg.lower()
+    assert "hermes skills config" in msg  # hint must name the re-enable command
+
+
+def test_unknown_command_still_returns_none(
+    tmp_skills: Path,
+) -> None:
+    """A command matching no on-disk skill slug yields no hint (returns None)."""
+    from gateway import run as gateway_run
+
+    _write_skill(tmp_skills, "creative/ascii-art", "ascii-art")  # the only skill on disk
+
+    with patch(
+        "tools.skills_tool._get_disabled_skill_names", return_value=set()  # nothing disabled
+    ), patch(
+        "agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills]
+    ):
+        assert gateway_run._check_unavailable_skill("no-such-skill") is None
+
+
+def test_matched_but_not_disabled_returns_none(
+    tmp_skills: Path,
+) -> None:
+    """A skill that exists on disk and is not disabled produces no hint (None)."""
+    from gateway import run as gateway_run
+
+    _write_skill(tmp_skills, "creative/ascii-art", "ascii-art")
+
+    with patch(
+        "tools.skills_tool._get_disabled_skill_names", return_value=set()  # nothing disabled
+    ), patch(
+        "agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills]
+    ):
+        assert gateway_run._check_unavailable_skill("ascii-art") is None  # slug matches, still no hint
+
+
+def test_slug_normalization_strips_non_alnum(
+    tmp_skills: Path,
+) -> None:
+    """Frontmatter ``C++ Code Review`` → slug ``c-code-review`` (``+`` stripped)."""
+    from gateway import run as gateway_run
+
+    _write_skill(tmp_skills, "software-development/cpp-review", "C++ Code Review")
+
+    with patch(
+        "tools.skills_tool._get_disabled_skill_names",
+        return_value={"C++ Code Review"},  # disabled by declared name, not by slug
+    ), patch(
+        "agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills]
+    ):
+        msg = gateway_run._check_unavailable_skill("c-code-review")  # neither dir name nor raw name
+
+    assert msg is not None
+    assert "disabled" in msg.lower()
+
+
+def test_optional_skill_uses_frontmatter_slug(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Same drift bug applies to the optional-skills branch.
+
+    Before: directory name was matched against the typed command, so an
+    optional skill at ``optional-skills/mlops/stable-diffusion/SKILL.md``
+    with frontmatter ``Stable Diffusion Image Generation`` returned None
+    when the user typed the real slug.
+    """
+    from gateway import run as gateway_run
+
+    # Build an isolated optional-skills dir
+    optional = tmp_path / "optional-skills"
+    skill_dir = optional / "mlops" / "stable-diffusion"
+    skill_dir.mkdir(parents=True)
+    (skill_dir / "SKILL.md").write_text(
+        "---\nname: Stable Diffusion Image Generation\ndescription: test\n---\n",
+        encoding="utf-8",
+    )
+
+    # Point the optional lookup at our tmp dir. The source reads from
+    # ``get_optional_skills_dir(repo_root / "optional-skills")`` — we
+    # can't easily retarget ``repo_root``, so patch the resolver.
+    monkeypatch.setattr(
+        "hermes_constants.get_optional_skills_dir",
+        lambda _default: optional,  # ignore the caller's default, return the tmp dir
+        raising=False,
+    )
+
+    # Ensure the "disabled" branch doesn't match anything so we fall
+    # through to the optional-skills branch.
+    empty_skills = tmp_path / "empty-skills"
+    empty_skills.mkdir()
+    with patch(
+        "tools.skills_tool._get_disabled_skill_names", return_value=set()
+    ), patch(
+        "agent.skill_utils.get_all_skills_dirs", return_value=[empty_skills]
+    ):
+        msg = gateway_run._check_unavailable_skill("stable-diffusion-image-generation")
+
+    assert msg is not None, (
+        "optional-skills branch should recognize the frontmatter-derived slug; "
+        "the old dir-name-based check returned None here too"
+    )
+    assert "not installed" in msg.lower()
+    assert "official/mlops/stable-diffusion" in msg  # hint cites the on-disk dir path, not the slug
@@ -284,6 +284,66 @@ class TestBridgeRuntimeFailure:
        mock_fh.close.assert_called_once()
        assert adapter._bridge_log_fh is None

+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("returncode", [0, -2, -15])  # clean exit, SIGINT, SIGTERM
+    async def test_shutdown_suppresses_fatal_on_planned_bridge_exit(self, returncode):
+        """During graceful disconnect(), SIGTERM/SIGINT/clean-exit are NOT fatal.
+
+        Regression guard for the bug where every gateway shutdown/restart
+        logged "Fatal whatsapp adapter error (whatsapp_bridge_exited)" and
+        dispatched a fatal-error notification just before the normal
+        "✓ whatsapp disconnected" — because _check_managed_bridge_exit()
+        saw the bridge's returncode of -15 (our own SIGTERM) and classified
+        it as an unexpected crash.
+        """
+        adapter = _make_adapter()
+        fatal_handler = AsyncMock()
+        adapter.set_fatal_error_handler(fatal_handler)
+        adapter._running = True
+        adapter._http_session = MagicMock()
+        adapter._bridge_log_fh = MagicMock()
+        adapter._shutting_down = True  # disconnect() sets this before SIGTERM
+
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = returncode  # bridge process has already exited
+        adapter._bridge_process = mock_proc
+
+        result = await adapter._check_managed_bridge_exit()
+
+        assert result is None, (
+            f"returncode={returncode} during shutdown should be suppressed, "
+            f"got fatal message: {result!r}"
+        )
+        assert adapter.fatal_error_code is None
+        fatal_handler.assert_not_awaited()  # no fatal-error notification dispatched
+
+    @pytest.mark.asyncio
+    async def test_shutdown_still_surfaces_nonzero_crash(self):
+        """Even during shutdown, a truly crashed bridge (e.g. returncode 137) is fatal.
+
+        The suppression list is deliberately narrow (0, -2, -15) so that
+        OOM-kill (137), assertion failures, or custom error exits still
+        reach the fatal-error handler and user notification path.
+        """
+        adapter = _make_adapter()
+        fatal_handler = AsyncMock()
+        adapter.set_fatal_error_handler(fatal_handler)
+        adapter._running = True
+        adapter._http_session = MagicMock()
+        adapter._bridge_log_fh = MagicMock()
+        adapter._shutting_down = True  # same shutdown state as the suppressed cases
+
+        mock_proc = MagicMock()
+        mock_proc.poll.return_value = 137  # shell-style 128+9 status, i.e. SIGKILL / OOM-kill
+        adapter._bridge_process = mock_proc
+
+        result = await adapter._check_managed_bridge_exit()
+
+        assert result is not None
+        assert "exited unexpectedly" in result
+        assert adapter.fatal_error_code == "whatsapp_bridge_exited"
+        fatal_handler.assert_awaited_once()  # the notification path ran exactly once
+
    @pytest.mark.asyncio
    async def test_closed_when_http_not_ready(self):
        """Health endpoint never returns 200 within 15 attempts."""
@@ -203,6 +203,30 @@ class TestListAuthenticatedProvidersBedrock:
        bedrock = next((p for p in providers if p["slug"] == "bedrock"), None)
        assert bedrock is None, "bedrock should NOT appear when AWS credentials are absent"

+    def test_non_bedrock_picker_does_not_probe_full_aws_chain(self, monkeypatch):
+        """Non-Bedrock provider discovery must not touch boto3's full credential chain."""
+        from hermes_cli.model_switch import list_authenticated_providers
+
+        monkeypatch.delenv("AWS_PROFILE", raising=False)  # raising=False: variable may be unset
+        monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False)
+        monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False)
+        monkeypatch.delenv("AWS_BEARER_TOKEN_BEDROCK", raising=False)
+        monkeypatch.delenv("AWS_WEB_IDENTITY_TOKEN_FILE", raising=False)
+        monkeypatch.delenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", raising=False)
+        monkeypatch.delenv("AWS_CONTAINER_CREDENTIALS_FULL_URI", raising=False)
+
+        calls = {"has_aws_credentials": 0}  # call counter mutated by the stub below
+
+        def _has_aws_credentials():
+            calls["has_aws_credentials"] += 1
+            return False
+
+        with patch("agent.bedrock_adapter.has_aws_credentials", side_effect=_has_aws_credentials):
+            providers = list_authenticated_providers(current_provider="openrouter", max_models=0)
+
+        assert calls["has_aws_credentials"] == 0  # the credential probe was never invoked
+        assert all(p["slug"] != "bedrock" for p in providers)
+
    def test_bedrock_falls_back_to_curated_when_discovery_fails(self, monkeypatch):
        """When discover_bedrock_models() raises, fall back to curated list without crashing."""
        from hermes_cli.model_switch import list_authenticated_providers
@@ -899,6 +899,73 @@ class TestTelegramMenuCommands:
        assert "my_enabled_skill" in menu_names
        assert "my_disabled_skill" not in menu_names

+    def test_external_dir_skills_included_in_telegram_menu(self, tmp_path, monkeypatch):
+        """External skills (``skills.external_dirs``) must appear in the Telegram menu.
+
+        Regression test for #8110 — external skills were visible to the
+        agent and CLI but silently excluded from gateway slash menus
+        because ``_collect_gateway_skill_entries`` only accepted skills
+        whose path started with ``SKILLS_DIR``.
+
+        Also verifies the trailing-slash boundary: a directory that
+        simply shares a prefix with a configured ``external_dirs`` entry
+        (``/tmp/my-skills-extra`` vs ``/tmp/my-skills``) must NOT be
+        admitted.
+        """
+        from unittest.mock import patch
+
+        local_dir = tmp_path / "skills"
+        local_dir.mkdir()
+        external_dir = tmp_path / "my-skills"
+        external_dir.mkdir()
+        lookalike_dir = tmp_path / "my-skills-extra"  # shares the prefix but is not configured
+        lookalike_dir.mkdir()
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "config.yaml").write_text(
+            f"skills:\n  external_dirs:\n    - {external_dir}\n"
+        )
+
+        fake_cmds = {
+            "/local-one": {
+                "name": "local-one",
+                "description": "Local",
+                "skill_md_path": f"{local_dir}/local-one/SKILL.md",
+                "skill_dir": f"{local_dir}/local-one",
+            },
+            "/morning-briefing": {
+                "name": "morning-briefing",
+                "description": "External skill",
+                "skill_md_path": f"{external_dir}/morning-briefing/SKILL.md",
+                "skill_dir": f"{external_dir}/morning-briefing",
+            },
+            "/lookalike-skill": {
+                "name": "lookalike-skill",
+                "description": "Lives in a sibling dir that shares a prefix",
+                "skill_md_path": f"{lookalike_dir}/lookalike-skill/SKILL.md",
+                "skill_dir": f"{lookalike_dir}/lookalike-skill",
+            },
+        }
+
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", local_dir),
+            patch(
+                "agent.skill_utils.get_external_skills_dirs",
+                return_value=[external_dir],  # only "my-skills" is an external dir
+            ),
+        ):
+            menu, _ = telegram_menu_commands(max_commands=100)
+
+        menu_names = {n for n, _ in menu}  # asserts below expect "_" where the slug has "-"
+        assert "local_one" in menu_names, "local skill must appear"
+        assert "morning_briefing" in menu_names, (
+            "external skill from skills.external_dirs must appear (fixes #8110)"
+        )
+        assert "lookalike_skill" not in menu_names, (
+            "prefix-match sibling directories must not be admitted"
+        )
+
    def test_special_chars_in_skill_names_sanitized(self, tmp_path, monkeypatch):
        """Skills with +, /, or other special chars produce valid Telegram names."""
        from unittest.mock import patch
@@ -1353,6 +1420,119 @@ class TestDiscordSkillCommandsByCategory:
        assert "vllm" in names
        assert len(uncategorized) == 0

+    def test_no_legacy_25x25_cap(self, tmp_path, monkeypatch):
+        """The old nested-layout caps (25 groups × 25 skills/group) are gone.
+
+        The live caller flattens categories into a single autocomplete list,
+        which Discord fetches dynamically — the per-command 8KB payload
+        concern from the old nested layout (#11321, #10259) no longer applies.
+        Guards against accidentally re-introducing the caps, which would
+        silently drop skills in the 26th+ alphabetical category (the exact
+        failure mode users were hitting with 29 category dirs on real
+        installs).
+        """
+        from unittest.mock import patch
+
+        fake_skills_dir = str(tmp_path / "skills")
+
+        # Build 30 categories (> old _MAX_GROUPS=25) each with 30 skills
+        # (> old _MAX_PER_GROUP=25).
+        fake_cmds = {}
+        for c in range(30):
+            cat = f"cat{c:02d}"  # cat00, cat01, ..., cat29 — 30 categories
+            for s in range(30):
+                name = f"skill-{c:02d}-{s:02d}"  # unique across all categories
+                skill_subdir = tmp_path / "skills" / cat / name
+                skill_subdir.mkdir(parents=True, exist_ok=True)
+                (skill_subdir / "SKILL.md").write_text("---\nname: x\n---\n")
+                fake_cmds[f"/{name}"] = {
+                    "name": name,
+                    "description": f"Category {cat} skill {s}",
+                    "skill_md_path": f"{fake_skills_dir}/{cat}/{name}/SKILL.md",
+                }
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path / "skills"),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        # Every category should be present — no 25-group cap
+        assert len(categories) == 30, (
+            f"expected all 30 categories, got {len(categories)} "
+            f"(cap from old nested layout must be removed)"
+        )
+        # Every skill in every category must be present — no 25-per-group cap
+        for cat_name, entries in categories.items():
+            assert len(entries) == 30, (
+                f"category {cat_name}: expected 30 skills, got {len(entries)} "
+                f"(cap from old nested layout must be removed)"
+            )
+        # Nothing should be reported hidden for the cap reason (the only
+        # legitimate hidden reason now is name clamp collisions, which
+        # don't happen here since all names are unique).
+        assert hidden == 0
+
+    def test_external_dirs_skills_included(self, tmp_path, monkeypatch):
+        """Skills in ``skills.external_dirs`` must appear in /skill autocomplete.
+
+        #18741 fixed this for the flat ``discord_skill_commands`` collector
+        but left ``discord_skill_commands_by_category`` (the live caller for
+        Discord's ``/skill`` command) still filtering by
+        ``SKILLS_DIR`` prefix only. Regression guard that both collectors
+        now accept external-dir skills.
+        """
+        from unittest.mock import patch
+
+        local_skills_dir = tmp_path / "local-skills"
+        external_dir = tmp_path / "external-skills"
+
+        (local_skills_dir / "creative" / "local-skill").mkdir(parents=True)
+        (local_skills_dir / "creative" / "local-skill" / "SKILL.md").write_text("")
+
+        (external_dir / "mlops" / "external-skill").mkdir(parents=True)
+        (external_dir / "mlops" / "external-skill" / "SKILL.md").write_text("")
+
+        fake_cmds = {
+            "/local-skill": {
+                "name": "local-skill",
+                "description": "Local",
+                "skill_md_path": str(local_skills_dir / "creative" / "local-skill" / "SKILL.md"),
+            },
+            "/external-skill": {
+                "name": "external-skill",
+                "description": "External",
+                "skill_md_path": str(external_dir / "mlops" / "external-skill" / "SKILL.md"),
+            },
+        }
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        with (
+            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
+            patch("tools.skills_tool.SKILLS_DIR", local_skills_dir),
+            patch(
+                "agent.skill_utils.get_external_skills_dirs",
+                return_value=[external_dir],  # the only configured external dir
+            ),
+        ):
+            categories, uncategorized, hidden = discord_skill_commands_by_category(
+                reserved_names=set(),
+            )
+
+        # Local skill → grouped under "creative"
+        assert "creative" in categories
+        assert any(n == "local-skill" for n, _d, _k in categories["creative"])
+        # External skill → grouped under its own top-level dir "mlops"
+        assert "mlops" in categories, (
+            "external-dir skills must be included — the old SKILLS_DIR-only "
+            "prefix check was broken for by_category (completes #18741)"
+        )
+        assert any(n == "external-skill" for n, _d, _k in categories["mlops"])
+        assert uncategorized == []  # both skills sit under a category directory
+        assert hidden == 0
+

 # ---------------------------------------------------------------------------
 # Plugin slash command integration
@@ -0,0 +1,171 @@
+"""Tests for Discord /skill 32-char clamp collision warnings.
+
+Discord's per-command name limit is 32 chars, so
+``discord_skill_commands_by_category`` clamps skill slugs to that width
+before deduping. When two skills share the same 32-char prefix, only
+the first (alphabetical) wins; the second is dropped. Previously the
+drop was silent — the ``hidden`` count incremented but nothing named
+which skills collided, so authors had no way to discover the drop
+short of noticing that their skill was missing from the autocomplete.
+
+This module pins the upgraded behavior: a WARNING log with both full
+cmd_keys + the clamped name, so whoever named the skills sees the
+collision and can rename one.
+"""
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_clamp_collision_emits_warning_naming_both_skills(
+    tmp_path: Path, caplog
+) -> None:
+    """Two skills with identical first 32 chars — warning names both."""
+    from hermes_cli.commands import discord_skill_commands_by_category
+
+    # Craft cmd_keys that share the first 32 chars.
+    # 38-char names 'skill-collision-prefix-identical-{alpha,bravo}'
+    #   -> both clamp to 'skill-collision-prefix-identical'
+    prefix = "skill-collision-prefix-identical"  # exactly 32 chars
+    name_a = prefix + "-alpha"  # /skill-collision-prefix-identical-alpha
+    name_b = prefix + "-bravo"  # /skill-collision-prefix-identical-bravo
+    assert name_a[:32] == name_b[:32] == prefix
+
+    skills_dir = tmp_path / "skills"
+    for nm in (name_a, name_b):
+        d = skills_dir / "creative" / nm
+        d.mkdir(parents=True)
+        (d / "SKILL.md").write_text("---\nname: x\n---\n")
+
+    fake_cmds = {
+        f"/{name_a}": {
+            "name": name_a,
+            "description": "Alpha",
+            "skill_md_path": str(skills_dir / "creative" / name_a / "SKILL.md"),
+        },
+        f"/{name_b}": {
+            "name": name_b,
+            "description": "Bravo",
+            "skill_md_path": str(skills_dir / "creative" / name_b / "SKILL.md"),
+        },
+    }
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), (
+        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
+    ), patch("tools.skills_tool.SKILLS_DIR", skills_dir):
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names=set(),
+        )
+
+    # One skill made it through, one was dropped (hidden counted).
+    assert hidden == 1
+    kept_names = [n for n, _d, _k in categories.get("creative", [])]
+    assert len(kept_names) == 1
+    # Alphabetical iteration means the -alpha variant wins the slot.
+    assert kept_names[0] == prefix  # clamped
+
+    # Exactly one warning, naming BOTH full cmd_keys and the clamped name.
+    warnings = [
+        r for r in caplog.records
+        if r.levelno == logging.WARNING and "clamp" in r.getMessage()
+    ]
+    assert len(warnings) == 1, (
+        f"expected exactly one clamp-collision warning, got {len(warnings)}: "
+        f"{[r.getMessage() for r in warnings]}"
+    )
+    msg = warnings[0].getMessage()
+    assert f"/{name_a}" in msg, f"winner not named in warning: {msg!r}"
+    assert f"/{name_b}" in msg, f"loser not named in warning: {msg!r}"
+    assert prefix in msg, f"clamped name not in warning: {msg!r}"
+
+
+def test_clamp_collision_with_reserved_name_emits_distinct_warning(
+    tmp_path: Path, caplog
+) -> None:
+    """A skill clashing with a reserved gateway command gets its own phrasing.
+
+    The reserved-vs-skill case is operationally different — the fix is
+    still "rename the skill," but there's no second skill to also
+    rename. The warning should say so explicitly.
+    """
+    from hermes_cli.commands import discord_skill_commands_by_category
+
+    # Reserved name 'help' is 4 chars — make a skill whose slug
+    # clamps to 'help' (so, exactly 'help').
+    reserved = "help"
+    skills_dir = tmp_path / "skills"
+    d = skills_dir / "creative" / reserved
+    d.mkdir(parents=True)
+    (d / "SKILL.md").write_text("---\nname: x\n---\n")
+
+    fake_cmds = {
+        f"/{reserved}": {
+            "name": reserved,
+            "description": "desc",
+            "skill_md_path": str(d / "SKILL.md"),
+        },
+    }
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), (
+        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
+    ), patch("tools.skills_tool.SKILLS_DIR", skills_dir):
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names={"help"},  # the name the skill must lose to
+        )
+
+    # Skill dropped in favor of the reserved command.
+    assert hidden == 1
+    assert categories == {}
+    assert uncategorized == []
+
+    warnings = [
+        r for r in caplog.records
+        if r.levelno == logging.WARNING and "reserved" in r.getMessage()
+    ]
+    assert len(warnings) == 1, (
+        f"expected one reserved-name collision warning, got "
+        f"{[r.getMessage() for r in warnings]}"
+    )
+    msg = warnings[0].getMessage()
+    assert f"/{reserved}" in msg  # the offending cmd_key is named
+    assert "reserved" in msg.lower()
+
+
+def test_no_collision_no_warning(tmp_path: Path, caplog) -> None:
+    """Sanity check: two skills with distinct names are both kept, with no warnings."""
+    from hermes_cli.commands import discord_skill_commands_by_category
+
+    skills_dir = tmp_path / "skills"
+    for nm in ("alpha", "bravo"):
+        d = skills_dir / "creative" / nm
+        d.mkdir(parents=True)
+        (d / "SKILL.md").write_text("---\nname: x\n---\n")
+
+    fake_cmds = {
+        "/alpha": {
+            "name": "alpha", "description": "",
+            "skill_md_path": str(skills_dir / "creative" / "alpha" / "SKILL.md"),
+        },
+        "/bravo": {
+            "name": "bravo", "description": "",
+            "skill_md_path": str(skills_dir / "creative" / "bravo" / "SKILL.md"),
+        },
+    }
+
+    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), (
+        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
+    ), patch("tools.skills_tool.SKILLS_DIR", skills_dir):
+        categories, uncategorized, hidden = discord_skill_commands_by_category(
+            reserved_names=set(),
+        )
+
+    assert hidden == 0
+    assert {n for n, _d, _k in categories["creative"]} == {"alpha", "bravo"}
+    clamp_warnings = [  # either collision phrasing counts as a failure here
+        r for r in caplog.records
+        if r.levelno == logging.WARNING
+        and ("clamp" in r.getMessage() or "reserved" in r.getMessage())
+    ]
+    assert clamp_warnings == []
@@ -51,6 +51,57 @@ class TestProviderEnvDetection:
        assert not _has_provider_env_config(content)


+class TestDoctorEnvFileEncoding:
+    """Regression for #18637 (bug 3): `hermes doctor` crashed on Windows
+    Chinese locale (GBK) because `.env` was read with Path.read_text() which
+    defaults to the system locale encoding, not UTF-8."""
+
+    def test_doctor_reads_env_as_utf8_even_when_locale_is_not_utf8(
+        self, monkeypatch, tmp_path
+    ):
+        import pathlib
+
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        # Write a UTF-8 .env containing an em dash (U+2014 = e2 80 94). The
+        # 0x94 byte is exactly the one the issue reporter hit: GBK pairs
+        # e2 80, then reads 0x94 as a lead byte with no valid trail byte
+        # after it, so locale-default reads raise UnicodeDecodeError.
+        env_path = hermes_home / ".env"
+        env_path.write_text(
+            "OPENAI_API_KEY=sk-test  # em-dash here — should not crash\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)
+
+        orig_read_text = pathlib.Path.read_text  # keep the real method for delegation
+
+        def gbk_like_read_text(self, encoding=None, errors=None, **kwargs):
+            # Simulate a GBK locale: refuse to decode this specific UTF-8
+            # .env unless the caller pins encoding="utf-8".
+            if self == env_path and encoding != "utf-8":
+                raise UnicodeDecodeError(
+                    "gbk", b"\x94", 0, 1, "illegal multibyte sequence"
+                )
+            return orig_read_text(self, encoding=encoding, errors=errors, **kwargs)
+
+        monkeypatch.setattr(pathlib.Path, "read_text", gbk_like_read_text)
+
+        # Short-circuit the expensive tool-availability probe — we only
+        # need doctor to reach the .env read without crashing.
+        fake_model_tools = types.SimpleNamespace(
+            check_tool_availability=lambda *a, **kw: (_ for _ in ()).throw(SystemExit(0)),
+            TOOLSET_REQUIREMENTS={},
+        )
+        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
+
+        # Run doctor. If the .env read still uses locale encoding, this
+        # raises UnicodeDecodeError and the test fails.
+        with pytest.raises(SystemExit):  # raised by the fake check_tool_availability
+            doctor_mod.run_doctor(Namespace(fix=False))
+
+
 class TestDoctorToolAvailabilityOverrides:
    def test_marks_honcho_available_when_configured(self, monkeypatch):
        monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True)
@@ -4,11 +4,16 @@ from hermes_cli.setup import setup_agent_settings


 def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys):
-    """The helper text should match the value shown in the prompt."""
+    """The helper text should match the value shown in the prompt.
+
+    After PR#18413 max_turns is read exclusively from config.yaml — the
+    .env `HERMES_MAX_ITERATIONS` fallback was removed because it was
+    shadowing the user's current config (see the 60-vs-500 incident).
+    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    config = {
-        "agent": {"max_turns": 90},
+        "agent": {"max_turns": 60},
        "display": {"tool_progress": "all"},
        "compression": {"threshold": 0.50},
        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
@@ -16,10 +21,10 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk

    prompt_answers = iter(["60", "all", "0.5"])

-    monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "")
    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
    monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
+    monkeypatch.setattr("hermes_cli.setup.remove_env_value", lambda *args, **kwargs: None)
    monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)

    setup_agent_settings(config)
@@ -27,3 +32,47 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
    out = capsys.readouterr().out
    assert "Press Enter to keep 60." in out
    assert "Default is 90" not in out
+
+
+def test_setup_agent_settings_prefers_config_over_stale_env(tmp_path, monkeypatch, capsys):
+    """The wizard shows ``agent.max_turns`` from config, never a stale .env value.
+
+    Guards against the regression where a leftover
+    ``HERMES_MAX_ITERATIONS=60`` in ``.env`` (written by an old
+    ``hermes setup`` run) masked ``agent.max_turns: 500`` from config.yaml.
+    The stale key must also be removed from ``.env``.
+    """
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    def stale_env_lookup(key):
+        # Pretend .env still carries the old iteration cap.
+        return "60" if key == "HERMES_MAX_ITERATIONS" else ""
+
+    removed_keys: list[str] = []
+
+    def record_removal(key):
+        removed_keys.append(key)
+        return True
+
+    answers = iter(["500", "all", "0.5"])
+
+    monkeypatch.setattr("hermes_cli.setup.get_env_value", stale_env_lookup)
+    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(answers))
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
+    monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
+    monkeypatch.setattr("hermes_cli.setup.remove_env_value", record_removal)
+    monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)
+
+    # The user raised max_turns directly in config.yaml.
+    config = {
+        "agent": {"max_turns": 500},
+        "display": {"tool_progress": "all"},
+        "compression": {"threshold": 0.50},
+        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
+    }
+
+    setup_agent_settings(config)
+
+    printed = capsys.readouterr().out
+    # The prompt must advertise the config value, not the stale .env one.
+    assert "Press Enter to keep 500." in printed
+    assert "Press Enter to keep 60." not in printed
+    # The stale .env entry is cleaned up as a side effect.
+    assert "HERMES_MAX_ITERATIONS" in removed_keys
@@ -8,6 +8,7 @@ from hermes_cli.tools_config import (
    _configure_provider,
    _get_platform_tools,
    _platform_toolset_summary,
+    _reconfigure_tool,
    _save_platform_tools,
    _toolset_has_keys,
    CONFIGURABLE_TOOLSETS,
@@ -468,6 +469,33 @@ def test_local_browser_provider_is_saved_explicitly(monkeypatch):
    assert config["browser"]["cloud_provider"] == "local"


+def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypatch):
+    """An enabled ``web`` toolset is offered for reconfiguration even with no keys set."""
+    captured = {}
+    configured_keys = []
+
+    def no_keys_configured(ts_key, config=None):
+        return False
+
+    def pick_first_choice(question, choices, default=0):
+        # Remember what the menu offered, then select its first entry.
+        captured["choices"] = choices
+        return 0
+
+    def record_configured(ts_key, cat, config):
+        configured_keys.append(ts_key)
+
+    monkeypatch.setattr("hermes_cli.tools_config._toolset_has_keys", no_keys_configured)
+    monkeypatch.setattr("hermes_cli.tools_config._prompt_choice", pick_first_choice)
+    monkeypatch.setattr(
+        "hermes_cli.tools_config._configure_tool_category_for_reconfig",
+        record_configured,
+    )
+    monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)
+
+    _reconfigure_tool({"platform_toolsets": {"cli": ["web"]}})
+
+    offered = captured["choices"]
+    assert any("Web Search" in label for label in offered)
+    assert configured_keys == ["web"]
+
+
 def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
    monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
@@ -0,0 +1,69 @@
+"""Regression test for #17929: AIAgent.__init__ should try fallback_model
+when primary provider credentials are exhausted."""
+import pytest
+from unittest.mock import patch, MagicMock
+from run_agent import AIAgent
+
+
+def _make_tool_defs():
+    """Return a one-element tool list holding a parameterless ``web_search`` function."""
+    web_search = {
+        "name": "web_search",
+        "description": "search",
+        "parameters": {"type": "object", "properties": {}},
+    }
+    return [{"type": "function", "function": web_search}]
+
+
+def _mock_client(api_key="fb-key-1234567890", base_url="https://fb.example.com/v1"):
+    """Build a MagicMock standing in for a resolved provider client.
+
+    The mock exposes ``api_key`` and ``base_url`` as given and a
+    ``_default_headers`` attribute set to ``None``.
+    """
+    client = MagicMock()
+    client.configure_mock(api_key=api_key, base_url=base_url, _default_headers=None)
+    return client
+
+
+def test_init_tries_fallback_when_primary_returns_none():
+    """A resolvable ``fallback_model`` entry rescues init when the primary has no client.
+
+    ``resolve_provider_client`` yields ``(None, None)`` for the primary
+    provider but a usable client for the fallback entry, so
+    ``AIAgent.__init__`` must adopt the fallback instead of raising
+    RuntimeError.
+    """
+    fallback_client = _mock_client()
+    fallback_entry = {"provider": "tencent-token-plan", "model": "kimi2.5"}
+
+    def fake_resolve(provider, model=None, raw_codex=False,
+                     explicit_base_url=None, explicit_api_key=None):
+        # Only the fallback provider resolves; any other lookup (i.e. the
+        # primary) behaves as if its credentials were exhausted.
+        if provider != "tencent-token-plan":
+            return None, None
+        return fallback_client, "kimi2.5"
+
+    with patch("agent.auxiliary_client.resolve_provider_client", side_effect=fake_resolve), \
+         patch("run_agent.get_tool_definitions", return_value=_make_tool_defs()), \
+         patch("run_agent.check_toolset_requirements", return_value={}), \
+         patch("run_agent.OpenAI", return_value=MagicMock()):
+
+        agent = AIAgent(
+            provider="alibaba-coding-plan",
+            model="qwen3.6-plus",
+            api_key=None,
+            base_url=None,
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+            fallback_model=[fallback_entry],
+        )
+        # The agent now runs on the fallback provider/model and records the switch.
+        assert agent.provider == "tencent-token-plan"
+        assert agent.model == "kimi2.5"
+        assert agent._fallback_activated is True
+
+
+def test_init_raises_when_no_fallback_configured():
+    """With no client for the primary and no fallback, init raises RuntimeError."""
+    init_kwargs = {
+        "provider": "alibaba-coding-plan",
+        "model": "qwen3.6-plus",
+        "api_key": None,
+        "base_url": None,
+        "quiet_mode": True,
+        "skip_context_files": True,
+        "skip_memory": True,
+        "fallback_model": None,
+    }
+
+    with patch("agent.auxiliary_client.resolve_provider_client", return_value=(None, None)), \
+         patch("run_agent.get_tool_definitions", return_value=_make_tool_defs()), \
+         patch("run_agent.check_toolset_requirements", return_value={}), \
+         patch("run_agent.OpenAI", return_value=MagicMock()):
+
+        with pytest.raises(RuntimeError, match="no API key was found"):
+            AIAgent(**init_kwargs)
@@ -81,3 +81,51 @@ def test_unknown_base_url_clears_default_headers(mock_openai):
    agent._apply_client_headers_for_base_url("https://api.example.com/v1")

    assert "default_headers" not in agent._client_kwargs
+
+
+@patch("run_agent.OpenAI")
+def test_openrouter_headers_include_response_cache_when_enabled(mock_openai):
+    """Enabling ``openrouter.response_cache`` adds the cache and TTL headers."""
+    mock_openai.return_value = MagicMock()
+    openrouter_url = "https://openrouter.ai/api/v1"
+    cache_on_config = {
+        "openrouter": {"response_cache": True, "response_cache_ttl": 600},
+    }
+    agent = AIAgent(
+        api_key="test-key",
+        base_url=openrouter_url,
+        model="test/model",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+
+    with patch("hermes_cli.config.load_config", return_value=cache_on_config):
+        agent._apply_client_headers_for_base_url(openrouter_url)
+
+    sent_headers = agent._client_kwargs["default_headers"]
+    expected_headers = {
+        "HTTP-Referer": "https://hermes-agent.nousresearch.com",
+        "X-OpenRouter-Cache": "true",
+        # The integer TTL from config is sent as a string header value.
+        "X-OpenRouter-Cache-TTL": "600",
+    }
+    for header_name, header_value in expected_headers.items():
+        assert sent_headers[header_name] == header_value
+
+
+@patch("run_agent.OpenAI")
+def test_openrouter_headers_no_cache_when_disabled(mock_openai):
+    """Disabling ``openrouter.response_cache`` keeps both cache headers out."""
+    mock_openai.return_value = MagicMock()
+    openrouter_url = "https://openrouter.ai/api/v1"
+    cache_off_config = {"openrouter": {"response_cache": False}}
+    agent = AIAgent(
+        api_key="test-key",
+        base_url=openrouter_url,
+        model="test/model",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+
+    with patch("hermes_cli.config.load_config", return_value=cache_off_config):
+        agent._apply_client_headers_for_base_url(openrouter_url)
+
+    sent_headers = agent._client_kwargs["default_headers"]
+    # The regular OpenRouter attribution header is still present...
+    assert sent_headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
+    # ...but neither cache header is.
+    for cache_header in ("X-OpenRouter-Cache", "X-OpenRouter-Cache-TTL"):
+        assert cache_header not in sent_headers
@@ -0,0 +1,116 @@
+"""Tests for get_hermes_home() profile-mode fallback warning.
+
+Regression test for https://github.com/NousResearch/hermes-agent/issues/18594.
+
+When HERMES_HOME is unset but an active_profile file indicates a non-default
+profile is active, get_hermes_home() should:
+  1. STILL return ~/.hermes (raising would brick 30+ module-level callers)
+  2. Emit a loud one-shot warning to stderr so operators can diagnose
+     cross-profile data contamination after the fact.
+
+The warning goes to stderr directly (not through logging) because this
+function is called at module-import time from 30+ sites, often before the
+logging subsystem has been configured.
+"""
+
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def fresh_constants(monkeypatch, tmp_path):
+    """Import hermes_constants fresh and reset the one-shot warn flag.
+
+    Returns the reloaded ``hermes_constants`` module, with ``Path.home``
+    pointed at ``tmp_path`` and ``HERMES_HOME`` removed from the environment
+    for the duration of the test.
+    """
+    import importlib
+    import hermes_constants
+    # Re-executing the module presumably resets its module-level warn-once
+    # state so every test starts "unwarned" — confirm against hermes_constants.
+    importlib.reload(hermes_constants)
+    # Redirect the home directory to the per-test temp dir.
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+    # raising=False: tolerate HERMES_HOME already being unset.
+    monkeypatch.delenv("HERMES_HOME", raising=False)
+    return hermes_constants
+
+
+class TestGetHermesHomeProfileWarning:
+    """get_hermes_home() falls back to ~/.hermes and warns once for named profiles."""
+
+    # Substring of the stderr warning that every test checks for.
+    WARNING_MARKER = "HERMES_HOME fallback"
+
+    @staticmethod
+    def _make_hermes_dir(home):
+        """Create ``<home>/.hermes`` and return its path."""
+        hermes_dir = home / ".hermes"
+        hermes_dir.mkdir()
+        return hermes_dir
+
+    def test_classic_mode_no_active_profile_no_warning(
+        self, fresh_constants, tmp_path, capsys
+    ):
+        """No active_profile file at all → silent, returns ~/.hermes."""
+        resolved = fresh_constants.get_hermes_home()
+        assert resolved == tmp_path / ".hermes"
+        assert self.WARNING_MARKER not in capsys.readouterr().err
+
+    def test_default_active_profile_no_warning(
+        self, fresh_constants, tmp_path, capsys
+    ):
+        """active_profile=default → no warning, returns ~/.hermes."""
+        hermes_dir = self._make_hermes_dir(tmp_path)
+        (hermes_dir / "active_profile").write_text("default\n")
+        resolved = fresh_constants.get_hermes_home()
+        assert resolved == tmp_path / ".hermes"
+        assert self.WARNING_MARKER not in capsys.readouterr().err
+
+    def test_named_profile_unset_home_warns_once(
+        self, fresh_constants, tmp_path, capsys
+    ):
+        """active_profile=coder with HERMES_HOME unset → one loud warning, fallback returned."""
+        hermes_dir = self._make_hermes_dir(tmp_path)
+        (hermes_dir / "active_profile").write_text("coder\n")
+
+        resolved = fresh_constants.get_hermes_home()
+
+        # The fallback path is still returned, so import-time callers don't crash.
+        assert resolved == tmp_path / ".hermes"
+        # Exactly one warning reached stderr, naming the profile and the issue.
+        first_err = capsys.readouterr().err
+        assert first_err.count(self.WARNING_MARKER) == 1
+        assert "'coder'" in first_err
+        assert "#18594" in first_err
+
+        # One-shot: further calls stay quiet.
+        for _ in range(2):
+            fresh_constants.get_hermes_home()
+        later_err = capsys.readouterr().err
+        assert self.WARNING_MARKER not in later_err
+
+    def test_hermes_home_set_suppresses_warning(
+        self, fresh_constants, tmp_path, capsys, monkeypatch
+    ):
+        """An explicit HERMES_HOME wins and silences the warning, even for 'coder'."""
+        hermes_dir = tmp_path / ".hermes"
+        profile_dir = hermes_dir / "profiles" / "coder"
+        profile_dir.mkdir(parents=True)
+        (hermes_dir / "active_profile").write_text("coder\n")
+        monkeypatch.setenv("HERMES_HOME", str(profile_dir))
+
+        resolved = fresh_constants.get_hermes_home()
+
+        assert resolved == profile_dir
+        assert self.WARNING_MARKER not in capsys.readouterr().err
+
+    def test_unreadable_active_profile_no_crash(
+        self, fresh_constants, tmp_path, capsys
+    ):
+        """An undecodable active_profile file → fall through silently."""
+        hermes_dir = self._make_hermes_dir(tmp_path)
+        # These bytes are not valid UTF-8.
+        (hermes_dir / "active_profile").write_bytes(b"\xff\xfe\x00\x00")
+
+        resolved = fresh_constants.get_hermes_home()
+
+        assert resolved == tmp_path / ".hermes"
+        # No crash, and no warning either: the intended profile is unknowable.
+        assert self.WARNING_MARKER not in capsys.readouterr().err
+
+    def test_empty_active_profile_no_warning(
+        self, fresh_constants, tmp_path, capsys
+    ):
+        """An empty active_profile file is treated as default → no warning."""
+        hermes_dir = self._make_hermes_dir(tmp_path)
+        (hermes_dir / "active_profile").write_text("")
+
+        resolved = fresh_constants.get_hermes_home()
+
+        assert resolved == tmp_path / ".hermes"
+        assert self.WARNING_MARKER not in capsys.readouterr().err
@@ -58,3 +58,11 @@ class TestCamofoxConfigDefaults:

        browser_cfg = DEFAULT_CONFIG["browser"]
        assert browser_cfg["camofox"]["managed_persistence"] is False
+
+    def test_config_version_is_positive_int(self):
+        """``_config_version`` is a positive integer; no specific value is pinned."""
+        from hermes_cli.config import DEFAULT_CONFIG
+
+        version = DEFAULT_CONFIG["_config_version"]
+        # Only the invariant is checked. Asserting an exact number would turn
+        # this into a change-detector that breaks on every version bump.
+        assert isinstance(version, int)
+        assert version > 0
@@ -371,6 +371,57 @@ class TestDeleteSkill:
            _delete_skill("my-skill")
        assert not (tmp_path / "devops").exists()

+    def test_delete_with_absorbed_into_valid_target(self, tmp_path):
+        """Deleting with an existing umbrella succeeds and names it in the message."""
+        with _skill_dir(tmp_path):
+            for skill_name in ("umbrella", "narrow"):
+                _create_skill(skill_name, VALID_SKILL_CONTENT)
+            outcome = _delete_skill("narrow", absorbed_into="umbrella")
+        assert outcome["success"] is True
+        assert "absorbed into 'umbrella'" in outcome["message"]
+        # Only the absorbed skill is removed; the umbrella stays on disk.
+        narrow_dir, umbrella_dir = tmp_path / "narrow", tmp_path / "umbrella"
+        assert not narrow_dir.exists()
+        assert umbrella_dir.exists()
+
+    def test_delete_with_absorbed_into_empty_string_means_pruned(self, tmp_path):
+        """``absorbed_into=""`` is an explicit prune: success, no suffix in the message."""
+        with _skill_dir(tmp_path):
+            _create_skill("stale-skill", VALID_SKILL_CONTENT)
+            outcome = _delete_skill("stale-skill", absorbed_into="")
+        assert outcome["success"] is True
+        message = outcome["message"]
+        assert "absorbed into" not in message
+
+    def test_delete_with_absorbed_into_nonexistent_target_rejected(self, tmp_path):
+        """Naming an umbrella that doesn't exist fails and leaves the skill in place."""
+        with _skill_dir(tmp_path):
+            _create_skill("narrow", VALID_SKILL_CONTENT)
+            outcome = _delete_skill("narrow", absorbed_into="ghost-umbrella")
+        assert outcome["success"] is False
+        assert "does not exist" in outcome["error"]
+        # A validation failure must not delete anything.
+        narrow_dir = tmp_path / "narrow"
+        assert narrow_dir.exists()
+
+    def test_delete_with_absorbed_into_equals_self_rejected(self, tmp_path):
+        """A skill cannot be declared as absorbed into itself; nothing is deleted."""
+        with _skill_dir(tmp_path):
+            _create_skill("narrow", VALID_SKILL_CONTENT)
+            outcome = _delete_skill("narrow", absorbed_into="narrow")
+        assert outcome["success"] is False
+        assert "cannot equal" in outcome["error"]
+        narrow_dir = tmp_path / "narrow"
+        assert narrow_dir.exists()
+
+    def test_delete_with_absorbed_into_whitespace_only_treated_as_prune(self, tmp_path):
+        """A whitespace-only ``absorbed_into`` strips to "" and takes the prune path."""
+        blank_target = "   "
+        with _skill_dir(tmp_path):
+            _create_skill("narrow", VALID_SKILL_CONTENT)
+            outcome = _delete_skill("narrow", absorbed_into=blank_target)
+        assert outcome["success"] is True
+        assert "absorbed into" not in outcome["message"]
+
+    def test_delete_without_absorbed_into_backward_compat(self, tmp_path):
+        """Legacy callers that omit ``absorbed_into`` can still delete a skill.
+
+        For such deletes the curator reconciler falls back to its
+        heuristic+YAML logic.
+        """
+        with _skill_dir(tmp_path):
+            _create_skill("my-skill", VALID_SKILL_CONTENT)
+            outcome = _delete_skill("my-skill")
+        assert outcome["success"] is True
+

 # ---------------------------------------------------------------------------
 # write_file / remove_file
@@ -485,6 +536,25 @@ class TestSkillManageDispatcher:
        result = json.loads(raw)
        assert result["success"] is True

+    def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path):
+        """``skill_manage`` forwards ``absorbed_into`` to ``_delete_skill``.
+
+        Exercises the validation and message-suffix paths end-to-end through
+        the dispatcher.
+        """
+        with _skill_dir(tmp_path):
+            for skill_name in ("umbrella", "narrow"):
+                skill_manage(action="create", name=skill_name, content=VALID_SKILL_CONTENT)
+            raw_response = skill_manage(action="delete", name="narrow", absorbed_into="umbrella")
+        payload = json.loads(raw_response)
+        assert payload["success"] is True
+        assert "absorbed into 'umbrella'" in payload["message"]
+
+    def test_delete_via_dispatcher_rejects_missing_absorbed_target(self, tmp_path):
+        """Through the dispatcher, a nonexistent ``absorbed_into`` target is rejected."""
+        with _skill_dir(tmp_path):
+            skill_manage(action="create", name="narrow", content=VALID_SKILL_CONTENT)
+            raw_response = skill_manage(action="delete", name="narrow", absorbed_into="ghost")
+        payload = json.loads(raw_response)
+        assert payload["success"] is False
+        assert "does not exist" in payload["error"]
+

 class TestSecurityScanGate:
    """_security_scan_skill is gated by skills.guard_agent_created config flag."""
@@ -0,0 +1,129 @@
+"""TinyFish cloud browser provider."""
+
+import logging
+import os
+import uuid
+from typing import Any, Dict, Optional
+
+import requests
+
+from tools.browser_providers.base import CloudBrowserProvider
+from tools.managed_tool_gateway import resolve_managed_tool_gateway
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_BASE_URL = "https://api.browser.tinyfish.ai"
+_DEFAULT_TIMEOUT_SECONDS = 300
+
+
+class TinyFishBrowserProvider(CloudBrowserProvider):
+    """TinyFish (https://tinyfish.ai) cloud browser backend.
+
+    Credentials come from ``TINYFISH_API_KEY`` (direct mode) or, when that is
+    unset, from the managed tool gateway. A session is created with a single
+    POST; there is no explicit close call because sessions expire server-side
+    after the inactivity timeout.
+    """
+
+    def provider_name(self) -> str:
+        """Return the human-readable provider name."""
+        return "TinyFish"
+
+    # ------------------------------------------------------------------
+    # Config resolution (direct API key OR managed Nous gateway)
+    # ------------------------------------------------------------------
+
+    def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
+        """Resolve connection settings, or ``None`` when nothing is configured.
+
+        Returns a dict with ``api_key``, ``base_url`` (no trailing slash) and
+        ``managed_mode``. A direct ``TINYFISH_API_KEY`` takes precedence over
+        the managed gateway.
+        """
+        api_key = os.environ.get("TINYFISH_API_KEY")
+        if api_key:
+            return {
+                "api_key": api_key,
+                "base_url": os.environ.get("TINYFISH_API_URL", _DEFAULT_BASE_URL).rstrip("/"),
+                "managed_mode": False,
+            }
+
+        managed = resolve_managed_tool_gateway("tinyfish")
+        if managed is None:
+            return None
+
+        return {
+            "api_key": managed.nous_user_token,
+            "base_url": managed.gateway_origin.rstrip("/"),
+            "managed_mode": True,
+        }
+
+    def _get_config(self) -> Dict[str, Any]:
+        """Return the resolved config.
+
+        Raises:
+            ValueError: if neither a direct API key nor the managed gateway
+                is available.
+        """
+        config = self._get_config_or_none()
+        if config is None:
+            raise ValueError(
+                "TinyFish requires a TINYFISH_API_KEY environment variable. "
+                "Get your API key at https://agent.tinyfish.ai/api-keys"
+            )
+        return config
+
+    def is_configured(self) -> bool:
+        """Return True when direct or managed credentials can be resolved."""
+        return self._get_config_or_none() is not None
+
+    # ------------------------------------------------------------------
+    # Session lifecycle
+    # ------------------------------------------------------------------
+
+    def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
+        """Build the request headers for a resolved ``config``."""
+        return {
+            "X-API-Key": config["api_key"],
+            "Content-Type": "application/json",
+        }
+
+    @staticmethod
+    def _resolve_timeout_seconds() -> int:
+        """Read ``TINYFISH_BROWSER_TIMEOUT``, falling back to the default.
+
+        An unset or blank variable silently yields the default. A value that
+        is not an integer also yields the default, but is logged so a typo in
+        ``.env`` does not go unnoticed.
+        """
+        raw = os.environ.get("TINYFISH_BROWSER_TIMEOUT")
+        if raw is None or not raw.strip():
+            return _DEFAULT_TIMEOUT_SECONDS
+        try:
+            return int(raw)
+        except ValueError:
+            logger.warning(
+                "Ignoring invalid TINYFISH_BROWSER_TIMEOUT=%r; using default of %d seconds",
+                raw,
+                _DEFAULT_TIMEOUT_SECONDS,
+            )
+            return _DEFAULT_TIMEOUT_SECONDS
+
+    def create_session(self, task_id: str) -> Dict[str, object]:
+        """Create a TinyFish browser session for ``task_id``.
+
+        Returns a dict with ``session_name``, ``bb_session_id``, ``cdp_url``
+        and ``features``.
+
+        Raises:
+            ValueError: when the provider is not configured, or the API
+                answers 401/403 (auth), 402 (credits/subscription) or
+                404 (browser API not enabled).
+            RuntimeError: for any other non-2xx status, or when a successful
+                response is not JSON or lacks ``session_id`` / ``cdp_url``.
+        """
+        config = self._get_config()
+        timeout_seconds = self._resolve_timeout_seconds()
+
+        # NOTE(review): the POST targets the base URL itself — presumably the
+        # session-creation endpoint lives at the API root; confirm.
+        response = requests.post(
+            config["base_url"],
+            headers=self._headers(config),
+            json={"timeout_seconds": timeout_seconds},
+            timeout=30,
+        )
+
+        if response.status_code in (401, 403):
+            raise ValueError(
+                f"TinyFish authentication failed (HTTP {response.status_code}). "
+                "Check your TINYFISH_API_KEY at https://agent.tinyfish.ai/api-keys"
+            )
+        if response.status_code == 402:
+            raise ValueError(
+                "TinyFish browser session failed: insufficient credits or no active subscription. "
+                "Check your account at https://agent.tinyfish.ai"
+            )
+        if response.status_code == 404:
+            raise ValueError(
+                "TinyFish browser API is not enabled on your plan. "
+                "Contact support or upgrade at https://agent.tinyfish.ai"
+            )
+        if not response.ok:
+            raise RuntimeError(
+                f"Failed to create TinyFish browser session: "
+                f"{response.status_code} {response.text[:200]}"
+            )
+
+        # A 2xx with a non-JSON body or missing fields would otherwise escape
+        # as a bare JSONDecodeError/KeyError with no provider context.
+        try:
+            data = response.json()
+            provider_session_id = data["session_id"]
+            cdp_url = data["cdp_url"]
+        except (ValueError, KeyError, TypeError) as err:
+            raise RuntimeError(
+                "TinyFish returned an unexpected session response: "
+                f"{response.text[:200]}"
+            ) from err
+
+        session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
+
+        logger.info("Created TinyFish browser session %s", session_name)
+
+        return {
+            "session_name": session_name,
+            # NOTE(review): "bb_session_id" is presumably the key name the
+            # browser tool expects from every provider — confirm.
+            "bb_session_id": provider_session_id,
+            "cdp_url": cdp_url,
+            "features": {"tinyfish": True},
+        }
+
+    def close_session(self, session_id: str) -> bool:
+        """Report success without calling the API; always returns True."""
+        # TinyFish has no explicit delete endpoint — sessions auto-expire on inactivity timeout.
+        logger.debug(
+            "TinyFish sessions expire automatically on inactivity — no close call needed for %s",
+            session_id,
+        )
+        return True
+
+    def emergency_cleanup(self, session_id: str) -> None:
+        """Do nothing beyond a debug log entry."""
+        # No-op: TinyFish sessions are cleaned up server-side on inactivity.
+        logger.debug("TinyFish emergency_cleanup skipped for %s — auto-expiry handles cleanup", session_id)
@@ -83,6 +83,7 @@ from tools.browser_providers.base import CloudBrowserProvider
 from tools.browser_providers.browserbase import BrowserbaseProvider
 from tools.browser_providers.browser_use import BrowserUseProvider
 from tools.browser_providers.firecrawl import FirecrawlProvider
+from tools.browser_providers.tinyfish import TinyFishBrowserProvider
 from tools.tool_backend_helpers import normalize_browser_cloud_provider

 # Camofox local anti-detection browser backend (optional).
@@ -391,6 +392,7 @@ _PROVIDER_REGISTRY: Dict[str, type] = {
    "browserbase": BrowserbaseProvider,
    "browser-use": BrowserUseProvider,
    "firecrawl": FirecrawlProvider,
+    "tinyfish": TinyFishBrowserProvider,
 }

 _cached_cloud_provider: Optional[CloudBrowserProvider] = None
@@ -560,8 +560,18 @@ def _patch_skill(
    }


-def _delete_skill(name: str) -> Dict[str, Any]:
-    """Delete a skill."""
+def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, Any]:
+    """Delete a skill.
+
+    ``absorbed_into`` declares intent:
+      - ``None`` / missing  → caller didn't declare (legacy / non-curator path);
+        accepted for backward compat, but the curator classification pipeline
+        can't tell consolidation from pruning without it and has to guess.
+      - ``""`` (empty)      → explicit "truly pruned, no forwarding target".
+      - ``"<skill-name>"``  → content was absorbed into that umbrella; the
+        target must exist on disk. Validated here so the model can't claim an
+        umbrella that doesn't exist.
+    """
    existing = _find_skill(name)
    if not existing:
        return {"success": False, "error": f"Skill '{name}' not found."}
@@ -570,6 +580,24 @@ def _delete_skill(name: str) -> Dict[str, Any]:
    if pinned_err:
        return {"success": False, "error": pinned_err}

+    # Validate absorbed_into target when declared non-empty
+    if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
+        target_name = absorbed_into.strip()
+        if target_name == name:
+            return {
+                "success": False,
+                "error": f"absorbed_into='{target_name}' cannot equal the skill being deleted.",
+            }
+        target = _find_skill(target_name)
+        if not target:
+            return {
+                "success": False,
+                "error": (
+                    f"absorbed_into='{target_name}' does not exist. "
+                    f"Create or patch the umbrella skill first, then retry the delete."
+                ),
+            }
+
    skill_dir = existing["path"]
    skills_root = _containing_skills_root(skill_dir)
    shutil.rmtree(skill_dir)
@@ -579,9 +607,13 @@ def _delete_skill(name: str) -> Dict[str, Any]:
    if parent != skills_root and parent.exists() and not any(parent.iterdir()):
        parent.rmdir()

+    message = f"Skill '{name}' deleted."
+    if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
+        message += f" Content absorbed into '{absorbed_into.strip()}'."
+
    return {
        "success": True,
-        "message": f"Skill '{name}' deleted.",
+        "message": message,
    }


@@ -702,6 +734,7 @@ def skill_manage(
    old_string: str = None,
    new_string: str = None,
    replace_all: bool = False,
+    absorbed_into: str = None,
 ) -> str:
    """
    Manage user-created skills. Dispatches to the appropriate action handler.
@@ -726,7 +759,7 @@ def skill_manage(
        result = _patch_skill(name, old_string, new_string, file_path, replace_all)

    elif action == "delete":
-        result = _delete_skill(name)
+        result = _delete_skill(name, absorbed_into=absorbed_into)

    elif action == "write_file":
        if not file_path:
@@ -778,6 +811,13 @@ SKILL_MANAGE_SCHEMA = {
        "patch (old_string/new_string — preferred for fixes), "
        "edit (full SKILL.md rewrite — major overhauls only), "
        "delete, write_file, remove_file.\n\n"
+        "On delete, pass `absorbed_into=<umbrella>` when you're merging this "
+        "skill's content into another one, or `absorbed_into=\"\"` when you're "
+        "pruning it with no forwarding target. This lets the curator tell "
+        "consolidation from pruning without guessing, so downstream consumers "
+        "(cron jobs that reference the old skill name, etc.) get updated "
+        "correctly. The target you name in `absorbed_into` must already "
+        "exist — create/patch the umbrella first, then delete.\n\n"
        "Create when: complex task succeeded (5+ calls), errors overcome, "
        "user-corrected approach worked, non-trivial workflow discovered, "
        "or user asks you to remember a procedure.\n"
@@ -855,6 +895,20 @@ SKILL_MANAGE_SCHEMA = {
                "type": "string",
                "description": "Content for the file. Required for 'write_file'."
            },
+            "absorbed_into": {
+                "type": "string",
+                "description": (
+                    "For 'delete' only — declares intent so the curator can "
+                    "tell consolidation from pruning without guessing. "
+                    "Pass the umbrella skill name when this skill's content "
+                    "was merged into another (the target must already exist). "
+                    "Pass an empty string when the skill is truly stale and "
+                    "being pruned with no forwarding target. Omitting the arg "
+                    "on delete is supported for backward compatibility but "
+                    "downstream tooling (e.g. cron-job skill reference "
+                    "rewriting) will have to guess at intent."
+                )
+            },
        },
        "required": ["action", "name"],
    },
@@ -877,6 +931,7 @@ registry.register(
        file_content=args.get("file_content"),
        old_string=args.get("old_string"),
        new_string=args.get("new_string"),
-        replace_all=args.get("replace_all", False)),
+        replace_all=args.get("replace_all", False),
+        absorbed_into=args.get("absorbed_into")),
    emoji="📝",
 )
@@ -14,6 +14,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 |----------|-------------|
 | `OPENROUTER_API_KEY` | OpenRouter API key (recommended for flexibility) |
 | `OPENROUTER_BASE_URL` | Override the OpenRouter-compatible base URL |
+| `HERMES_OPENROUTER_CACHE` | Enable OpenRouter response caching (`1`/`true`/`yes`/`on`). Overrides `openrouter.response_cache` in config.yaml. See [Response Caching](https://openrouter.ai/docs/guides/features/response-caching). |
+| `HERMES_OPENROUTER_CACHE_TTL` | Cache TTL in seconds (1-86400). Overrides `openrouter.response_cache_ttl` in config.yaml. |
 | `NOUS_BASE_URL` | Override Nous Portal base URL (rarely needed; development/testing only) |
 | `NOUS_INFERENCE_BASE_URL` | Override Nous inference endpoint directly |
 | `AI_GATEWAY_API_KEY` | Vercel AI Gateway API key ([ai-gateway.vercel.sh](https://ai-gateway.vercel.sh)) |
@@ -116,6 +118,9 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 | `BROWSERBASE_PROJECT_ID` | Browserbase project ID |
 | `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) |
 | `FIRECRAWL_BROWSER_TTL` | Firecrawl browser session TTL in seconds (default: 300) |
+| `TINYFISH_API_KEY` | TinyFish API key for cloud browser ([agent.tinyfish.ai](https://agent.tinyfish.ai/api-keys)) |
+| `TINYFISH_API_URL` | TinyFish browser API URL override for staging/dev (optional) |
+| `TINYFISH_BROWSER_TIMEOUT` | TinyFish browser session inactivity timeout in seconds (default: 300) |
 | `BROWSER_CDP_URL` | Chrome DevTools Protocol URL for local browser (set via `/browser connect`, e.g. `ws://localhost:9222`) |
 | `CAMOFOX_URL` | Camofox local anti-detection browser URL (default: `http://localhost:9377`) |
 | `BROWSER_INACTIVITY_TIMEOUT` | Browser session inactivity timeout in seconds |
@@ -12,6 +12,7 @@ Hermes Agent includes a full browser automation toolset with multiple backend op
 - **Browserbase cloud mode** via [Browserbase](https://browserbase.com) for managed cloud browsers and anti-bot tooling
 - **Browser Use cloud mode** via [Browser Use](https://browser-use.com) as an alternative cloud browser provider
 - **Firecrawl cloud mode** via [Firecrawl](https://firecrawl.dev) for cloud browsers with built-in scraping
+- **TinyFish cloud mode** via [TinyFish](https://tinyfish.ai) for fast cloud CDP browsers
 - **Camofox local mode** via [Camofox](https://github.com/jo-inc/camofox-browser) for local anti-detection browsing (Firefox-based fingerprint spoofing)
 - **Local Chrome via CDP** — connect browser tools to your own Chrome instance using `/browser connect`
 - **Local browser mode** via the `agent-browser` CLI and a local Chromium installation
@@ -86,6 +87,29 @@ FIRECRAWL_API_URL=http://localhost:3002
 FIRECRAWL_BROWSER_TTL=600
 ```

+### TinyFish cloud mode
+
+To use TinyFish as your cloud browser provider, add:
+
+```bash
+# Add to ~/.hermes/.env
+TINYFISH_API_KEY=your_key_here
+```
+
+Get your API key at [agent.tinyfish.ai/api-keys](https://agent.tinyfish.ai/api-keys). Then select TinyFish as your browser provider:
+
+```bash
+hermes setup tools
+# → Browser Automation → TinyFish
+```
+
+Optional settings:
+
+```bash
+# Session inactivity timeout in seconds (default: 300, capped to your plan maximum)
+TINYFISH_BROWSER_TIMEOUT=600
+```
+
 ### Hybrid routing: cloud for public URLs, local for LAN/localhost

 When a cloud provider is configured, Hermes auto-spawns a **local Chromium sidecar**
@@ -69,7 +69,7 @@ tts:
    model: "gemini-2.5-flash-preview-tts"  # or gemini-2.5-pro-preview-tts
    voice: "Kore"               # 30 prebuilt voices: Zephyr, Puck, Kore, Enceladus, Gacrux, etc.
  xai:
-    voice_id: "eve"             # xAI TTS voice (see https://docs.x.ai/docs/api-reference#tts)
+    voice_id: "eve"             # or a custom voice ID — see docs below
    language: "en"              # ISO 639-1 code
    sample_rate: 24000          # 22050 / 24000 (default) / 44100 / 48000
    bit_rate: 128000            # MP3 bitrate; only applies when codec=mp3
@@ -127,6 +127,19 @@ Without ffmpeg, Edge TTS, MiniMax TTS, NeuTTS, KittenTTS, and Piper audio are se
 If you want voice bubbles without installing ffmpeg, switch to the OpenAI, ElevenLabs, or Mistral provider.
 :::

+### xAI Custom Voices (voice cloning)
+
+xAI supports cloning your voice and using it with TTS. Create a custom voice in the [xAI Console](https://console.x.ai/team/default/voice/voice-library), then set the resulting `voice_id` in your config:
+
+```yaml
+tts:
+  provider: xai
+  xai:
+    voice_id: "nlbqfwie"   # your custom voice ID
+```
+
+See the [xAI Custom Voices docs](https://docs.x.ai/developers/model-capabilities/audio/custom-voices) for details on recording, supported formats, and limits.
+
 ### Piper (local, 44 languages)

 Piper is a fast, local neural TTS engine from the Open Home Foundation (the Home Assistant maintainers). It runs entirely on CPU, supports **44 languages** with pre-trained voices, and needs no API key.