Merge remote-tracking branch 'origin/main' into switch-managed-browser-to-browser-use

fix(browser-use): port missing improvements from PR #5605
- CDP URL normalization: resolve HTTP discovery URLs to websocket after cloud provider create_session() (prevents agent-browser failures)
- Managed session payload: send timeout=5 and proxyCountryCode=us for gateway-backed sessions (prevents billing overruns)
- Update prompt builder, browser_close schema, and module docstring to replace remaining Browserbase references with Browser Use
- Dynamic /browser status detection via _get_cloud_provider() instead of hardcoded env var checks (future-proof for new providers)
- Rename post_setup key from 'browserbase' to 'agent_browser'
- Update setup hint to mention Browser Use alongside Browserbase
- Add tests: CDP normalization, browserbase direct-only guard, managed browser-use gateway, direct browserbase fallback
2026-04-07 08:12:45 -04:00 · 2026-04-07 22:00:15 +10:00 · 2026-04-07 20:43:14 +10:00 · 2026-04-07 03:28:44 -07:00 · 2026-04-07 02:49:20 -07:00 · 2026-04-07 02:40:16 -07:00
81 changed files with 5275 additions and 533 deletions
@@ -39,7 +39,6 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
    "browser_scroll": "execute",
    "browser_press": "execute",
    "browser_back": "execute",
-    "browser_close": "execute",
    "browser_get_images": "read",
    # Agent internals
    "delegate_task": "execute",
@@ -260,26 +260,73 @@ class _CodexCompletionsAdapter:
        usage = None

        try:
+            # Collect output items and text deltas during streaming —
+            # the Codex backend can return empty response.output from
+            # get_final_response() even when items were streamed.
+            collected_output_items: List[Any] = []
+            collected_text_deltas: List[str] = []
+            has_function_calls = False
            with self._client.responses.stream(**resp_kwargs) as stream:
                for _event in stream:
-                    pass
+                    _etype = getattr(_event, "type", "")
+                    if _etype == "response.output_item.done":
+                        _done = getattr(_event, "item", None)
+                        if _done is not None:
+                            collected_output_items.append(_done)
+                    elif "output_text.delta" in _etype:
+                        _delta = getattr(_event, "delta", "")
+                        if _delta:
+                            collected_text_deltas.append(_delta)
+                    elif "function_call" in _etype:
+                        has_function_calls = True
                final = stream.get_final_response()

-            # Extract text and tool calls from the Responses output
+            # Backfill empty output from collected stream events
+            _output = getattr(final, "output", None)
+            if isinstance(_output, list) and not _output:
+                if collected_output_items:
+                    final.output = list(collected_output_items)
+                    logger.debug(
+                        "Codex auxiliary: backfilled %d output items from stream events",
+                        len(collected_output_items),
+                    )
+                elif collected_text_deltas and not has_function_calls:
+                    # Only synthesize text when no tool calls were streamed —
+                    # a function_call response with incidental text should not
+                    # be collapsed into a plain-text message.
+                    assembled = "".join(collected_text_deltas)
+                    final.output = [SimpleNamespace(
+                        type="message", role="assistant", status="completed",
+                        content=[SimpleNamespace(type="output_text", text=assembled)],
+                    )]
+                    logger.debug(
+                        "Codex auxiliary: synthesized from %d deltas (%d chars)",
+                        len(collected_text_deltas), len(assembled),
+                    )
+
+            # Extract text and tool calls from the Responses output.
+            # Items may be SDK objects (attrs) or dicts (raw/fallback paths),
+            # so use a helper that handles both shapes.
+            def _item_get(obj: Any, key: str, default: Any = None) -> Any:
+                val = getattr(obj, key, None)
+                if val is None and isinstance(obj, dict):
+                    val = obj.get(key, default)
+                return val if val is not None else default
+
            for item in getattr(final, "output", []):
-                item_type = getattr(item, "type", None)
+                item_type = _item_get(item, "type")
                if item_type == "message":
-                    for part in getattr(item, "content", []):
-                        ptype = getattr(part, "type", None)
+                    for part in (_item_get(item, "content") or []):
+                        ptype = _item_get(part, "type")
                        if ptype in ("output_text", "text"):
-                            text_parts.append(getattr(part, "text", ""))
+                            text_parts.append(_item_get(part, "text", ""))
                elif item_type == "function_call":
                    tool_calls_raw.append(SimpleNamespace(
-                        id=getattr(item, "call_id", ""),
+                        id=_item_get(item, "call_id", ""),
                        type="function",
                        function=SimpleNamespace(
-                            name=getattr(item, "name", ""),
-                            arguments=getattr(item, "arguments", "{}"),
+                            name=_item_get(item, "name", ""),
+                            arguments=_item_get(item, "arguments", "{}"),
                        ),
                    ))

@@ -27,6 +27,7 @@ from hermes_cli.auth import (
    _is_expiring,
    _load_auth_store,
    _load_provider_state,
+    _resolve_zai_base_url,
    read_credential_pool,
    write_credential_pool,
 )
@@ -348,6 +349,9 @@ def get_pool_strategy(provider: str) -> str:
    return STRATEGY_FILL_FIRST


+DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1
+
+
 class CredentialPool:
    def __init__(self, provider: str, entries: List[PooledCredential]):
        self.provider = provider
@@ -355,6 +359,8 @@ class CredentialPool:
        self._current_id: Optional[str] = None
        self._strategy = get_pool_strategy(provider)
        self._lock = threading.Lock()
+        self._active_leases: Dict[str, int] = {}
+        self._max_concurrent = DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL

    def has_credentials(self) -> bool:
        return bool(self._entries)
@@ -760,6 +766,51 @@ class CredentialPool:
                logger.info("credential pool: rotated to %s", _next_label)
            return next_entry

+    def acquire_lease(self, credential_id: Optional[str] = None) -> Optional[str]:
+        """Acquire a soft lease on a credential.
+
+        If a specific credential_id is provided, lease that entry directly.
+        Otherwise prefer the least-leased available credential, using priority as
+        a stable tie-breaker. When every credential is already at the soft cap,
+        still return the least-leased one instead of blocking.
+        """
+        with self._lock:
+            if credential_id:
+                self._active_leases[credential_id] = self._active_leases.get(credential_id, 0) + 1
+                self._current_id = credential_id
+                return credential_id
+
+            available = self._available_entries(clear_expired=True, refresh=True)
+            if not available:
+                return None
+
+            below_cap = [
+                entry for entry in available
+                if self._active_leases.get(entry.id, 0) < self._max_concurrent
+            ]
+            candidates = below_cap if below_cap else available
+            chosen = min(
+                candidates,
+                key=lambda entry: (self._active_leases.get(entry.id, 0), entry.priority),
+            )
+            self._active_leases[chosen.id] = self._active_leases.get(chosen.id, 0) + 1
+            self._current_id = chosen.id
+            return chosen.id
+
+    def release_lease(self, credential_id: str) -> None:
+        """Release a previously acquired credential lease."""
+        with self._lock:
+            count = self._active_leases.get(credential_id, 0)
+            if count <= 1:
+                self._active_leases.pop(credential_id, None)
+            else:
+                self._active_leases[credential_id] = count - 1
+
+    def active_lease_count(self, credential_id: str) -> int:
+        """Return the number of active leases for a credential."""
+        with self._lock:
+            return self._active_leases.get(credential_id, 0)
+
    def try_refresh_current(self) -> Optional[PooledCredential]:
        with self._lock:
            return self._try_refresh_current_unlocked()
@@ -1036,6 +1087,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
+        if provider == "zai":
+            base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
        changed |= _upsert_entry(
            entries,
            provider,
@@ -890,8 +890,6 @@ def get_cute_tool_message(
        return _wrap(f"┊ ◀️  back      {dur}")
    if tool_name == "browser_press":
        return _wrap(f"┊ ⌨️  press     {args.get('key', '?')}  {dur}")
-    if tool_name == "browser_close":
-        return _wrap(f"┊ 🚪 close     browser  {dur}")
    if tool_name == "browser_get_images":
        return _wrap(f"┊ 🖼️  images    extracting  {dur}")
    if tool_name == "browser_vision":
@@ -744,7 +744,6 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
        "browser_type",
        "browser_scroll",
        "browser_console",
-        "browser_close",
        "browser_press",
        "browser_get_images",
        "browser_vision",
@@ -774,13 +773,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -

    lines = [
        "# Nous Subscription",
-        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.",
+        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
        "Current capability status:",
    ]
    lines.extend(_status_line(feature) for feature in features.items())
    lines.extend(
        [
-            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.",
+            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
            "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
            "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
            "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
@@ -539,7 +539,7 @@ platform_toolsets:
 #   terminal     - terminal, process
 #   file         - read_file, write_file, patch, search
 #   browser      - browser_navigate, browser_snapshot, browser_click, browser_type,
-#                  browser_scroll, browser_back, browser_press, browser_close,
+#                  browser_scroll, browser_back, browser_press,
 #                  browser_get_images, browser_vision  (requires BROWSERBASE_API_KEY)
 #   vision       - vision_analyze  (requires OPENROUTER_API_KEY)
 #   image_gen    - image_generate  (requires FAL_KEY)
@@ -1920,6 +1920,12 @@ class HermesCLI:
            _cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}")
            self._reasoning_box_opened = False

+            # Flush any content that was deferred while reasoning was rendering.
+            deferred = getattr(self, "_deferred_content", "")
+            if deferred:
+                self._deferred_content = ""
+                self._emit_stream_text(deferred)
+
    def _stream_delta(self, text) -> None:
        """Line-buffered streaming callback for real-time token rendering.

@@ -2022,6 +2028,13 @@ class HermesCLI:
        if not text:
            return

+        # When show_reasoning is on and reasoning is still rendering,
+        # defer content until the reasoning box closes.  This ensures the
+        # reasoning block always appears BEFORE the response in the terminal.
+        if self.show_reasoning and getattr(self, "_reasoning_box_opened", False):
+            self._deferred_content = getattr(self, "_deferred_content", "") + text
+            return
+
        # Close the live reasoning box before opening the response box
        self._close_reasoning_box()

@@ -2088,6 +2101,7 @@ class HermesCLI:
        self._reasoning_box_opened = False
        self._reasoning_buf = ""
        self._reasoning_preview_buf = ""
+        self._deferred_content = ""

    def _slow_command_status(self, command: str) -> str:
        """Return a user-facing status message for slower slash commands."""
@@ -5023,13 +5037,13 @@ class HermesCLI:
                    pass
                print()
                print("🌐 Browser disconnected from live Chrome")
-                print("   Browser tools reverted to default mode (local headless or Browserbase)")
+                print("   Browser tools reverted to default mode (local headless or cloud provider)")
                print()

                if hasattr(self, '_pending_input'):
                    self._pending_input.put(
                        "[System note: The user has disconnected the browser tools from their live Chrome. "
-                        "Browser tools are back to default mode (headless local browser or Browserbase cloud).]"
+                        "Browser tools are back to default mode (headless local browser or cloud provider).]"
                    )
            else:
                print()
@@ -5056,10 +5070,17 @@ class HermesCLI:
                    print("   Status: ✓ reachable")
                except (OSError, Exception):
                    print("   Status: ⚠ not reachable (Chrome may not be running)")
-            elif os.environ.get("BROWSERBASE_API_KEY"):
-                print("🌐 Browser: Browserbase (cloud)")
            else:
-                print("🌐 Browser: local headless Chromium (agent-browser)")
+                try:
+                    from tools.browser_tool import _get_cloud_provider
+                    provider = _get_cloud_provider()
+                except Exception:
+                    provider = None
+
+                if provider is not None:
+                    print(f"🌐 Browser: {provider.provider_name()} (cloud)")
+                else:
+                    print("🌐 Browser: local headless Chromium (agent-browser)")
            print()
            print("   /browser connect      — connect to your live Chrome")
            print("   /browser disconnect   — revert to default")
@@ -8120,6 +8141,25 @@ class HermesCLI:
                        # Periodic config watcher — auto-reload MCP on mcp_servers change
                        if not self._agent_running:
                            self._check_config_mcp_changes()
+                            # Check for background process completion notifications
+                            # while the agent is idle (user hasn't typed anything yet).
+                            try:
+                                from tools.process_registry import process_registry
+                                if not process_registry.completion_queue.empty():
+                                    completion = process_registry.completion_queue.get_nowait()
+                                    _exit = completion.get("exit_code", "?")
+                                    _cmd = completion.get("command", "unknown")
+                                    _sid = completion.get("session_id", "unknown")
+                                    _out = completion.get("output", "")
+                                    _synth = (
+                                        f"[SYSTEM: Background process {_sid} completed "
+                                        f"(exit code {_exit}).\n"
+                                        f"Command: {_cmd}\n"
+                                        f"Output:\n{_out}]"
+                                    )
+                                    self._pending_input.put(_synth)
+                            except Exception:
+                                pass
                        continue
                    
                    if not user_input:
@@ -8233,7 +8273,29 @@ class HermesCLI:
                                except Exception as e:
                                    _cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}")
                            threading.Thread(target=_restart_recording, daemon=True).start()
-                    
+
+                        # Drain process completion notifications — any background
+                        # process that finished with notify_on_complete while the
+                        # agent was running (or before) gets auto-injected as a
+                        # new user message so the agent can react to it.
+                        try:
+                            from tools.process_registry import process_registry
+                            while not process_registry.completion_queue.empty():
+                                completion = process_registry.completion_queue.get_nowait()
+                                _exit = completion.get("exit_code", "?")
+                                _cmd = completion.get("command", "unknown")
+                                _sid = completion.get("session_id", "unknown")
+                                _out = completion.get("output", "")
+                                _synth = (
+                                    f"[SYSTEM: Background process {_sid} completed "
+                                    f"(exit code {_exit}).\n"
+                                    f"Command: {_cmd}\n"
+                                    f"Output:\n{_out}]"
+                                )
+                                self._pending_input.put(_synth)
+                        except Exception:
+                            pass  # Non-fatal — don't break the main loop
+
                except Exception as e:
                    print(f"Error: {e}")
        
@@ -569,6 +569,16 @@ class BasePlatformAdapter(ABC):
        """
        self._message_handler = handler
    
+    def set_session_store(self, session_store: Any) -> None:
+        """
+        Set the session store for checking active sessions.
+        
+        Used by adapters that need to check if a thread/conversation
+        has an active session before processing messages (e.g., Slack
+        thread replies without explicit mentions).
+        """
+        self._session_store = session_store
+    
    @abstractmethod
    async def connect(self) -> bool:
        """
@@ -1093,16 +1103,20 @@ class BasePlatformAdapter(ABC):
        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
-            # /approve and /deny must bypass the active-session guard.
-            # The agent thread is blocked on threading.Event.wait() inside
-            # tools/approval.py — queuing these commands creates a deadlock:
-            # the agent waits for approval, approval waits for agent to finish.
-            # Dispatch directly to the message handler without touching session
-            # lifecycle (no competing background task, no session guard removal).
+            # Certain commands must bypass the active-session guard and be
+            # dispatched directly to the gateway runner.  Without this, they
+            # are queued as pending messages and either:
+            #   - leak into the conversation as user text (/stop, /new), or
+            #   - deadlock (/approve, /deny — agent is blocked on Event.wait)
+            #
+            # Dispatch inline: call the message handler directly and send the
+            # response.  Do NOT use _process_message_background — it manages
+            # session lifecycle and its cleanup races with the running task
+            # (see PR #4926).
            cmd = event.get_command()
-            if cmd in ("approve", "deny"):
+            if cmd in ("approve", "deny", "status", "stop", "new", "reset"):
                logger.debug(
-                    "[%s] Approval command '/%s' bypassing active-session guard for %s",
+                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
                )
                try:
@@ -1116,29 +1130,7 @@ class BasePlatformAdapter(ABC):
                            metadata=_thread_meta,
                        )
                except Exception as e:
-                    logger.error("[%s] Approval dispatch failed: %s", self.name, e, exc_info=True)
-                return
-
-            # /status must also bypass the active-session guard so it always
-            # returns a system-generated response instead of being queued as
-            # user text and passed to the agent (#5046).
-            if cmd == "status":
-                logger.debug(
-                    "[%s] Status command bypassing active-session guard for %s",
-                    self.name, session_key,
-                )
-                try:
-                    _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-                    response = await self._message_handler(event)
-                    if response:
-                        await self._send_with_retry(
-                            chat_id=event.source.chat_id,
-                            content=response,
-                            reply_to=event.message_id,
-                            metadata=_thread_meta,
-                        )
-                except Exception as e:
-                    logger.error("[%s] Status dispatch failed: %s", self.name, e, exc_info=True)
+                    logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
                return

            # Special case: photo bursts/albums frequently arrive as multiple near-
@@ -2039,6 +2039,66 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            return SendResult(success=False, error=str(e))

+    async def send_model_picker(
+        self,
+        chat_id: str,
+        providers: list,
+        current_model: str,
+        current_provider: str,
+        session_key: str,
+        on_model_selected,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an interactive select-menu model picker.
+
+        Two-step drill-down: provider dropdown → model dropdown.
+        Uses Discord embeds + Select menus via ``ModelPickerView``.
+        """
+        if not self._client or not DISCORD_AVAILABLE:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            # Resolve target channel (use thread_id if present)
+            target_id = chat_id
+            if metadata and metadata.get("thread_id"):
+                target_id = metadata["thread_id"]
+
+            channel = self._client.get_channel(int(target_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(target_id))
+
+            try:
+                from hermes_cli.providers import get_label
+                provider_label = get_label(current_provider)
+            except Exception:
+                provider_label = current_provider
+
+            embed = discord.Embed(
+                title="⚙ Model Configuration",
+                description=(
+                    f"Current model: `{current_model or 'unknown'}`\n"
+                    f"Provider: {provider_label}\n\n"
+                    f"Select a provider:"
+                ),
+                color=discord.Color.blue(),
+            )
+
+            view = ModelPickerView(
+                providers=providers,
+                current_model=current_model,
+                current_provider=current_provider,
+                session_key=session_key,
+                on_model_selected=on_model_selected,
+                allowed_user_ids=self._allowed_user_ids,
+            )
+
+            msg = await channel.send(embed=embed, view=view)
+            return SendResult(success=True, message_id=str(msg.id))
+
+        except Exception as e:
+            logger.warning("[%s] send_model_picker failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
    def _get_parent_channel_id(self, channel: Any) -> Optional[str]:
        """Return the parent channel ID for a Discord thread-like channel, if present."""
        parent = getattr(channel, "parent", None)
@@ -2530,3 +2590,218 @@ if DISCORD_AVAILABLE:
            self.resolved = True
            for child in self.children:
                child.disabled = True
+
+    class ModelPickerView(discord.ui.View):
+        """Interactive select-menu view for model switching.
+
+        Two-step drill-down: provider dropdown → model dropdown.
+        Edits the original message in-place as the user navigates.
+        Times out after 2 minutes.
+        """
+
+        def __init__(
+            self,
+            providers: list,
+            current_model: str,
+            current_provider: str,
+            session_key: str,
+            on_model_selected,
+            allowed_user_ids: set,
+        ):
+            super().__init__(timeout=120)
+            self.providers = providers
+            self.current_model = current_model
+            self.current_provider = current_provider
+            self.session_key = session_key
+            self.on_model_selected = on_model_selected
+            self.allowed_user_ids = allowed_user_ids
+            self.resolved = False
+            self._selected_provider: str = ""
+
+            self._build_provider_select()
+
+        def _check_auth(self, interaction: discord.Interaction) -> bool:
+            if not self.allowed_user_ids:
+                return True
+            return str(interaction.user.id) in self.allowed_user_ids
+
+        def _build_provider_select(self):
+            """Build the provider dropdown menu."""
+            self.clear_items()
+            options = []
+            for p in self.providers:
+                count = p.get("total_models", len(p.get("models", [])))
+                label = f"{p['name']} ({count} models)"
+                desc = "current" if p.get("is_current") else None
+                options.append(
+                    discord.SelectOption(
+                        label=label[:100],
+                        value=p["slug"],
+                        description=desc,
+                    )
+                )
+            if not options:
+                return
+
+            select = discord.ui.Select(
+                placeholder="Choose a provider...",
+                options=options[:25],
+                custom_id="model_provider_select",
+            )
+            select.callback = self._on_provider_selected
+            self.add_item(select)
+
+            cancel_btn = discord.ui.Button(
+                label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel"
+            )
+            cancel_btn.callback = self._on_cancel
+            self.add_item(cancel_btn)
+
+        def _build_model_select(self, provider_slug: str):
+            """Build the model dropdown for a specific provider."""
+            self.clear_items()
+            provider = next(
+                (p for p in self.providers if p["slug"] == provider_slug), None
+            )
+            if not provider:
+                return
+
+            models = provider.get("models", [])
+            options = []
+            for model_id in models[:25]:
+                short = model_id.split("/")[-1] if "/" in model_id else model_id
+                options.append(
+                    discord.SelectOption(
+                        label=short[:100],
+                        value=model_id[:100],
+                    )
+                )
+            if not options:
+                return
+
+            select = discord.ui.Select(
+                placeholder=f"Choose a model from {provider.get('name', provider_slug)}...",
+                options=options,
+                custom_id="model_model_select",
+            )
+            select.callback = self._on_model_selected
+            self.add_item(select)
+
+            back_btn = discord.ui.Button(
+                label="◀ Back", style=discord.ButtonStyle.grey, custom_id="model_back"
+            )
+            back_btn.callback = self._on_back
+            self.add_item(back_btn)
+
+            cancel_btn = discord.ui.Button(
+                label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel2"
+            )
+            cancel_btn.callback = self._on_cancel
+            self.add_item(cancel_btn)
+
+        async def _on_provider_selected(self, interaction: discord.Interaction):
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized~", ephemeral=True
+                )
+                return
+
+            provider_slug = interaction.data["values"][0]
+            self._selected_provider = provider_slug
+            provider = next(
+                (p for p in self.providers if p["slug"] == provider_slug), None
+            )
+            pname = provider.get("name", provider_slug) if provider else provider_slug
+
+            self._build_model_select(provider_slug)
+
+            total = provider.get("total_models", 0) if provider else 0
+            shown = min(len(provider.get("models", [])), 25) if provider else 0
+            extra = f"\n*{total - shown} more available — type `/model <name>` directly*" if total > shown else ""
+
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Configuration",
+                    description=f"Provider: **{pname}**\nSelect a model:{extra}",
+                    color=discord.Color.blue(),
+                ),
+                view=self,
+            )
+
+        async def _on_model_selected(self, interaction: discord.Interaction):
+            if self.resolved:
+                await interaction.response.send_message(
+                    "Already resolved~", ephemeral=True
+                )
+                return
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized~", ephemeral=True
+                )
+                return
+
+            self.resolved = True
+            model_id = interaction.data["values"][0]
+
+            try:
+                result_text = await self.on_model_selected(
+                    str(interaction.channel_id),
+                    model_id,
+                    self._selected_provider,
+                )
+            except Exception as exc:
+                result_text = f"Error switching model: {exc}"
+
+            self.clear_items()
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Switched",
+                    description=result_text,
+                    color=discord.Color.green(),
+                ),
+                view=self,
+            )
+
+        async def _on_back(self, interaction: discord.Interaction):
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized~", ephemeral=True
+                )
+                return
+
+            self._build_provider_select()
+
+            try:
+                from hermes_cli.providers import get_label
+                provider_label = get_label(self.current_provider)
+            except Exception:
+                provider_label = self.current_provider
+
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Configuration",
+                    description=(
+                        f"Current model: `{self.current_model or 'unknown'}`\n"
+                        f"Provider: {provider_label}\n\n"
+                        f"Select a provider:"
+                    ),
+                    color=discord.Color.blue(),
+                ),
+                view=self,
+            )
+
+        async def _on_cancel(self, interaction: discord.Interaction):
+            self.resolved = True
+            self.clear_items()
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Model Configuration",
+                    description="Model selection cancelled.",
+                    color=discord.Color.greyple(),
+                ),
+                view=self,
+            )
+
+        async def on_timeout(self):
+            self.resolved = True
+            self.clear_items()
@@ -276,10 +276,13 @@ class SlackAdapter(BasePlatformAdapter):
        if not self._app:
            return SendResult(success=False, error="Not connected")
        try:
+            # Convert standard markdown → Slack mrkdwn
+            formatted = self.format_message(content)
+
            await self._get_client(chat_id).chat_update(
                channel=chat_id,
                ts=message_id,
-                text=content,
+                text=formatted,
            )
            return SendResult(success=True, message_id=message_id)
        except Exception as e:  # pragma: no cover - defensive logging
@@ -763,11 +766,28 @@ class SlackAdapter(BasePlatformAdapter):
        else:
            thread_ts = event.get("thread_ts") or ts  # ts fallback for channels

-        # In channels, only respond if bot is mentioned
+        # In channels, only respond if bot is mentioned OR if this is a
+        # reply in a thread where the bot has an active session.
        bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
-        if not is_dm and bot_uid:
-            if f"<@{bot_uid}>" not in text:
+        is_mentioned = bot_uid and f"<@{bot_uid}>" in text
+        
+        if not is_dm and bot_uid and not is_mentioned:
+            # Check if this is a thread reply (thread_ts exists and differs from ts)
+            event_thread_ts = event.get("thread_ts")
+            is_thread_reply = event_thread_ts and event_thread_ts != ts
+            
+            if is_thread_reply and self._has_active_session_for_thread(
+                channel_id=channel_id,
+                thread_ts=event_thread_ts,
+                user_id=user_id,
+            ):
+                # Allow thread replies without mention if there's an active session
+                pass
+            else:
+                # Not a thread reply or no active session - ignore
                return
+        
+        if is_mentioned:
            # Strip the bot mention from the text
            text = text.replace(f"<@{bot_uid}>", "").strip()

@@ -933,6 +953,68 @@ class SlackAdapter(BasePlatformAdapter):

        await self.handle_message(event)

+    def _has_active_session_for_thread(
+        self,
+        channel_id: str,
+        thread_ts: str,
+        user_id: str,
+    ) -> bool:
+        """Check if there's an active session for a thread.
+
+        Used to determine if thread replies without @mentions should be
+        processed (they should if there's an active session).
+
+        Args:
+            channel_id: The Slack channel ID
+            thread_ts: The thread timestamp (parent message ts)
+            user_id: The user ID of the sender
+
+        Returns:
+            True if the session store holds an entry under the key derived
+            for this thread. False if no store is attached, the key is
+            absent, or the lookup raises.
+        """
+        session_store = getattr(self, "_session_store", None)
+        if not session_store:
+            return False
+
+        try:
+            # Hand-built key intended to mirror build_session_key for group
+            # sessions. NOTE(review): keep in sync with that function; it is
+            # not visible from here.
+            key_parts = ["agent:main", "slack", "group", channel_id, thread_ts]
+
+            # Per-user group sessions add the sender to the key. A store
+            # without a config, or a config without the flag, is treated as
+            # per-user (True).
+            store_config = getattr(session_store, "config", None)
+            per_user = getattr(store_config, "group_sessions_per_user", True)
+            if per_user and user_id:
+                key_parts.append(str(user_id))
+
+            session_key = ":".join(key_parts)
+
+            # Check if the session exists in the store
+            session_store._ensure_loaded()
+            return session_key in session_store._entries
+        except Exception:
+            # Deliberate best-effort: if anything goes wrong, default to
+            # False so the caller keeps requiring an explicit mention.
+            return False
+
    async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
        """Download a Slack file using the bot token for auth, with retry."""
        import asyncio
@@ -151,6 +151,8 @@ class TelegramAdapter(BasePlatformAdapter):
        self._dm_topics: Dict[str, int] = {}
        # DM Topics config from extra.dm_topics
        self._dm_topics_config: List[Dict[str, Any]] = self.config.extra.get("dm_topics", [])
+        # Interactive model picker state per chat
+        self._model_picker_state: Dict[str, dict] = {}

    def _fallback_ips(self) -> list[str]:
        """Return validated fallback IPs from config (populated by _apply_env_overrides)."""
@@ -1008,14 +1010,318 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.warning("[%s] send_update_prompt failed: %s", self.name, e)
            return SendResult(success=False, error=str(e))

+    async def send_model_picker(
+        self,
+        chat_id: str,
+        providers: list,
+        current_model: str,
+        current_provider: str,
+        session_key: str,
+        on_model_selected,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Post the provider-selection step of the inline-keyboard model picker.
+
+        The picker is a two-step drill-down (provider, then model) that is
+        edited in place as the user navigates. The per-chat state needed by
+        later callbacks is stored in ``self._model_picker_state``.
+
+        Args:
+            chat_id: Target chat; converted with ``int()`` for the Bot API.
+            providers: Provider dicts; ``name`` and ``slug`` are required,
+                ``models``, ``total_models`` and ``is_current`` are optional.
+            current_model: Model shown as active (``unknown`` when empty).
+            current_provider: Provider whose label is shown as active.
+            session_key: Stored in the picker state.
+            on_model_selected: Callback stored in the picker state for the
+                model-selection step.
+            metadata: Optional dict; ``thread_id`` selects the message thread.
+
+        Returns:
+            ``SendResult`` with the picker's message id on success, or a
+            failed result when not connected or when building/sending raises.
+        """
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        # Without the label helper, show the provider slug unchanged.
+        try:
+            from hermes_cli.providers import get_label
+        except ImportError:
+            def get_label(slug):
+                return slug
+
+        try:
+            # One button per provider, later laid out two per row.
+            provider_buttons: list = []
+            for entry in providers:
+                model_count = entry.get("total_models", len(entry.get("models", [])))
+                caption = f"{entry['name']} ({model_count})"
+                if entry.get("is_current"):
+                    caption = f"✓ {caption}"
+                # Compact callback data: mp:<slug>  (max 64 bytes)
+                provider_buttons.append(
+                    InlineKeyboardButton(caption, callback_data=f"mp:{entry['slug']}")
+                )
+
+            layout = [
+                provider_buttons[k : k + 2]
+                for k in range(0, len(provider_buttons), 2)
+            ]
+            layout.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+            markup = InlineKeyboardMarkup(layout)
+
+            provider_label = get_label(current_provider)
+            prompt = (
+                f"⚙ *Model Configuration*\n\n"
+                f"Current model: `{current_model or 'unknown'}`\n"
+                f"Provider: {provider_label}\n\n"
+                f"Select a provider:"
+            )
+
+            thread_id = None
+            if metadata:
+                thread_id = metadata.get("thread_id")
+            sent = await self._bot.send_message(
+                chat_id=int(chat_id),
+                text=prompt,
+                parse_mode=ParseMode.MARKDOWN,
+                reply_markup=markup,
+                message_thread_id=int(thread_id) if thread_id else None,
+            )
+
+            # Store picker state keyed by chat_id
+            picker_state = {
+                "msg_id": sent.message_id,
+                "providers": providers,
+                "session_key": session_key,
+                "on_model_selected": on_model_selected,
+                "current_model": current_model,
+                "current_provider": current_provider,
+            }
+            self._model_picker_state[str(chat_id)] = picker_state
+
+            return SendResult(success=True, message_id=str(sent.message_id))
+        except Exception as e:
+            logger.warning("[%s] send_model_picker failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
+    _MODEL_PAGE_SIZE = 8
+
+    def _build_model_keyboard(self, models: list, page: int) -> tuple:
+        """Lay out one page of model buttons plus navigation controls.
+
+        Returns ``(keyboard, page_info)``. ``page_info`` is a
+        `` (start–end of total)`` suffix, or ``""`` when all models fit on a
+        single page. An out-of-range *page* is clamped to a valid page.
+        """
+        per_page = self._MODEL_PAGE_SIZE
+        model_count = len(models)
+        # Ceiling division; an empty list still counts as one (empty) page.
+        page_count = max(1, -(-model_count // per_page))
+        current = min(max(page, 0), page_count - 1)
+
+        first = current * per_page
+        last = min(first + per_page, model_count)
+
+        model_buttons: list = []
+        for absolute_index, model_id in enumerate(models[first:last], start=first):
+            # Label is the part after the last "/", truncated to 38 chars;
+            # the callback carries the index into the full model list.
+            caption = model_id.rsplit("/", 1)[-1]
+            if len(caption) > 38:
+                caption = caption[:35] + "..."
+            model_buttons.append(
+                InlineKeyboardButton(caption, callback_data=f"mm:{absolute_index}")
+            )
+
+        grid = [model_buttons[k : k + 2] for k in range(0, len(model_buttons), 2)]
+
+        multi_page = page_count > 1
+        if multi_page:
+            # Prev / "n/m" counter / Next; the counter uses "mx:noop" data.
+            pager: list = []
+            if current > 0:
+                pager.append(InlineKeyboardButton("◀ Prev", callback_data=f"mg:{current - 1}"))
+            pager.append(InlineKeyboardButton(f"{current + 1}/{page_count}", callback_data="mx:noop"))
+            if current < page_count - 1:
+                pager.append(InlineKeyboardButton("Next ▶", callback_data=f"mg:{current + 1}"))
+            grid.append(pager)
+
+        grid.append([
+            InlineKeyboardButton("◀ Back", callback_data="mb"),
+            InlineKeyboardButton("✗ Cancel", callback_data="mx"),
+        ])
+
+        page_info = f" ({first + 1}–{last} of {model_count})" if multi_page else ""
+        return InlineKeyboardMarkup(grid), page_info
+
+    async def _handle_model_picker_callback(
+        self, query, data: str, chat_id: str
+    ) -> None:
+        """Dispatch one inline-keyboard click from the model picker.
+
+        Recognised callback data:
+          ``mp:<slug>``   provider chosen — show its models (page 0)
+          ``mg:<page>``   model-list page navigation
+          ``mm:<index>``  model chosen — run the stored switch callback
+          ``mb``          back to the provider list
+          ``mx``          cancel the picker
+        Anything else (e.g. the ``mx:noop`` page counter) is only acknowledged.
+
+        Args:
+            query: The callback query; must provide ``answer`` and
+                ``edit_message_text`` coroutines.
+            data: The callback data string of the pressed button.
+            chat_id: Key into ``self._model_picker_state``.
+        """
+        state = self._model_picker_state.get(chat_id)
+        if not state:
+            await query.answer(text="Picker expired — use /model again.")
+            return
+
+        # State is keyed by chat only, so without this check the buttons of
+        # an older picker message would act on the newest picker's model
+        # list (an "mm:<index>" could then select the wrong model).
+        clicked_msg_id = getattr(getattr(query, "message", None), "message_id", None)
+        if clicked_msg_id is not None and clicked_msg_id != state.get("msg_id"):
+            await query.answer(text="Picker expired — use /model again.")
+            return
+
+        async def _show_model_page(pname, total, models, page) -> None:
+            """Render the model list for one provider page and ack the click."""
+            keyboard, page_info = self._build_model_keyboard(models, page)
+            shown = len(models)
+            extra = f"\n_{total - shown} more available — type `/model <name>` directly_" if total > shown else ""
+            await query.edit_message_text(
+                text=(
+                    f"⚙ *Model Configuration*\n\n"
+                    f"Provider: *{pname}*{page_info}\n"
+                    f"Select a model:{extra}"
+                ),
+                parse_mode=ParseMode.MARKDOWN,
+                reply_markup=keyboard,
+            )
+            await query.answer()
+
+        if data.startswith("mp:"):
+            # --- Provider selected: show model buttons (page 0) ---
+            provider_slug = data[3:]
+            provider = next(
+                (p for p in state["providers"] if p["slug"] == provider_slug),
+                None,
+            )
+            if not provider:
+                await query.answer(text="Provider not found.")
+                return
+
+            models = provider.get("models", [])
+            pname = provider.get("name", provider_slug)
+            state["selected_provider"] = provider_slug
+            state["selected_provider_name"] = pname
+            state["model_list"] = models
+            state["model_page"] = 0
+
+            await _show_model_page(
+                pname, provider.get("total_models", len(models)), models, 0
+            )
+
+        elif data.startswith("mg:"):
+            # --- Page navigation ---
+            try:
+                page = int(data[3:])
+            except ValueError:
+                await query.answer(text="Invalid page.")
+                return
+
+            models = state.get("model_list", [])
+            state["model_page"] = page
+
+            provider_slug = state.get("selected_provider", "")
+            provider = next(
+                (p for p in state["providers"] if p["slug"] == provider_slug),
+                None,
+            )
+            total = provider.get("total_models", len(models)) if provider else len(models)
+
+            await _show_model_page(
+                state.get("selected_provider_name", ""), total, models, page
+            )
+
+        elif data.startswith("mm:"):
+            # --- Model selected: perform the switch ---
+            try:
+                idx = int(data[3:])
+            except ValueError:
+                await query.answer(text="Invalid selection.")
+                return
+
+            model_list = state.get("model_list", [])
+            if idx < 0 or idx >= len(model_list):
+                await query.answer(text="Invalid model index.")
+                return
+
+            model_id = model_list[idx]
+            provider_slug = state.get("selected_provider", "")
+            callback = state.get("on_model_selected")
+
+            if not callback:
+                await query.answer(text="Picker expired.")
+                return
+
+            switched = True
+            try:
+                result_text = await callback(chat_id, model_id, provider_slug)
+            except Exception as exc:
+                logger.error("Model picker switch failed: %s", exc)
+                result_text = f"Error switching model: {exc}"
+                switched = False
+            else:
+                # The gateway's selection callback reports a rejected switch
+                # as text starting with "Error" rather than raising.
+                if isinstance(result_text, str) and result_text.startswith("Error"):
+                    switched = False
+
+            # Edit message to show the outcome, remove buttons
+            try:
+                await query.edit_message_text(
+                    text=result_text,
+                    parse_mode=ParseMode.MARKDOWN,
+                    reply_markup=None,
+                )
+            except Exception:
+                # Markdown parse failure — retry as plain text
+                try:
+                    await query.edit_message_text(
+                        text=result_text,
+                        parse_mode=None,
+                        reply_markup=None,
+                    )
+                except Exception as exc:
+                    # Best-effort: the switch outcome stands even if the
+                    # message could not be updated, but leave a trace.
+                    logger.warning("Model picker result edit failed: %s", exc)
+            # Only claim success in the toast when the switch went through.
+            await query.answer(
+                text="Model switched!" if switched else "Model switch failed."
+            )
+
+            # Clean up state
+            self._model_picker_state.pop(chat_id, None)
+
+        elif data == "mb":
+            # --- Back to provider list ---
+            # Without the label helper, show the provider slug unchanged.
+            try:
+                from hermes_cli.providers import get_label
+            except ImportError:
+                def get_label(slug):
+                    return slug
+
+            buttons = []
+            for p in state["providers"]:
+                count = p.get("total_models", len(p.get("models", [])))
+                label = f"{p['name']} ({count})"
+                if p.get("is_current"):
+                    label = f"✓ {label}"
+                buttons.append(
+                    InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
+                )
+
+            rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
+            rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
+            keyboard = InlineKeyboardMarkup(rows)
+
+            try:
+                provider_label = get_label(state["current_provider"])
+            except Exception:
+                provider_label = state["current_provider"]
+
+            await query.edit_message_text(
+                text=(
+                    f"⚙ *Model Configuration*\n\n"
+                    f"Current model: `{state['current_model'] or 'unknown'}`\n"
+                    f"Provider: {provider_label}\n\n"
+                    f"Select a provider:"
+                ),
+                parse_mode=ParseMode.MARKDOWN,
+                reply_markup=keyboard,
+            )
+            await query.answer()
+
+        elif data == "mx":
+            # --- Cancel ---
+            self._model_picker_state.pop(chat_id, None)
+            await query.edit_message_text(
+                text="Model selection cancelled.",
+                reply_markup=None,
+            )
+            await query.answer()
+
+        else:
+            # Catch-all (e.g. page counter button "mx:noop")
+            await query.answer()
+
    async def _handle_callback_query(
        self, update: "Update", context: "ContextTypes.DEFAULT_TYPE"
    ) -> None:
-        """Handle inline keyboard button clicks (update prompts)."""
+        """Handle inline keyboard button clicks."""
        query = update.callback_query
        if not query or not query.data:
            return
        data = query.data
+
+        # --- Model picker callbacks ---
+        if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
+            chat_id = str(query.message.chat_id) if query.message else None
+            if chat_id:
+                await self._handle_model_picker_callback(query, data, chat_id)
+            return
+
+        # --- Update prompt callbacks ---
        if not data.startswith("update_prompt:"):
            return
        answer = data.split(":", 1)[1]  # "y" or "n"
@@ -1127,6 +1127,7 @@ class GatewayRunner:
            # Set up message + fatal error handlers
            adapter.set_message_handler(self._handle_message)
            adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
+            adapter.set_session_store(self.session_store)
            
            # Try to connect
            logger.info("Connecting to %s...", platform.value)
@@ -1424,6 +1425,7 @@ class GatewayRunner:

                    adapter.set_message_handler(self._handle_message)
                    adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
+                    adapter.set_session_store(self.session_store)

                    success = await adapter.connect()
                    if success:
@@ -3462,11 +3464,11 @@ class GatewayRunner:
            lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_")
        return "\n".join(lines)
    
-    async def _handle_model_command(self, event: MessageEvent) -> str:
+    async def _handle_model_command(self, event: MessageEvent) -> Optional[str]:
        """Handle /model command — switch model for this session.

        Supports:
-          /model                              — show current model info
+          /model                              — interactive picker (Telegram/Discord) or text list
          /model <name>                       — switch for this session only
          /model <name> --global              — switch and persist to config.yaml
          /model <name> --provider <provider> — switch provider + model
@@ -3514,8 +3516,118 @@ class GatewayRunner:
            current_base_url = override.get("base_url", current_base_url)
            current_api_key = override.get("api_key", current_api_key)

-        # No args: show authenticated providers with models
+        # No args: show interactive picker (Telegram/Discord) or text list
        if not model_input and not explicit_provider:
+            # Try interactive picker if the platform supports it
+            adapter = self.adapters.get(source.platform)
+            has_picker = (
+                adapter is not None
+                and getattr(type(adapter), "send_model_picker", None) is not None
+            )
+
+            if has_picker:
+                try:
+                    providers = list_authenticated_providers(
+                        current_provider=current_provider,
+                        user_providers=user_provs,
+                        max_models=50,
+                    )
+                except Exception:
+                    providers = []
+
+                if providers:
+                    # Build a callback closure for when the user picks a model.
+                    # Captures self + locals needed for the switch logic.
+                    _self = self
+                    _session_key = session_key
+                    _cur_model = current_model
+                    _cur_provider = current_provider
+                    _cur_base_url = current_base_url
+                    _cur_api_key = current_api_key
+
+                    async def _on_model_selected(
+                        _chat_id: str, model_id: str, provider_slug: str
+                    ) -> str:
+                        """Perform the model switch and return confirmation text."""
+                        result = _switch_model(
+                            raw_input=model_id,
+                            current_provider=_cur_provider,
+                            current_model=_cur_model,
+                            current_base_url=_cur_base_url,
+                            current_api_key=_cur_api_key,
+                            is_global=False,
+                            explicit_provider=provider_slug,
+                        )
+                        if not result.success:
+                            return f"Error: {result.error_message}"
+
+                        # Update cached agent in-place
+                        cached_entry = None
+                        _cache_lock = getattr(_self, "_agent_cache_lock", None)
+                        _cache = getattr(_self, "_agent_cache", None)
+                        if _cache_lock and _cache is not None:
+                            with _cache_lock:
+                                cached_entry = _cache.get(_session_key)
+                        if cached_entry and cached_entry[0] is not None:
+                            try:
+                                cached_entry[0].switch_model(
+                                    new_model=result.new_model,
+                                    new_provider=result.target_provider,
+                                    api_key=result.api_key,
+                                    base_url=result.base_url,
+                                    api_mode=result.api_mode,
+                                )
+                            except Exception as exc:
+                                logger.warning("Picker model switch failed for cached agent: %s", exc)
+
+                        # Store model note + session override
+                        if not hasattr(_self, "_pending_model_notes"):
+                            _self._pending_model_notes = {}
+                        _self._pending_model_notes[_session_key] = (
+                            f"[Note: model was just switched from {_cur_model} to {result.new_model} "
+                            f"via {result.provider_label or result.target_provider}. "
+                            f"Adjust your self-identification accordingly.]"
+                        )
+                        if not hasattr(_self, "_session_model_overrides"):
+                            _self._session_model_overrides = {}
+                        _self._session_model_overrides[_session_key] = {
+                            "model": result.new_model,
+                            "provider": result.target_provider,
+                            "api_key": result.api_key,
+                            "base_url": result.base_url,
+                            "api_mode": result.api_mode,
+                        }
+
+                        # Build confirmation text
+                        plabel = result.provider_label or result.target_provider
+                        lines = [f"Model switched to `{result.new_model}`"]
+                        lines.append(f"Provider: {plabel}")
+                        mi = result.model_info
+                        if mi:
+                            if mi.context_window:
+                                lines.append(f"Context: {mi.context_window:,} tokens")
+                            if mi.max_output:
+                                lines.append(f"Max output: {mi.max_output:,} tokens")
+                            if mi.has_cost_data():
+                                lines.append(f"Cost: {mi.format_cost()}")
+                            lines.append(f"Capabilities: {mi.format_capabilities()}")
+                        lines.append("_(session only — use `/model <name> --global` to persist)_")
+                        return "\n".join(lines)
+
+                    metadata = {"thread_id": source.thread_id} if source.thread_id else None
+                    result = await adapter.send_model_picker(
+                        chat_id=source.chat_id,
+                        providers=providers,
+                        current_model=current_model,
+                        current_provider=current_provider,
+                        session_key=session_key,
+                        on_model_selected=_on_model_selected,
+                        metadata=metadata,
+                    )
+                    if result.success:
+                        return None  # Picker sent — adapter handles the response
+
+            # Fallback: text list (for platforms without picker or if picker failed)
            provider_label = get_label(current_provider)
            lines = [f"Current: `{current_model or 'unknown'}` on {provider_label}", ""]

@@ -5936,12 +6048,13 @@ class GatewayRunner:
        platform_name = watcher.get("platform", "")
        chat_id = watcher.get("chat_id", "")
        thread_id = watcher.get("thread_id", "")
+        agent_notify = watcher.get("notify_on_complete", False)
        notify_mode = self._load_background_notifications_mode()

-        logger.debug("Process watcher started: %s (every %ss, notify=%s)",
-                      session_id, interval, notify_mode)
+        logger.debug("Process watcher started: %s (every %ss, notify=%s, agent_notify=%s)",
+                      session_id, interval, notify_mode, agent_notify)

-        if notify_mode == "off":
+        if notify_mode == "off" and not agent_notify:
            # Still wait for the process to exit so we can log it, but don't
            # push any messages to the user.
            while True:
@@ -5965,6 +6078,47 @@ class GatewayRunner:
            last_output_len = current_output_len

            if session.exited:
+                # --- Agent-triggered completion: inject synthetic message ---
+                if agent_notify:
+                    from tools.ansi_strip import strip_ansi
+                    _out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
+                    synth_text = (
+                        f"[SYSTEM: Background process {session_id} completed "
+                        f"(exit code {session.exit_code}).\n"
+                        f"Command: {session.command}\n"
+                        f"Output:\n{_out}]"
+                    )
+                    adapter = None
+                    for p, a in self.adapters.items():
+                        if p.value == platform_name:
+                            adapter = a
+                            break
+                    if adapter and chat_id:
+                        try:
+                            from gateway.platforms.base import MessageEvent, MessageType
+                            from gateway.session import SessionSource
+                            from gateway.config import Platform
+                            _platform_enum = Platform(platform_name)
+                            _source = SessionSource(
+                                platform=_platform_enum,
+                                chat_id=chat_id,
+                                thread_id=thread_id or None,
+                            )
+                            synth_event = MessageEvent(
+                                text=synth_text,
+                                message_type=MessageType.TEXT,
+                                source=_source,
+                            )
+                            logger.info(
+                                "Process %s finished — injecting agent notification for session %s",
+                                session_id, session_key,
+                            )
+                            await adapter.handle_message(synth_event)
+                        except Exception as e:
+                            logger.error("Agent notify injection error: %s", e)
+                    break
+
+                # --- Normal text-only notification ---
                # Decide whether to notify based on mode
                should_notify = (
                    notify_mode in ("all", "result")
@@ -5989,8 +6143,9 @@ class GatewayRunner:
                            logger.error("Watcher delivery error: %s", e)
                break

-            elif has_new_output and notify_mode == "all":
+            elif has_new_output and notify_mode == "all" and not agent_notify:
                # New output available -- deliver status update (only in "all" mode)
+                # Skip periodic updates for agent_notify watchers (they only care about completion)
                new_output = session.output_buffer[-500:] if session.output_buffer else ""
                message_text = (
                    f"[Background process {session_id} is still running~ "
@@ -7019,6 +7174,27 @@ class GatewayRunner:
                    if pending:
                        logger.debug("Processing queued message after agent completion: '%s...'", pending[:40])
            
+            # Safety net: if the pending text is a slash command (e.g. "/stop",
+            # "/new"), discard it — commands should never be passed to the agent
+            # as user input.  The primary fix is in base.py (commands bypass the
+            # active-session guard), but this catches edge cases where command
+            # text leaks through the interrupt_message fallback.
+            if pending and pending.strip().startswith("/"):
+                _pending_parts = pending.strip().split(None, 1)
+                _pending_cmd_word = _pending_parts[0][1:].lower() if _pending_parts else ""
+                if _pending_cmd_word:
+                    try:
+                        from hermes_cli.commands import resolve_command as _rc_pending
+                        if _rc_pending(_pending_cmd_word):
+                            logger.info(
+                                "Discarding command '/%s' from pending queue — "
+                                "commands must not be passed as agent input",
+                                _pending_cmd_word,
+                            )
+                            pending = None
+                    except Exception:
+                        pass
+
            if pending:
                logger.debug("Processing pending message: '%s...'", pending[:40])
                
@@ -28,6 +28,10 @@ logger = logging.getLogger("gateway.stream_consumer")
 # Sentinel to signal the stream is complete
 _DONE = object()

+# Sentinel to signal a tool boundary — finalize current message and start a
+# new one so that subsequent text appears below tool progress messages.
+_NEW_SEGMENT = object()
+

@dataclass
 class StreamConsumerConfig:
@@ -78,9 +82,16 @@ class GatewayStreamConsumer:
        return self._already_sent

    def on_delta(self, text: str) -> None:
-        """Thread-safe callback — called from the agent's worker thread."""
+        """Thread-safe callback — called from the agent's worker thread.
+
+        When *text* is ``None``, signals a tool boundary: the current message
+        is finalized and subsequent text will be sent as a new message so it
+        appears below any tool-progress messages the gateway sent in between.
+        """
        if text:
            self._queue.put(text)
+        elif text is None:
+            self._queue.put(_NEW_SEGMENT)

    def finish(self) -> None:
        """Signal that the stream is complete."""
@@ -96,12 +107,16 @@ class GatewayStreamConsumer:
            while True:
                # Drain all available items from the queue
                got_done = False
+                got_segment_break = False
                while True:
                    try:
                        item = self._queue.get_nowait()
                        if item is _DONE:
                            got_done = True
                            break
+                        if item is _NEW_SEGMENT:
+                            got_segment_break = True
+                            break
                        self._accumulated += item
                    except queue.Empty:
                        break
@@ -111,6 +126,7 @@ class GatewayStreamConsumer:
                elapsed = now - self._last_edit_time
                should_edit = (
                    got_done
+                    or got_segment_break
                    or (elapsed >= self.cfg.edit_interval
                        and len(self._accumulated) > 0)
                    or len(self._accumulated) >= self.cfg.buffer_threshold
@@ -133,7 +149,7 @@ class GatewayStreamConsumer:
                        self._last_sent_text = ""

                    display_text = self._accumulated
-                    if not got_done:
+                    if not got_done and not got_segment_break:
                        display_text += self.cfg.cursor

                    await self._send_or_edit(display_text)
@@ -145,6 +161,15 @@ class GatewayStreamConsumer:
                        await self._send_or_edit(self._accumulated)
                    return

+                # Tool boundary: the should_edit block above already flushed
+                # accumulated text without a cursor.  Reset state so the next
+                # text chunk creates a fresh message below any tool-progress
+                # messages the gateway sent in between.
+                if got_segment_break:
+                    self._message_id = None
+                    self._accumulated = ""
+                    self._last_sent_text = ""
+
                await asyncio.sleep(0.05)  # Small yield to not busy-loop

        except asyncio.CancelledError:
@@ -404,6 +404,47 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str
    return None


+def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> str:
+    """Return the Z.AI base URL to use for *api_key*.
+
+    Resolution order:
+
+    1. ``env_override`` (an explicitly set GLM_BASE_URL) always wins and is
+       returned unchanged.
+    2. A previously detected endpoint cached in provider state (auth.json),
+       but only when it was recorded for the same API key.  Keys are compared
+       through the first 16 hex digits of their SHA-256 digest.
+    3. A live probe via ``detect_zai_endpoint``.  A successful result is
+       handed to ``_save_provider_state`` so subsequent starts can skip the
+       probe.
+    4. ``default_url`` when the probe finds nothing.
+
+    Args:
+        api_key: Z.AI API key used both for probing and for keying the cache.
+        default_url: Base URL returned when nothing could be detected.
+        env_override: Explicit user override; empty/falsy means "not set".
+
+    Returns:
+        The base URL selected by the rules above.
+    """
+    if env_override:
+        return env_override
+
+    # Check provider-state cache for a previously-detected endpoint.
+    auth_store = _load_auth_store()
+    state = _load_provider_state(auth_store, "zai") or {}
+    cached = state.get("detected_endpoint")
+    if isinstance(cached, dict) and cached.get("base_url"):
+        key_hash = cached.get("key_hash", "")
+        if key_hash == hashlib.sha256(api_key.encode()).hexdigest()[:16]:
+            logger.debug("Z.AI: using cached endpoint %s", cached["base_url"])
+            return cached["base_url"]
+
+    # Probe — may take up to ~8s per endpoint.
+    detected = detect_zai_endpoint(api_key)
+    if detected and detected.get("base_url"):
+        # Persist the detection result keyed on the API key hash.
+        key_hash = hashlib.sha256(api_key.encode()).hexdigest()[:16]
+        # Optional fields are read with .get() everywhere (including the log
+        # line below) so a probe result without a label cannot raise KeyError
+        # after the endpoint has already been detected and stored.
+        label = detected.get("label", "")
+        state["detected_endpoint"] = {
+            "base_url": detected["base_url"],
+            "endpoint_id": detected.get("id", ""),
+            "model": detected.get("model", ""),
+            "label": label,
+            "key_hash": key_hash,
+        }
+        _save_provider_state(auth_store, "zai", state)
+        logger.info("Z.AI: auto-detected endpoint %s (%s)", label, detected["base_url"])
+        return detected["base_url"]
+
+    logger.debug("Z.AI: probe failed, falling back to default %s", default_url)
+    return default_url
+
+
 # =============================================================================
 # Error Types
 # =============================================================================
@@ -2063,6 +2104,8 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:

    if provider_id == "kimi-coding":
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
+    elif provider_id == "zai":
+        base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
    elif env_url:
        base_url = env_url.rstrip("/")
    else:
@@ -1803,8 +1803,7 @@ def _setup_signal():
        print_warning("signal-cli not found on PATH.")
        print_info("  Signal requires signal-cli running as an HTTP daemon.")
        print_info("  Install options:")
-        print_info("    Linux:  sudo apt install signal-cli")
-        print_info("            or download from https://github.com/AsamK/signal-cli")
+        print_info("    Linux:  download from https://github.com/AsamK/signal-cli/releases")
        print_info("    macOS:  brew install signal-cli")
        print_info("    Docker: bbernhard/signal-cli-rest-api")
        print()
@@ -3566,7 +3566,7 @@ def cmd_update(args):
        try:
            from hermes_cli.profiles import list_profiles, get_active_profile_name, seed_profile_skills
            active = get_active_profile_name()
-            other_profiles = [p for p in list_profiles() if not p.is_default and p.name != active]
+            other_profiles = [p for p in list_profiles() if p.name != active]
            if other_profiles:
                print()
                print("→ Syncing bundled skills to other profiles...")
@@ -167,20 +167,20 @@ def _resolve_browser_feature_state(
    if browser_provider_explicit:
        current_provider = browser_provider or "local"
        if current_provider == "browserbase":
-            provider_available = managed_browser_available or direct_browserbase
+            available = bool(browser_local_available and direct_browserbase)
+            active = bool(browser_tool_enabled and available)
+            return current_provider, available, active, False
+        if current_provider == "browser-use":
+            provider_available = managed_browser_available or direct_browser_use
            available = bool(browser_local_available and provider_available)
            managed = bool(
                browser_tool_enabled
                and browser_local_available
                and managed_browser_available
-                and not direct_browserbase
+                and not direct_browser_use
            )
            active = bool(browser_tool_enabled and available)
            return current_provider, available, active, managed
-        if current_provider == "browser-use":
-            available = bool(browser_local_available and direct_browser_use)
-            active = bool(browser_tool_enabled and available)
-            return current_provider, available, active, False
        if current_provider == "firecrawl":
            available = bool(browser_local_available and direct_firecrawl)
            active = bool(browser_tool_enabled and available)
@@ -193,16 +193,21 @@ def _resolve_browser_feature_state(
        active = bool(browser_tool_enabled and available)
        return current_provider, available, active, False

-    if managed_browser_available or direct_browserbase:
+    if managed_browser_available or direct_browser_use:
        available = bool(browser_local_available)
        managed = bool(
            browser_tool_enabled
            and browser_local_available
            and managed_browser_available
-            and not direct_browserbase
+            and not direct_browser_use
        )
        active = bool(browser_tool_enabled and available)
-        return "browserbase", available, active, managed
+        return "browser-use", available, active, managed
+
+    if direct_browserbase:
+        available = bool(browser_local_available)
+        active = bool(browser_tool_enabled and available)
+        return "browserbase", available, active, False

    available = bool(browser_local_available)
    active = bool(browser_tool_enabled and available)
@@ -266,7 +271,7 @@ def get_nous_subscription_features(
    managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
    managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
-    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase")
+    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
    managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
    modal_state = resolve_modal_backend_state(
        modal_mode,
@@ -512,10 +517,10 @@ def apply_nous_managed_defaults(
        changed.add("tts")

    if "browser" in selected_toolsets and not features.browser.explicit_configured and not (
-        get_env_value("BROWSERBASE_API_KEY")
-        or get_env_value("BROWSER_USE_API_KEY")
+        get_env_value("BROWSER_USE_API_KEY")
+        or get_env_value("BROWSERBASE_API_KEY")
    ):
-        browser_cfg["cloud_provider"] = "browserbase"
+        browser_cfg["cloud_provider"] = "browser-use"
        changed.add("browser")

    if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
@@ -639,31 +639,47 @@ def resolve_runtime_provider(
            )

    if provider == "nous":
-        creds = resolve_nous_runtime_credentials(
-            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
-            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-        )
-        return {
-            "provider": "nous",
-            "api_mode": "chat_completions",
-            "base_url": creds.get("base_url", "").rstrip("/"),
-            "api_key": creds.get("api_key", ""),
-            "source": creds.get("source", "portal"),
-            "expires_at": creds.get("expires_at"),
-            "requested_provider": requested_provider,
-        }
+        try:
+            creds = resolve_nous_runtime_credentials(
+                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+            )
+            return {
+                "provider": "nous",
+                "api_mode": "chat_completions",
+                "base_url": creds.get("base_url", "").rstrip("/"),
+                "api_key": creds.get("api_key", ""),
+                "source": creds.get("source", "portal"),
+                "expires_at": creds.get("expires_at"),
+                "requested_provider": requested_provider,
+            }
+        except AuthError:
+            if requested_provider != "auto":
+                raise
+            # Auto-detected Nous but credentials are stale/revoked —
+            # fall through to env-var providers (e.g. OpenRouter).
+            logger.info("Auto-detected Nous provider but credentials failed; "
+                        "falling through to next provider.")

    if provider == "openai-codex":
-        creds = resolve_codex_runtime_credentials()
-        return {
-            "provider": "openai-codex",
-            "api_mode": "codex_responses",
-            "base_url": creds.get("base_url", "").rstrip("/"),
-            "api_key": creds.get("api_key", ""),
-            "source": creds.get("source", "hermes-auth-store"),
-            "last_refresh": creds.get("last_refresh"),
-            "requested_provider": requested_provider,
-        }
+        try:
+            creds = resolve_codex_runtime_credentials()
+            return {
+                "provider": "openai-codex",
+                "api_mode": "codex_responses",
+                "base_url": creds.get("base_url", "").rstrip("/"),
+                "api_key": creds.get("api_key", ""),
+                "source": creds.get("source", "hermes-auth-store"),
+                "last_refresh": creds.get("last_refresh"),
+                "requested_provider": requested_provider,
+            }
+        except AuthError:
+            if requested_provider != "auto":
+                raise
+            # Auto-detected Codex but credentials are stale/revoked —
+            # fall through to env-var providers (e.g. OpenRouter).
+            logger.info("Auto-detected Codex provider but credentials failed; "
+                        "falling through to next provider.")

    if provider == "copilot-acp":
        creds = resolve_external_process_provider_credentials(provider)
@@ -660,14 +660,14 @@ def _print_setup_summary(config: dict, hermes_home):
    # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl)
    browser_provider = subscription_features.browser.current_provider
    if subscription_features.browser.managed_by_nous:
-        tool_status.append(("Browser Automation (Nous Browserbase)", True, None))
+        tool_status.append(("Browser Automation (Nous Browser Use)", True, None))
    elif subscription_features.browser.available:
        label = "Browser Automation"
        if browser_provider:
            label = f"Browser Automation ({browser_provider})"
        tool_status.append((label, True, None))
    else:
-        missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browserbase"
+        missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browser Use or Browserbase"
        if browser_provider == "Browserbase":
            missing_browser_hint = (
                "npm install -g agent-browser and set "
@@ -123,7 +123,8 @@ def show_status(args):
        "MiniMax-CN": "MINIMAX_CN_API_KEY",
        "Firecrawl": "FIRECRAWL_API_KEY",
        "Tavily": "TAVILY_API_KEY",
-        "Browserbase": "BROWSERBASE_API_KEY",  # Optional — local browser works without this
+        "Browser Use": "BROWSER_USE_API_KEY",  # Optional — local browser works without this
+        "Browserbase": "BROWSERBASE_API_KEY",  # Optional — direct credentials only
        "FAL": "FAL_KEY",
        "Tinker": "TINKER_API_KEY",
        "WandB": "WANDB_API_KEY",
@@ -280,21 +280,21 @@ TOOL_CATEGORIES = {
        "icon": "🌐",
        "providers": [
            {
-                "name": "Nous Subscription (Browserbase cloud)",
-                "tag": "Managed Browserbase billed to your subscription",
+                "name": "Nous Subscription (Browser Use cloud)",
+                "tag": "Managed Browser Use billed to your subscription",
                "env_vars": [],
-                "browser_provider": "browserbase",
+                "browser_provider": "browser-use",
                "requires_nous_auth": True,
                "managed_nous_feature": "browser",
-                "override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"],
-                "post_setup": "browserbase",
+                "override_env_vars": ["BROWSER_USE_API_KEY"],
+                "post_setup": "agent_browser",
            },
            {
                "name": "Local Browser",
                "tag": "Free headless Chromium (no API key needed)",
                "env_vars": [],
                "browser_provider": "local",
-                "post_setup": "browserbase",  # Same npm install for agent-browser
+                "post_setup": "agent_browser",
            },
            {
                "name": "Browserbase",
@@ -304,7 +304,7 @@ TOOL_CATEGORIES = {
                    {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
                ],
                "browser_provider": "browserbase",
-                "post_setup": "browserbase",
+                "post_setup": "agent_browser",
            },
            {
                "name": "Browser Use",
@@ -313,7 +313,7 @@ TOOL_CATEGORIES = {
                    {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
                ],
                "browser_provider": "browser-use",
-                "post_setup": "browserbase",
+                "post_setup": "agent_browser",
            },
            {
                "name": "Firecrawl",
@@ -322,7 +322,7 @@ TOOL_CATEGORIES = {
                    {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
                ],
                "browser_provider": "firecrawl",
-                "post_setup": "browserbase",
+                "post_setup": "agent_browser",
            },
            {
                "name": "Camofox",
@@ -381,7 +381,7 @@ TOOLSET_ENV_REQUIREMENTS = {
 def _run_post_setup(post_setup_key: str):
    """Run post-setup hooks for tools that need extra installation steps."""
    import shutil
-    if post_setup_key == "browserbase":
+    if post_setup_key in ("agent_browser", "browserbase"):
        node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
        if not node_modules.exists() and shutil.which("npm"):
            _print_info("    Installing Node.js dependencies for browser tools...")
@@ -211,7 +211,7 @@ _LEGACY_TOOLSET_MAP = {
    "browser_tools": [
        "browser_navigate", "browser_snapshot", "browser_click",
        "browser_type", "browser_scroll", "browser_back",
-        "browser_press", "browser_close", "browser_get_images",
+        "browser_press", "browser_get_images",
        "browser_vision", "browser_console"
    ],
    "cronjob_tools": ["cronjob"],
@@ -0,0 +1,54 @@
+# Supermemory Memory Provider
+
+Semantic long-term memory with profile recall, semantic search, explicit memory tools, and session-end conversation ingest.
+
+## Requirements
+
+- `pip install supermemory`
+- Supermemory API key from [supermemory.ai](https://supermemory.ai)
+
+## Setup
+
+```bash
+hermes memory setup    # select "supermemory"
+```
+
+Or manually:
+
+```bash
+hermes config set memory.provider supermemory
+echo 'SUPERMEMORY_API_KEY=your-key-here' >> ~/.hermes/.env
+```
+
+## Config
+
+Config file: `$HERMES_HOME/supermemory.json`
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `container_tag` | `hermes` | Container tag used for search and writes |
+| `auto_recall` | `true` | Inject relevant memory context before turns |
+| `auto_capture` | `true` | Store cleaned user-assistant turns after each response |
+| `max_recall_results` | `10` | Max recalled items to format into context (clamped to 1–20) |
+| `profile_frequency` | `50` | Include profile facts on first turn and every N turns (clamped to 1–500) |
+| `capture_mode` | `all` | `all` skips tiny or trivial turns; the only other accepted value is `everything` |
+| `entity_context` | built-in default | Extraction guidance passed to Supermemory |
+| `api_timeout` | `5.0` | Timeout in seconds for SDK requests (clamped to 0.5–15); session ingest allows 3 extra seconds |
+
+## Tools
+
+| Tool | Description |
+|------|-------------|
+| `supermemory_store` | Store an explicit memory |
+| `supermemory_search` | Search memories by semantic similarity |
+| `supermemory_forget` | Forget a memory by ID or best-match query |
+| `supermemory_profile` | Retrieve persistent profile and recent context |
+
+## Behavior
+
+When enabled, Hermes can:
+
+- prefetch relevant memory context before each turn
+- store cleaned conversation turns after each completed response
+- ingest the full session on session end for richer graph updates
+- expose explicit tools for search, store, forget, and profile access
@@ -0,0 +1,671 @@
+"""Supermemory memory plugin using the MemoryProvider interface.
+
+Provides semantic long-term memory with profile recall, semantic search,
+explicit memory tools, cleaned turn capture, and session-end conversation ingest.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+import threading
+import urllib.error
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_CONTAINER_TAG = "hermes"
+_DEFAULT_MAX_RECALL_RESULTS = 10
+_DEFAULT_PROFILE_FREQUENCY = 50
+_DEFAULT_CAPTURE_MODE = "all"
+_DEFAULT_API_TIMEOUT = 5.0
+_MIN_CAPTURE_LENGTH = 10
+_MAX_ENTITY_CONTEXT_LENGTH = 1500
+_CONVERSATIONS_URL = "https://api.supermemory.ai/v4/conversations"
+_TRIVIAL_RE = re.compile(
+    r"^(ok|okay|thanks|thank you|got it|sure|yes|no|yep|nope|k|ty|thx|np)\.?$",
+    re.IGNORECASE,
+)
+_CONTEXT_STRIP_RE = re.compile(
+    r"<supermemory-context>[\s\S]*?</supermemory-context>\s*", re.DOTALL
+)
+_CONTAINERS_STRIP_RE = re.compile(
+    r"<supermemory-containers>[\s\S]*?</supermemory-containers>\s*", re.DOTALL
+)
+_DEFAULT_ENTITY_CONTEXT = (
+    "User-assistant conversation. Format: [role: user]...[user:end] and "
+    "[role: assistant]...[assistant:end].\n\n"
+    "Only extract things useful in future conversations. Most messages are not worth remembering.\n\n"
+    "Remember lasting personal facts, preferences, routines, tools, ongoing projects, working context, "
+    "and explicit requests to remember something.\n\n"
+    "Do not remember temporary intents, one-time tasks, assistant actions, implementation details, or in-progress status.\n\n"
+    "When in doubt, store less."
+)
+
+
+def _default_config() -> dict:
+    """Build a fresh dict holding the built-in Supermemory settings.
+
+    A new dict is created on every call, so callers are free to mutate the
+    result without affecting later calls.
+    """
+    defaults: dict = {}
+    defaults["container_tag"] = _DEFAULT_CONTAINER_TAG
+    defaults["auto_recall"] = True
+    defaults["auto_capture"] = True
+    defaults["max_recall_results"] = _DEFAULT_MAX_RECALL_RESULTS
+    defaults["profile_frequency"] = _DEFAULT_PROFILE_FREQUENCY
+    defaults["capture_mode"] = _DEFAULT_CAPTURE_MODE
+    defaults["entity_context"] = _DEFAULT_ENTITY_CONTEXT
+    defaults["api_timeout"] = _DEFAULT_API_TIMEOUT
+    return defaults
+
+
+def _sanitize_tag(raw: str) -> str:
+    """Normalize *raw* into a container tag of ASCII letters, digits and ``_``.
+
+    Every run of characters outside ``[a-zA-Z0-9]`` (underscores included)
+    collapses to a single underscore, and underscores at either end are
+    removed.  If nothing is left, the default container tag is returned.
+    """
+    collapsed = re.sub(r"[^a-zA-Z0-9]+", "_", raw or "")
+    trimmed = collapsed.strip("_")
+    if trimmed:
+        return trimmed
+    return _DEFAULT_CONTAINER_TAG
+
+
+def _clamp_entity_context(text: str) -> str:
+    """Return *text* stripped and truncated to the maximum entity-context length.
+
+    Falls back to the built-in default entity context when *text* is empty,
+    ``None``, or contains only whitespace.  Whitespace-only input previously
+    slipped past the emptiness check and produced an empty string, unlike a
+    truly empty value.
+    """
+    cleaned = (text or "").strip()
+    if not cleaned:
+        return _DEFAULT_ENTITY_CONTEXT
+    return cleaned[:_MAX_ENTITY_CONTEXT_LENGTH]
+
+
+def _as_bool(value: Any, default: bool) -> bool:
+    """Coerce a loosely-typed config value to ``bool``.
+
+    Accepted inputs:
+
+    * real booleans, returned as-is;
+    * the numbers ``0`` and ``1`` (e.g. from hand-written JSON), matching the
+      already-accepted strings ``"0"`` and ``"1"``;
+    * case-insensitive strings ``true/1/yes/y/on`` and ``false/0/no/n/off``
+      (surrounding whitespace ignored).
+
+    Anything else, including ``None`` and other numbers, yields *default*.
+    """
+    if isinstance(value, bool):
+        return value
+    # bool is handled above, so this branch only sees plain numbers.
+    if isinstance(value, (int, float)) and value in (0, 1):
+        return bool(value)
+    if isinstance(value, str):
+        lowered = value.strip().lower()
+        if lowered in ("true", "1", "yes", "y", "on"):
+            return True
+        if lowered in ("false", "0", "no", "n", "off"):
+            return False
+    return default
+
+
+def _load_supermemory_config(hermes_home: str) -> dict:
+    """Load ``supermemory.json`` from *hermes_home* and normalize every setting.
+
+    Starts from the built-in defaults, overlays any non-null keys found in the
+    file, then coerces each known setting to a safe value.  A missing file
+    leaves the defaults in place; an unreadable or malformed one is logged at
+    debug level and otherwise ignored.
+
+    Normalization applied:
+
+    * ``container_tag`` is sanitized.
+    * ``auto_recall`` / ``auto_capture`` are coerced to bool (default True).
+    * ``max_recall_results`` is clamped to 1–20, ``profile_frequency`` to
+      1–500 and ``api_timeout`` to 0.5–15.0; unparsable values fall back to
+      the respective default.
+    * ``capture_mode`` is ``"everything"`` only when set to exactly that,
+      otherwise ``"all"``.
+    * ``entity_context`` is stripped and length-limited.
+    """
+    settings = _default_config()
+    path = Path(hermes_home) / "supermemory.json"
+    if path.exists():
+        try:
+            loaded = json.loads(path.read_text(encoding="utf-8"))
+            if isinstance(loaded, dict):
+                for key, value in loaded.items():
+                    if value is not None:
+                        settings[key] = value
+        except Exception:
+            logger.debug("Failed to parse %s", path, exc_info=True)
+
+    def _bounded(key, cast, low, high, fallback):
+        # Parse settings[key] with *cast* and clamp into [low, high]; any
+        # failure yields *fallback*.
+        try:
+            return max(low, min(high, cast(settings.get(key, fallback))))
+        except Exception:
+            return fallback
+
+    tag_source = settings.get("container_tag", _DEFAULT_CONTAINER_TAG)
+    settings["container_tag"] = _sanitize_tag(str(tag_source))
+    for flag in ("auto_recall", "auto_capture"):
+        settings[flag] = _as_bool(settings.get(flag), True)
+    settings["max_recall_results"] = _bounded(
+        "max_recall_results", int, 1, 20, _DEFAULT_MAX_RECALL_RESULTS
+    )
+    settings["profile_frequency"] = _bounded(
+        "profile_frequency", int, 1, 500, _DEFAULT_PROFILE_FREQUENCY
+    )
+    if settings.get("capture_mode") == "everything":
+        settings["capture_mode"] = "everything"
+    else:
+        settings["capture_mode"] = "all"
+    context_source = settings.get("entity_context", _DEFAULT_ENTITY_CONTEXT)
+    settings["entity_context"] = _clamp_entity_context(str(context_source))
+    settings["api_timeout"] = _bounded(
+        "api_timeout", float, 0.5, 15.0, _DEFAULT_API_TIMEOUT
+    )
+    return settings
+
+
+def _save_supermemory_config(values: dict, hermes_home: str) -> None:
+    """Merge *values* into ``supermemory.json`` under *hermes_home*.
+
+    Keys already present in the file are kept unless *values* overrides them.
+    An existing file that cannot be read, is not valid JSON, or does not hold
+    a JSON object is treated as empty and overwritten.  The file is written
+    as sorted, 2-space-indented JSON with a trailing newline.
+
+    Args:
+        values: Settings to add or overwrite.
+        hermes_home: Directory that holds ``supermemory.json``; it is created
+            (with parents) if it does not exist yet.
+    """
+    config_path = Path(hermes_home) / "supermemory.json"
+    existing: dict = {}
+    if config_path.exists():
+        try:
+            raw = json.loads(config_path.read_text(encoding="utf-8"))
+            if isinstance(raw, dict):
+                existing = raw
+        except Exception:
+            # Deliberately best-effort: a corrupt file must not block saving.
+            existing = {}
+    existing.update(values)
+    # Ensure the target directory exists so a first-time save cannot fail
+    # with FileNotFoundError.
+    config_path.parent.mkdir(parents=True, exist_ok=True)
+    config_path.write_text(json.dumps(existing, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+
+def _detect_category(text: str) -> str:
+    """Classify *text* with a keyword heuristic.
+
+    The lower-cased text is tested against each rule in order and the first
+    match wins: ``"preference"``, then ``"decision"``, then ``"fact"``.
+    ``"other"`` is returned when no rule matches.
+    """
+    haystack = text.lower()
+    rules = (
+        (r"prefer|like|love|hate|want", "preference"),
+        (r"decided|will use|going with", "decision"),
+        (r"\bis\b|\bare\b|\bhas\b|\bhave\b", "fact"),
+    )
+    for pattern, label in rules:
+        if re.search(pattern, haystack):
+            return label
+    return "other"
+
+
+def _format_relative_time(iso_timestamp: str) -> str:
+    """Render an ISO-8601 timestamp as a short age label relative to now (UTC).
+
+    Output by age:
+
+    * under 30 minutes: ``"just now"``
+    * under 1 hour: ``"<N>m ago"``
+    * under 1 day: ``"<N>h ago"``
+    * under 7 days: ``"<N>d ago"``
+    * older, same calendar year: ``"DD Mon"``
+    * older, different year: ``"DD Mon YYYY"``
+
+    A trailing ``Z`` is accepted as UTC.  Timestamps without any UTC offset
+    are assumed to be UTC; previously they raised on subtraction and the
+    label was silently dropped.
+
+    Returns:
+        The label, or ``""`` when *iso_timestamp* is empty, not a string, or
+        cannot be parsed.
+    """
+    try:
+        dt = datetime.fromisoformat(iso_timestamp.replace("Z", "+00:00"))
+        if dt.tzinfo is None:
+            # NOTE(review): naive timestamps are assumed to be UTC; confirm
+            # against the API if offset-less values are ever observed.
+            dt = dt.replace(tzinfo=timezone.utc)
+        now = datetime.now(timezone.utc)
+        seconds = (now - dt).total_seconds()
+        if seconds < 1800:
+            return "just now"
+        if seconds < 3600:
+            return f"{int(seconds / 60)}m ago"
+        if seconds < 86400:
+            return f"{int(seconds / 3600)}h ago"
+        if seconds < 604800:
+            return f"{int(seconds / 86400)}d ago"
+        if dt.year == now.year:
+            return dt.strftime("%d %b")
+        return dt.strftime("%d %b %Y")
+    except (AttributeError, TypeError, ValueError, OverflowError):
+        # Non-string input or an unparsable timestamp: omit the label.
+        return ""
+
+
+def _deduplicate_recall(static_facts: list, dynamic_facts: list, search_results: list) -> tuple[list, list, list]:
+    """Drop empty and repeated entries across the three recall sources.
+
+    Sources are scanned in priority order (static facts, dynamic facts, then
+    search results) and share one "seen" set, so text already kept from an
+    earlier source is removed from later ones.  Search results are compared
+    by their ``"memory"`` value, but the whole item dict is kept.  ``None``
+    is accepted for any argument and treated as an empty list.
+
+    Returns:
+        ``(static, dynamic, search)`` with the original relative order kept.
+    """
+    seen = set()
+
+    def _first_time(value) -> bool:
+        # True only for a non-empty value not encountered before; records it.
+        if not value or value in seen:
+            return False
+        seen.add(value)
+        return True
+
+    unique_static = [fact for fact in (static_facts or []) if _first_time(fact)]
+    unique_dynamic = [fact for fact in (dynamic_facts or []) if _first_time(fact)]
+    unique_search = [
+        entry for entry in (search_results or []) if _first_time(entry.get("memory", ""))
+    ]
+    return unique_static, unique_dynamic, unique_search
+
+
+def _format_prefetch_context(static_facts: list, dynamic_facts: list, search_results: list, max_results: int) -> str:
+    """Format recalled memory as a ``<supermemory-context>`` block.
+
+    The three sources are de-duplicated against each other, each is truncated
+    to *max_results* entries, and the non-empty ones are rendered as Markdown
+    sections ("User Profile (Persistent)", "Recent Context", "Relevant
+    Memories").  Each search result line may be prefixed with its relative
+    age and its similarity as a percentage when those are available.
+
+    Args:
+        static_facts: Persistent profile facts (strings).
+        dynamic_facts: Recent-context facts (strings).
+        search_results: Dicts with ``memory`` and optionally ``similarity``
+            and ``updated_at`` / ``updatedAt``.
+        max_results: Per-section cap on the number of entries.
+
+    Returns:
+        The wrapped context block, or ``""`` when there is nothing to show.
+    """
+    statics, dynamics, search = _deduplicate_recall(static_facts, dynamic_facts, search_results)
+    statics = statics[:max_results]
+    dynamics = dynamics[:max_results]
+    search = search[:max_results]
+    if not statics and not dynamics and not search:
+        return ""
+
+    sections = []
+    if statics:
+        sections.append("## User Profile (Persistent)\n" + "\n".join(f"- {item}" for item in statics))
+    if dynamics:
+        sections.append("## Recent Context\n" + "\n".join(f"- {item}" for item in dynamics))
+    if search:
+        lines = []
+        for item in search:
+            memory = item.get("memory", "")
+            if not memory:
+                continue
+            similarity = item.get("similarity")
+            updated = item.get("updated_at") or item.get("updatedAt") or ""
+            prefix_bits = []
+            rel = _format_relative_time(updated)
+            if rel:
+                prefix_bits.append(f"[{rel}]")
+            if similarity is not None:
+                try:
+                    prefix_bits.append(f"[{round(float(similarity) * 100)}%]")
+                except (TypeError, ValueError, OverflowError):
+                    # Unusable similarity value: just omit the percentage.
+                    pass
+            # Join only the parts that exist so a result without any prefix
+            # renders as "- memory" rather than "-  memory" (double space).
+            lines.append(" ".join(["-", *prefix_bits, str(memory)]).strip())
+        if lines:
+            sections.append("## Relevant Memories\n" + "\n".join(lines))
+    if not sections:
+        return ""
+
+    intro = (
+        "The following is background context from long-term memory. Use it silently when relevant. "
+        "Do not force memories into the conversation."
+    )
+    body = "\n\n".join(sections)
+    return f"<supermemory-context>\n{intro}\n\n{body}\n</supermemory-context>"
+
+
+def _clean_text_for_capture(text: str) -> str:
+    """Remove injected Supermemory blocks from *text* before it is stored.
+
+    Strips every ``<supermemory-context>`` and ``<supermemory-containers>``
+    block (plus the whitespace that follows it), then trims the result.
+    ``None`` or empty input yields ``""``.
+    """
+    cleaned = text or ""
+    for pattern in (_CONTEXT_STRIP_RE, _CONTAINERS_STRIP_RE):
+        cleaned = pattern.sub("", cleaned)
+    return cleaned.strip()
+
+
+def _is_trivial_message(text: str) -> bool:
+    """Tell whether *text*, once trimmed, is just a short acknowledgement.
+
+    The check is a full match against the module's trivial-reply pattern
+    (e.g. "ok", "thanks", "yes"); ``None`` and empty strings are not trivial.
+    """
+    candidate = (text or "").strip()
+    return _TRIVIAL_RE.match(candidate) is not None
+
+
+class _SupermemoryClient:
+    def __init__(self, api_key: str, timeout: float, container_tag: str):
+        from supermemory import Supermemory
+
+        self._api_key = api_key
+        self._container_tag = container_tag
+        self._timeout = timeout
+        self._client = Supermemory(api_key=api_key, timeout=timeout, max_retries=0)
+
+    def add_memory(self, content: str, metadata: Optional[dict] = None, *, entity_context: str = "") -> dict:
+        kwargs = {
+            "content": content.strip(),
+            "container_tags": [self._container_tag],
+        }
+        if metadata:
+            kwargs["metadata"] = metadata
+        if entity_context:
+            kwargs["entity_context"] = _clamp_entity_context(entity_context)
+        result = self._client.documents.add(**kwargs)
+        return {"id": getattr(result, "id", "")}
+
+    def search_memories(self, query: str, *, limit: int = 5) -> list[dict]:
+        response = self._client.search.memories(q=query, container_tag=self._container_tag, limit=limit)
+        results = []
+        for item in (getattr(response, "results", None) or []):
+            results.append({
+                "id": getattr(item, "id", ""),
+                "memory": getattr(item, "memory", "") or "",
+                "similarity": getattr(item, "similarity", None),
+                "updated_at": getattr(item, "updated_at", None) or getattr(item, "updatedAt", None),
+                "metadata": getattr(item, "metadata", None),
+            })
+        return results
+
+    def get_profile(self, query: Optional[str] = None) -> dict:
+        """Fetch profile facts and, when ``query`` is given, related memories.
+
+        Args:
+            query: Optional text sent as ``q``; omitted from the request when
+                falsy.
+
+        Returns:
+            ``{"static": [...], "dynamic": [...], "search_results": [...]}``.
+            ``static``/``dynamic`` come from ``response.profile`` and default
+            to empty lists. Search results are read from
+            ``response.search_results`` (or ``searchResults``), which may be a
+            list, an object with a ``results`` attribute, or a dict with a
+            ``"results"`` key. Dict items are passed through unchanged; other
+            items are flattened to ``memory``/``updated_at``/``similarity``.
+        """
+        kwargs = {"container_tag": self._container_tag}
+        if query:
+            kwargs["q"] = query
+        response = self._client.profile(**kwargs)
+        profile_data = getattr(response, "profile", None)
+        search_data = getattr(response, "search_results", None) or getattr(response, "searchResults", None)
+        # Parenthesised explicitly: the fallback to [] applies to the
+        # attribute value, and a missing profile yields [] as well.
+        static = (getattr(profile_data, "static", None) or []) if profile_data else []
+        dynamic = (getattr(profile_data, "dynamic", None) or []) if profile_data else []
+        if isinstance(search_data, dict):
+            # getattr() cannot see dict keys, so a dict-shaped payload would
+            # otherwise be discarded by the isinstance(list) check below.
+            raw_results = search_data.get("results") or []
+        else:
+            raw_results = getattr(search_data, "results", None) or search_data or []
+        search_results = []
+        if isinstance(raw_results, list):
+            for item in raw_results:
+                if isinstance(item, dict):
+                    search_results.append(item)
+                else:
+                    search_results.append({
+                        "memory": getattr(item, "memory", ""),
+                        "updated_at": getattr(item, "updated_at", None) or getattr(item, "updatedAt", None),
+                        "similarity": getattr(item, "similarity", None),
+                    })
+        return {"static": static, "dynamic": dynamic, "search_results": search_results}
+
+    def forget_memory(self, memory_id: str) -> None:
+        """Ask the SDK to forget ``memory_id`` within this client's container tag."""
+        self._client.memories.forget(container_tag=self._container_tag, id=memory_id)
+
+    def forget_by_query(self, query: str) -> dict:
+        """Forget the top search hit for ``query``.
+
+        Searches with ``limit=5`` and acts on the first result only. Returns
+        a dict with ``success`` and ``message``; on success it also carries
+        the forgotten ``id`` and the message quotes up to 100 characters of
+        the memory text.
+        """
+        matches = self.search_memories(query, limit=5)
+        best = matches[0] if matches else None
+        if best is None:
+            return {"success": False, "message": "No matching memory found to forget."}
+        target_id = best.get("id", "")
+        if target_id:
+            self.forget_memory(target_id)
+            snippet = (best.get("memory") or "")[:100]
+            return {"success": True, "message": f'Forgot: "{snippet}"', "id": target_id}
+        return {"success": False, "message": "Best matching memory has no id."}
+
+    def ingest_conversation(self, session_id: str, messages: list[dict]) -> None:
+        """POST a whole conversation to the conversations endpoint.
+
+        The JSON body carries ``conversationId``, ``messages`` and this
+        client's container tag; the API key is sent as a bearer token. The
+        request timeout is the client timeout plus three seconds. The
+        response body is not read; exceptions from ``urlopen`` propagate.
+        """
+        body = {
+            "conversationId": session_id,
+            "messages": messages,
+            "containerTags": [self._container_tag],
+        }
+        request_headers = {
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+        }
+        request = urllib.request.Request(
+            _CONVERSATIONS_URL,
+            data=json.dumps(body).encode("utf-8"),
+            headers=request_headers,
+            method="POST",
+        )
+        # Open only to send the request; the context manager closes the response.
+        with urllib.request.urlopen(request, timeout=self._timeout + 3):
+            pass
+
+
+# Tool schema for ``supermemory_store``: ``content`` is the only required
+# argument; ``metadata`` is an optional free-form object.
+STORE_SCHEMA = {
+    "name": "supermemory_store",
+    "description": "Store an explicit memory for future recall.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "content": {"type": "string", "description": "The memory content to store."},
+            "metadata": {"type": "object", "description": "Optional metadata attached to the memory."},
+        },
+        "required": ["content"],
+    },
+}
+
+# Tool schema for ``supermemory_search``: ``query`` is required; the 1-20
+# range for ``limit`` is stated in the description only, not as a schema
+# constraint.
+SEARCH_SCHEMA = {
+    "name": "supermemory_search",
+    "description": "Search long-term memory by semantic similarity.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "What to search for."},
+            "limit": {"type": "integer", "description": "Maximum results to return, 1 to 20."},
+        },
+        "required": ["query"],
+    },
+}
+
+# Tool schema for ``supermemory_forget``: neither ``id`` nor ``query`` is
+# marked required here, so the schema itself accepts a call with neither.
+FORGET_SCHEMA = {
+    "name": "supermemory_forget",
+    "description": "Forget a memory by exact id or by best-match query.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "id": {"type": "string", "description": "Exact memory id to delete."},
+            "query": {"type": "string", "description": "Query used to find the memory to forget."},
+        },
+    },
+}
+
+# Tool schema for ``supermemory_profile``: the single ``query`` argument is
+# optional.
+PROFILE_SCHEMA = {
+    "name": "supermemory_profile",
+    "description": "Retrieve persistent profile facts and recent memory context.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "Optional query to focus the profile response."},
+        },
+    },
+}
+
+
+class SupermemoryMemoryProvider(MemoryProvider):
+    """Memory provider that recalls from and captures to Supermemory.
+
+    The provider stays inactive until ``initialize()`` finds an API key and
+    builds a client; while inactive the recall, capture and tool hooks
+    return empty results or an error payload. Per-turn capture
+    (``sync_turn``) and mirrored memory writes (``on_memory_write``) run on
+    background threads.
+    """
+
+    def __init__(self):
+        self._config = _default_config()
+        self._api_key = ""
+        self._client: Optional[_SupermemoryClient] = None
+        self._container_tag = _DEFAULT_CONTAINER_TAG
+        self._session_id = ""
+        self._turn_count = 0
+        self._prefetch_result = ""
+        self._prefetch_lock = threading.Lock()
+        self._prefetch_thread: Optional[threading.Thread] = None
+        self._sync_thread: Optional[threading.Thread] = None
+        self._write_thread: Optional[threading.Thread] = None
+        self._auto_recall = True
+        self._auto_capture = True
+        self._max_recall_results = _DEFAULT_MAX_RECALL_RESULTS
+        self._profile_frequency = _DEFAULT_PROFILE_FREQUENCY
+        self._capture_mode = _DEFAULT_CAPTURE_MODE
+        self._entity_context = _DEFAULT_ENTITY_CONTEXT
+        self._api_timeout = _DEFAULT_API_TIMEOUT
+        self._hermes_home = ""
+        # Set to False by initialize() for cron/flush/subagent contexts.
+        self._write_enabled = True
+        self._active = False
+
+    @property
+    def name(self) -> str:
+        """Provider identifier."""
+        return "supermemory"
+
+    def is_available(self) -> bool:
+        """Return True when SUPERMEMORY_API_KEY is set and the SDK imports."""
+        api_key = os.environ.get("SUPERMEMORY_API_KEY", "")
+        if not api_key:
+            return False
+        try:
+            __import__("supermemory")
+            return True
+        except Exception:
+            # Any import-time failure means the provider is unusable.
+            return False
+
+    def get_config_schema(self):
+        """Describe the configurable settings and their defaults."""
+        return [
+            {"key": "api_key", "description": "Supermemory API key", "secret": True, "required": True, "env_var": "SUPERMEMORY_API_KEY", "url": "https://supermemory.ai"},
+            {"key": "container_tag", "description": "Container tag for reads and writes", "default": _DEFAULT_CONTAINER_TAG},
+            {"key": "auto_recall", "description": "Enable automatic recall before each turn", "default": "true", "choices": ["true", "false"]},
+            {"key": "auto_capture", "description": "Enable automatic capture after each completed turn", "default": "true", "choices": ["true", "false"]},
+            {"key": "max_recall_results", "description": "Maximum recalled items to inject", "default": str(_DEFAULT_MAX_RECALL_RESULTS)},
+            {"key": "profile_frequency", "description": "Include profile facts on first turn and every N turns", "default": str(_DEFAULT_PROFILE_FREQUENCY)},
+            {"key": "capture_mode", "description": "Capture mode", "default": _DEFAULT_CAPTURE_MODE, "choices": ["all", "everything"]},
+            {"key": "entity_context", "description": "Extraction guidance passed to Supermemory", "default": _DEFAULT_ENTITY_CONTEXT},
+            {"key": "api_timeout", "description": "Timeout in seconds for SDK and ingest calls", "default": str(_DEFAULT_API_TIMEOUT)},
+        ]
+
+    def save_config(self, values, hermes_home):
+        """Persist ``values`` after sanitizing the tag and entity context.
+
+        Works on a copy, so the caller's mapping is left untouched.
+        """
+        sanitized = dict(values or {})
+        if "container_tag" in sanitized:
+            sanitized["container_tag"] = _sanitize_tag(str(sanitized["container_tag"]))
+        if "entity_context" in sanitized:
+            sanitized["entity_context"] = _clamp_entity_context(str(sanitized["entity_context"]))
+        _save_supermemory_config(sanitized, hermes_home)
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Load configuration and build the API client for ``session_id``.
+
+        Recognised kwargs: ``hermes_home`` (defaults to ``get_hermes_home()``)
+        and ``agent_context``; the contexts ``cron``, ``flush`` and
+        ``subagent`` disable writes. If no API key is set or client
+        construction fails, the provider is left inactive.
+        """
+        from hermes_constants import get_hermes_home
+        self._hermes_home = kwargs.get("hermes_home") or str(get_hermes_home())
+        self._session_id = session_id
+        self._turn_count = 0
+        self._config = _load_supermemory_config(self._hermes_home)
+        self._api_key = os.environ.get("SUPERMEMORY_API_KEY", "")
+        self._container_tag = self._config["container_tag"]
+        self._auto_recall = self._config["auto_recall"]
+        self._auto_capture = self._config["auto_capture"]
+        self._max_recall_results = self._config["max_recall_results"]
+        self._profile_frequency = self._config["profile_frequency"]
+        self._capture_mode = self._config["capture_mode"]
+        self._entity_context = self._config["entity_context"]
+        self._api_timeout = self._config["api_timeout"]
+        agent_context = kwargs.get("agent_context", "")
+        self._write_enabled = agent_context not in ("cron", "flush", "subagent")
+        self._active = bool(self._api_key)
+        self._client = None
+        if self._active:
+            try:
+                self._client = _SupermemoryClient(
+                    api_key=self._api_key,
+                    timeout=self._api_timeout,
+                    container_tag=self._container_tag,
+                )
+            except Exception:
+                logger.warning("Supermemory initialization failed", exc_info=True)
+                self._active = False
+                self._client = None
+
+    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
+        """Record the current turn number (clamped to be non-negative)."""
+        self._turn_count = max(turn_number, 0)
+
+    def system_prompt_block(self) -> str:
+        """Return the system-prompt section, or "" when inactive."""
+        if not self._active:
+            return ""
+        return (
+            "# Supermemory\n"
+            f"Active. Container: {self._container_tag}.\n"
+            "Use supermemory_search, supermemory_store, supermemory_forget, and supermemory_profile for explicit memory operations."
+        )
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Return recalled context for ``query``, or "" when nothing applies.
+
+        Only the first 200 characters of ``query`` are sent. Profile facts
+        are included on turns 0/1 and on every ``profile_frequency``-th turn;
+        search results are always passed to the formatter. Any failure is
+        logged at debug level and yields "".
+        """
+        if not self._active or not self._auto_recall or not self._client or not query.strip():
+            return ""
+        try:
+            profile = self._client.get_profile(query=query[:200])
+            frequency = self._profile_frequency
+            # A frequency of 0 would raise ZeroDivisionError here, which the
+            # handler below would swallow, silently disabling all recall.
+            # Treat a non-positive frequency as "first turn only".
+            include_profile = self._turn_count <= 1 or (
+                frequency > 0 and self._turn_count % frequency == 0
+            )
+            context = _format_prefetch_context(
+                static_facts=profile["static"] if include_profile else [],
+                dynamic_facts=profile["dynamic"] if include_profile else [],
+                search_results=profile["search_results"],
+                max_results=self._max_recall_results,
+            )
+            return context
+        except Exception:
+            logger.debug("Supermemory prefetch failed", exc_info=True)
+            return ""
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Capture one completed user/assistant exchange in the background.
+
+        Skipped when capture or writes are disabled, or when either side is
+        empty after cleaning. In capture mode ``"all"`` short or trivial
+        exchanges are skipped as well. A still-running previous capture is
+        waited on for at most two seconds before the new thread starts.
+        """
+        if not self._active or not self._auto_capture or not self._write_enabled or not self._client:
+            return
+
+        clean_user = _clean_text_for_capture(user_content)
+        clean_assistant = _clean_text_for_capture(assistant_content)
+        if not clean_user or not clean_assistant:
+            return
+        if self._capture_mode == "all":
+            if len(clean_user) < _MIN_CAPTURE_LENGTH or len(clean_assistant) < _MIN_CAPTURE_LENGTH:
+                return
+            if _is_trivial_message(clean_user):
+                return
+
+        content = (
+            f"[role: user]\n{clean_user}\n[user:end]\n\n"
+            f"[role: assistant]\n{clean_assistant}\n[assistant:end]"
+        )
+        metadata = {"source": "hermes", "type": "conversation_turn"}
+        # Bind now: initialize() may reset self._client to None before the
+        # worker thread gets to run.
+        client = self._client
+        entity_context = self._entity_context
+
+        def _run():
+            try:
+                client.add_memory(content, metadata=metadata, entity_context=entity_context)
+            except Exception:
+                logger.debug("Supermemory sync_turn failed", exc_info=True)
+
+        previous = self._sync_thread
+        if previous and previous.is_alive():
+            previous.join(timeout=2.0)
+        self._sync_thread = threading.Thread(target=_run, daemon=True, name="supermemory-sync")
+        self._sync_thread.start()
+
+    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
+        """Ingest the cleaned user/assistant messages of the finished session.
+
+        Messages with other roles, with ``content`` of ``None``, or with
+        nothing left after cleaning are dropped. Nothing is sent when no
+        message survives, or when the only surviving message is shorter than
+        20 characters. Ingest failures are logged as warnings, never raised.
+        """
+        if not self._active or not self._write_enabled or not self._client or not self._session_id:
+            return
+        cleaned = []
+        for message in messages or []:
+            role = message.get("role")
+            if role not in ("user", "assistant"):
+                continue
+            raw_content = message.get("content")
+            if raw_content is None:
+                # str(None) would be ingested as the literal text "None".
+                continue
+            content = _clean_text_for_capture(str(raw_content))
+            if content:
+                cleaned.append({"role": role, "content": content})
+        if not cleaned:
+            return
+        if len(cleaned) == 1 and len(cleaned[0].get("content", "")) < 20:
+            return
+        try:
+            self._client.ingest_conversation(self._session_id, cleaned)
+        except Exception:
+            # HTTP errors from urllib are Exception subclasses and land here too.
+            logger.warning("Supermemory session ingest failed", exc_info=True)
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Mirror an ``add`` memory write into Supermemory in the background.
+
+        Other actions and blank content are ignored. A still-running
+        previous write is waited on for at most two seconds first.
+        """
+        if not self._active or not self._write_enabled or not self._client:
+            return
+        if action != "add" or not (content or "").strip():
+            return
+
+        # Bind now: initialize() may reset self._client to None before the
+        # worker thread gets to run.
+        client = self._client
+        entity_context = self._entity_context
+
+        def _run():
+            try:
+                client.add_memory(
+                    content.strip(),
+                    metadata={"source": "hermes_memory", "target": target, "type": "explicit_memory"},
+                    entity_context=entity_context,
+                )
+            except Exception:
+                logger.debug("Supermemory on_memory_write failed", exc_info=True)
+
+        previous = self._write_thread
+        if previous and previous.is_alive():
+            previous.join(timeout=2.0)
+        self._write_thread = threading.Thread(target=_run, daemon=False, name="supermemory-memory-write")
+        self._write_thread.start()
+
+    def shutdown(self) -> None:
+        """Wait up to five seconds per background thread, then drop them."""
+        for attr_name in ("_prefetch_thread", "_sync_thread", "_write_thread"):
+            thread = getattr(self, attr_name, None)
+            if thread and thread.is_alive():
+                thread.join(timeout=5.0)
+            setattr(self, attr_name, None)
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return the schemas of the four explicit memory tools."""
+        return [STORE_SCHEMA, SEARCH_SCHEMA, FORGET_SCHEMA, PROFILE_SCHEMA]
+
+    def _tool_store(self, args: dict) -> str:
+        """Handle ``supermemory_store``; returns a JSON string."""
+        content = str(args.get("content") or "").strip()
+        if not content:
+            return json.dumps({"error": "content is required"})
+        raw_metadata = args.get("metadata")
+        # Copy so the defaults added below do not leak into the caller's args.
+        metadata = dict(raw_metadata) if isinstance(raw_metadata, dict) else {}
+        metadata.setdefault("type", _detect_category(content))
+        metadata["source"] = "hermes_tool"
+        try:
+            result = self._client.add_memory(content, metadata=metadata, entity_context=self._entity_context)
+            preview = content[:80] + ("..." if len(content) > 80 else "")
+            return json.dumps({"saved": True, "id": result.get("id", ""), "preview": preview})
+        except Exception as exc:
+            return json.dumps({"error": f"Failed to store memory: {exc}"})
+
+    def _tool_search(self, args: dict) -> str:
+        """Handle ``supermemory_search``; returns a JSON string.
+
+        ``limit`` is clamped to 1..20 and falls back to 5 when it cannot be
+        parsed. Similarity is reported as a rounded percentage when present.
+        """
+        query = str(args.get("query") or "").strip()
+        if not query:
+            return json.dumps({"error": "query is required"})
+        try:
+            limit = max(1, min(20, int(args.get("limit", 5) or 5)))
+        except Exception:
+            limit = 5
+        try:
+            results = self._client.search_memories(query, limit=limit)
+            formatted = []
+            for item in results:
+                entry = {"id": item.get("id", ""), "content": item.get("memory", "")}
+                if item.get("similarity") is not None:
+                    try:
+                        entry["similarity"] = round(float(item["similarity"]) * 100)
+                    except Exception:
+                        # Unparseable similarity: omit the field, keep the hit.
+                        pass
+                formatted.append(entry)
+            return json.dumps({"results": formatted, "count": len(formatted)})
+        except Exception as exc:
+            return json.dumps({"error": f"Search failed: {exc}"})
+
+    def _tool_forget(self, args: dict) -> str:
+        """Handle ``supermemory_forget``; an ``id`` takes precedence over ``query``."""
+        memory_id = str(args.get("id") or "").strip()
+        query = str(args.get("query") or "").strip()
+        if not memory_id and not query:
+            return json.dumps({"error": "Provide either id or query"})
+        try:
+            if memory_id:
+                self._client.forget_memory(memory_id)
+                return json.dumps({"forgotten": True, "id": memory_id})
+            return json.dumps(self._client.forget_by_query(query))
+        except Exception as exc:
+            return json.dumps({"error": f"Forget failed: {exc}"})
+
+    def _tool_profile(self, args: dict) -> str:
+        """Handle ``supermemory_profile``; returns markdown sections plus counts."""
+        query = str(args.get("query") or "").strip() or None
+        try:
+            profile = self._client.get_profile(query=query)
+            sections = []
+            if profile["static"]:
+                sections.append("## User Profile (Persistent)\n" + "\n".join(f"- {item}" for item in profile["static"]))
+            if profile["dynamic"]:
+                sections.append("## Recent Context\n" + "\n".join(f"- {item}" for item in profile["dynamic"]))
+            return json.dumps({
+                "profile": "\n\n".join(sections),
+                "static_count": len(profile["static"]),
+                "dynamic_count": len(profile["dynamic"]),
+            })
+        except Exception as exc:
+            return json.dumps({"error": f"Profile failed: {exc}"})
+
+    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
+        """Dispatch a tool call by name; always returns a JSON string.
+
+        Returns an error payload when the provider is inactive or the tool
+        name is unknown.
+        """
+        if not self._active or not self._client:
+            return json.dumps({"error": "Supermemory is not configured"})
+        if tool_name == "supermemory_store":
+            return self._tool_store(args)
+        if tool_name == "supermemory_search":
+            return self._tool_search(args)
+        if tool_name == "supermemory_forget":
+            return self._tool_forget(args)
+        if tool_name == "supermemory_profile":
+            return self._tool_profile(args)
+        return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+
+def register(ctx):
+    """Register a new ``SupermemoryMemoryProvider`` instance with ``ctx``."""
+    ctx.register_memory_provider(SupermemoryMemoryProvider())
@@ -0,0 +1,5 @@
+name: supermemory
+version: 1.0.0
+description: "Supermemory semantic long-term memory with profile recall, semantic search, explicit memory tools, and session ingest."
+pip_dependencies:
+  - supermemory
@@ -3444,7 +3444,22 @@ class AIAgent:
        """Normalize a Responses API object to an assistant_message-like object."""
        output = getattr(response, "output", None)
        if not isinstance(output, list) or not output:
-            raise RuntimeError("Responses API returned no output items")
+            # The Codex backend can return empty output when the answer was
+            # delivered entirely via stream events. Check output_text as a
+            # last-resort fallback before raising.
+            out_text = getattr(response, "output_text", None)
+            if isinstance(out_text, str) and out_text.strip():
+                logger.debug(
+                    "Codex response has empty output but output_text is present (%d chars); "
+                    "synthesizing output item.", len(out_text.strip()),
+                )
+                output = [SimpleNamespace(
+                    type="message", role="assistant", status="completed",
+                    content=[SimpleNamespace(type="output_text", text=out_text.strip())],
+                )]
+                response.output = output
+            else:
+                raise RuntimeError("Responses API returned no output items")

        response_status = getattr(response, "status", None)
        if isinstance(response_status, str):
@@ -3997,11 +4012,28 @@ class AIAgent:
            return stream_or_response

        terminal_response = None
+        collected_output_items: list = []
+        collected_text_deltas: list = []
        try:
            for event in stream_or_response:
                event_type = getattr(event, "type", None)
                if not event_type and isinstance(event, dict):
                    event_type = event.get("type")
+
+                # Collect output items and text deltas for backfill
+                if event_type == "response.output_item.done":
+                    done_item = getattr(event, "item", None)
+                    if done_item is None and isinstance(event, dict):
+                        done_item = event.get("item")
+                    if done_item is not None:
+                        collected_output_items.append(done_item)
+                elif event_type in ("response.output_text.delta",):
+                    delta = getattr(event, "delta", "")
+                    if not delta and isinstance(event, dict):
+                        delta = event.get("delta", "")
+                    if delta:
+                        collected_text_deltas.append(delta)
+
                if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
                    continue

@@ -4009,6 +4041,26 @@ class AIAgent:
                if terminal_response is None and isinstance(event, dict):
                    terminal_response = event.get("response")
                if terminal_response is not None:
+                    # Backfill empty output from collected stream events
+                    _out = getattr(terminal_response, "output", None)
+                    if isinstance(_out, list) and not _out:
+                        if collected_output_items:
+                            terminal_response.output = list(collected_output_items)
+                            logger.debug(
+                                "Codex fallback stream: backfilled %d output items",
+                                len(collected_output_items),
+                            )
+                        elif collected_text_deltas:
+                            assembled = "".join(collected_text_deltas)
+                            terminal_response.output = [SimpleNamespace(
+                                type="message", role="assistant",
+                                status="completed",
+                                content=[SimpleNamespace(type="output_text", text=assembled)],
+                            )]
+                            logger.debug(
+                                "Codex fallback stream: synthesized from %d deltas (%d chars)",
+                                len(collected_text_deltas), len(assembled),
+                            )
                    return terminal_response
        finally:
            close_fn = getattr(stream_or_response, "close", None)
@@ -16,7 +16,7 @@ This skill guides you through systematic exploratory QA testing of web applicati

 ## Prerequisites

- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`, `browser_close`)
+- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`)
 - A target URL and testing scope from the user

 ## Inputs
@@ -148,7 +148,6 @@ Save the report to `{output_dir}/report.md`.
 | `browser_press` | Press a keyboard key |
 | `browser_vision` | Screenshot + AI analysis; use `annotate=true` for element labels |
 | `browser_console` | Get JS console output and errors |
-| `browser_close` | Close the browser session |

 ## Tips

@@ -1,48 +1,106 @@
 ---
 name: blogwatcher
-description: Monitor blogs and RSS/Atom feeds for updates using the blogwatcher CLI. Add blogs, scan for new articles, and track what you've read.
-version: 1.0.0
-author: community
+description: Monitor blogs and RSS/Atom feeds for updates using the blogwatcher-cli tool. Add blogs, scan for new articles, track read status, and filter by category.
+version: 2.0.0
+author: JulienTant (fork of Hyaxia/blogwatcher)
 license: MIT
 metadata:
  hermes:
    tags: [RSS, Blogs, Feed-Reader, Monitoring]
-    homepage: https://github.com/Hyaxia/blogwatcher
+    homepage: https://github.com/JulienTant/blogwatcher-cli
 prerequisites:
-  commands: [blogwatcher]
+  commands: [blogwatcher-cli]
 ---

 # Blogwatcher

-Track blog and RSS/Atom feed updates with the `blogwatcher` CLI.
+Track blog and RSS/Atom feed updates with the `blogwatcher-cli` tool. Supports automatic feed discovery, HTML scraping fallback, OPML import, and read/unread article management.

-## Prerequisites
+## Installation

- Go installed (`go version` to check)
- Install: `go install github.com/Hyaxia/blogwatcher/cmd/blogwatcher@latest`
+Pick one method:
+
+- **Go:** `go install github.com/JulienTant/blogwatcher-cli/cmd/blogwatcher-cli@latest`
+- **Docker:** `docker run --rm -v blogwatcher-cli:/data ghcr.io/julientant/blogwatcher-cli`
+- **Binary (Linux amd64):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_linux_amd64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli`
+- **Binary (Linux arm64):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_linux_arm64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli`
+- **Binary (macOS Apple Silicon):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_darwin_arm64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli`
+- **Binary (macOS Intel):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_darwin_amd64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli`
+
+All releases: https://github.com/JulienTant/blogwatcher-cli/releases
+
+### Docker with persistent storage
+
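+By default the database lives at `~/.blogwatcher-cli/blogwatcher-cli.db`. In Docker that path is inside the container, so the database is lost when the container is removed (for example when run with `--rm`). Mount a volume and point `BLOGWATCHER_DB` at a file on it to persist the database: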
+
+```bash
+# Named volume (simplest)
+docker run --rm -v blogwatcher-cli:/data -e BLOGWATCHER_DB=/data/blogwatcher-cli.db ghcr.io/julientant/blogwatcher-cli scan
+
+# Host bind mount
+docker run --rm -v /path/on/host:/data -e BLOGWATCHER_DB=/data/blogwatcher-cli.db ghcr.io/julientant/blogwatcher-cli scan
+```
+
+### Migrating from the original blogwatcher
+
+If upgrading from `Hyaxia/blogwatcher`, move your database to the new location (create the `~/.blogwatcher-cli` directory first if it does not exist yet):
+
+```bash
+mv ~/.blogwatcher/blogwatcher.db ~/.blogwatcher-cli/blogwatcher-cli.db
+```
+
+The binary name changed from `blogwatcher` to `blogwatcher-cli`.

 ## Common Commands

- Add a blog: `blogwatcher add "My Blog" https://example.com`
- List blogs: `blogwatcher blogs`
- Scan for updates: `blogwatcher scan`
- List articles: `blogwatcher articles`
- Mark an article read: `blogwatcher read 1`
- Mark all articles read: `blogwatcher read-all`
- Remove a blog: `blogwatcher remove "My Blog"`
+### Managing blogs
+
+- Add a blog: `blogwatcher-cli add "My Blog" https://example.com`
+- Add with explicit feed: `blogwatcher-cli add "My Blog" https://example.com --feed-url https://example.com/feed.xml`
+- Add with HTML scraping: `blogwatcher-cli add "My Blog" https://example.com --scrape-selector "article h2 a"`
+- List tracked blogs: `blogwatcher-cli blogs`
+- Remove a blog: `blogwatcher-cli remove "My Blog" --yes`
+- Import from OPML: `blogwatcher-cli import subscriptions.opml`
+
+### Scanning and reading
+
+- Scan all blogs: `blogwatcher-cli scan`
+- Scan one blog: `blogwatcher-cli scan "My Blog"`
+- List unread articles: `blogwatcher-cli articles`
+- List all articles: `blogwatcher-cli articles --all`
+- Filter by blog: `blogwatcher-cli articles --blog "My Blog"`
+- Filter by category: `blogwatcher-cli articles --category "Engineering"`
+- Mark article read: `blogwatcher-cli read 1`
+- Mark article unread: `blogwatcher-cli unread 1`
+- Mark all read: `blogwatcher-cli read-all`
+- Mark all read for a blog: `blogwatcher-cli read-all --blog "My Blog" --yes`
+
+## Environment Variables
+
+All flags can be set via environment variables with the `BLOGWATCHER_` prefix:
+
+| Variable | Description |
+|---|---|
+| `BLOGWATCHER_DB` | Path to SQLite database file |
+| `BLOGWATCHER_WORKERS` | Number of concurrent scan workers (default: 8) |
+| `BLOGWATCHER_SILENT` | Only output "scan done" when scanning |
+| `BLOGWATCHER_YES` | Skip confirmation prompts |
+| `BLOGWATCHER_CATEGORY` | Default filter for articles by category |

 ## Example Output

 ```
-$ blogwatcher blogs
+$ blogwatcher-cli blogs
 Tracked blogs (1):

  xkcd
    URL: https://xkcd.com
+    Feed: https://xkcd.com/atom.xml
+    Last scanned: 2026-04-03 10:30
 ```

 ```
-$ blogwatcher scan
+$ blogwatcher-cli scan
 Scanning 1 blog(s)...

  xkcd
@@ -51,6 +109,28 @@ Scanning 1 blog(s)...
 Found 4 new article(s) total!
 ```

+```
+$ blogwatcher-cli articles
+Unread articles (2):
+
+  [1] [new] Barrel - Part 13
+       Blog: xkcd
+       URL: https://xkcd.com/3095/
+       Published: 2026-04-02
+       Categories: Comics, Science
+
+  [2] [new] Volcano Fact
+       Blog: xkcd
+       URL: https://xkcd.com/3094/
+       Published: 2026-04-01
+       Categories: Comics
+```
+
 ## Notes

- Use `blogwatcher <command> --help` to discover flags and options.
+- Auto-discovers RSS/Atom feeds from blog homepages when no `--feed-url` is provided.
+- Falls back to HTML scraping if RSS fails and `--scrape-selector` is configured.
+- Categories from RSS/Atom feeds are stored and can be used to filter articles.
+- Import blogs in bulk from OPML files exported by Feedly, Inoreader, NewsBlur, etc.
+- Database stored at `~/.blogwatcher-cli/blogwatcher-cli.db` by default (override with `--db` or `BLOGWATCHER_DB`).
+- Use `blogwatcher-cli <command> --help` to discover all flags and options.
@@ -423,7 +423,7 @@ class TestBuildNousSubscriptionPrompt:
                    "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
                    "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
                    "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
-                    "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"),
+                    "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
                    "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
                },
            ),
@@ -431,9 +431,9 @@ class TestBuildNousSubscriptionPrompt:

        prompt = build_nous_subscription_prompt({"web_search", "browser_navigate"})

-        assert "Browserbase" in prompt
+        assert "Browser Use" in prompt
        assert "Modal execution is optional" in prompt
-        assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys" in prompt
+        assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt

    def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch):
        monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")
@@ -39,7 +39,7 @@ class TestHermesApiServerToolset:
        tools = resolve_toolset("hermes-api-server")
        for tool in ["browser_navigate", "browser_snapshot", "browser_click",
                      "browser_type", "browser_scroll", "browser_back",
-                      "browser_press", "browser_close"]:
+                      "browser_press"]:
            assert tool in tools, f"Missing browser tool: {tool}"

    def test_toolset_includes_homeassistant_tools(self):
@@ -0,0 +1,313 @@
+"""Regression tests: slash commands must bypass the base adapter's active-session guard.
+
+When an agent is running, the base adapter's Level 1 guard in
+handle_message() intercepts all incoming messages and queues them as
+pending.  Certain commands (/stop, /new, /reset, /approve, /deny,
+/status) must bypass this guard and be dispatched directly to the gateway
+runner — otherwise they are queued as user text and either:
+  - leak into the conversation as agent input (/stop, /new), or
+  - deadlock (/approve, /deny — agent blocks on Event.wait)
+
+These tests verify that the bypass works at the adapter level and that
+the safety net in _run_agent discards leaked command text.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
+from gateway.session import SessionSource, build_session_key
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+class _StubAdapter(BasePlatformAdapter):
+    """Minimal concrete adapter: every abstract hook is an inert coroutine."""
+
+    async def connect(self):
+        return None
+
+    async def disconnect(self):
+        return None
+
+    async def send(self, chat_id, text, **kwargs):
+        return None
+
+    async def get_chat_info(self, chat_id):
+        return {}
+
+
+def _make_adapter():
+    """Build a stub Telegram adapter that records every reply it would send."""
+    cfg = PlatformConfig(enabled=True, token="test-token")
+    stub = _StubAdapter(cfg, Platform.TELEGRAM)
+    stub.sent_responses = []
+
+    async def _echo_handler(event):
+        command = event.get_command()
+        return f"handled:{command}" if command else f"handled:text:{event.text}"
+
+    async def _record_send(chat_id, content, **kwargs):
+        stub.sent_responses.append(content)
+
+    # Route message handling and reply delivery through the fakes above.
+    stub._message_handler = _echo_handler
+    stub._send_with_retry = _record_send
+    return stub
+
+
+def _make_event(text="/stop", chat_id="12345"):
+    """Wrap *text* in a TEXT MessageEvent from a Telegram DM with *chat_id*."""
+    # Source fields mirror the ones _session_key() passes to SessionSource.
+    dm_source = SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm")
+    return MessageEvent(text=text, message_type=MessageType.TEXT, source=dm_source)
+
+
+def _session_key(chat_id="12345"):
+    """Return build_session_key() for a Telegram DM source with *chat_id*."""
+    return build_session_key(
+        SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm")
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests: commands bypass Level 1 when session is active
+# ---------------------------------------------------------------------------
+
+
+class TestCommandBypassActiveSession:
+    """Control commands are dispatched even while a session is marked active."""
+
+    @pytest.mark.asyncio
+    async def test_stop_bypasses_guard(self):
+        """An active session must not cause /stop to be queued."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()  # mark the session active
+
+        await stub.handle_message(_make_event("/stop"))
+
+        assert key not in stub._pending_messages, (
+            "/stop was queued as a pending message instead of being dispatched"
+        )
+        assert any("handled:stop" in reply for reply in stub.sent_responses), (
+            "/stop response was not sent back to the user"
+        )
+
+    @pytest.mark.asyncio
+    async def test_new_bypasses_guard(self):
+        """An active session must not cause /new to be queued."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/new"))
+
+        assert key not in stub._pending_messages
+        assert any("handled:new" in reply for reply in stub.sent_responses)
+
+    @pytest.mark.asyncio
+    async def test_reset_bypasses_guard(self):
+        """/reset (an alias for /new) is dispatched directly as well."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/reset"))
+
+        assert key not in stub._pending_messages
+        assert any("handled:reset" in reply for reply in stub.sent_responses)
+
+    @pytest.mark.asyncio
+    async def test_approve_bypasses_guard(self):
+        """/approve is dispatched directly (deadlock prevention)."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/approve"))
+
+        assert key not in stub._pending_messages
+        assert any("handled:approve" in reply for reply in stub.sent_responses)
+
+    @pytest.mark.asyncio
+    async def test_deny_bypasses_guard(self):
+        """/deny is dispatched directly (deadlock prevention)."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/deny"))
+
+        assert key not in stub._pending_messages
+        assert any("handled:deny" in reply for reply in stub.sent_responses)
+
+    @pytest.mark.asyncio
+    async def test_status_bypasses_guard(self):
+        """/status is dispatched directly so a system response comes back."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/status"))
+
+        assert key not in stub._pending_messages
+        assert any("handled:status" in reply for reply in stub.sent_responses)
+
+
+# ---------------------------------------------------------------------------
+# Tests: non-bypass messages still get queued
+# ---------------------------------------------------------------------------
+
+
+class TestNonBypassStillQueued:
+    """Plain text and unrecognized commands stay on the pending queue."""
+
+    @pytest.mark.asyncio
+    async def test_regular_text_queued(self):
+        """Ordinary text sent during an active session is queued as pending."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()  # mark the session active
+
+        await stub.handle_message(_make_event("hello world"))
+
+        assert key in stub._pending_messages, (
+            "Regular text was not queued — it should be pending"
+        )
+        assert not stub.sent_responses, (
+            "Regular text should not produce a direct response"
+        )
+
+    @pytest.mark.asyncio
+    async def test_unknown_command_queued(self):
+        """An unrecognized /command is queued rather than dispatched."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/foobar"))
+
+        assert key in stub._pending_messages
+        assert not stub.sent_responses
+
+    @pytest.mark.asyncio
+    async def test_file_path_not_treated_as_command(self):
+        """Text shaped like '/path/to/file' does not bypass the guard."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/path/to/file.py"))
+
+        assert key in stub._pending_messages
+        assert not stub.sent_responses
+
+
+# ---------------------------------------------------------------------------
+# Tests: no active session — commands go through normally
+# ---------------------------------------------------------------------------
+
+
+class TestNoActiveSessionNormalDispatch:
+    """With no session marked active, messages take the normal dispatch path."""
+
+    @pytest.mark.asyncio
+    async def test_stop_when_no_session_active(self):
+        """/stop with no active session is not queued as pending
+        (per the original note, the Level 2 handler answers 'No active task')."""
+        stub = _make_adapter()
+        key = _session_key()
+
+        # Precondition: nothing has registered this session as active.
+        assert key not in stub._active_sessions
+
+        await stub.handle_message(_make_event("/stop"))
+
+        # The pending queue is only for messages arriving during an active
+        # session, so the normal path must leave it untouched for this key.
+        assert key not in stub._pending_messages
+
+
+# ---------------------------------------------------------------------------
+# Tests: safety net in _run_agent discards command text from pending queue
+# ---------------------------------------------------------------------------
+
+
+class TestPendingCommandSafetyNet:
+    """resolve_command lookups relied on by the safety net in gateway/run.py
+    _run_agent, which discards command text leaked into the pending queue."""
+
+    def test_stop_command_detected(self):
+        """resolve_command must map 'stop' to the stop command so the
+        safety net can discard it."""
+        from hermes_cli.commands import resolve_command
+
+        stop_cmd = resolve_command("stop")
+        assert stop_cmd is not None and stop_cmd.name == "stop"
+
+    def test_new_command_detected(self):
+        from hermes_cli.commands import resolve_command
+
+        new_cmd = resolve_command("new")
+        assert new_cmd is not None and new_cmd.name == "new"
+
+    def test_reset_alias_detected(self):
+        from hermes_cli.commands import resolve_command
+
+        reset_cmd = resolve_command("reset")
+        assert reset_cmd is not None and reset_cmd.name == "new"  # alias
+
+    def test_unknown_command_not_detected(self):
+        """A name that is not a registered command resolves to None."""
+        from hermes_cli.commands import resolve_command
+        assert resolve_command("foobar") is None
+
+    def test_file_path_not_detected_as_command(self):
+        """The first word of '/path/to/file' must not resolve as a command."""
+        from hermes_cli.commands import resolve_command
+
+        # Per the safety net's parsing: split on whitespace, take the first
+        # word, strip the leading '/'.  '/path/to/file' yields 'path/to/file'.
+        assert resolve_command("path/to/file") is None
+
+
+# ---------------------------------------------------------------------------
+# Tests: bypass with @botname suffix (Telegram-style)
+# ---------------------------------------------------------------------------
+
+
+class TestBypassWithBotnameSuffix:
+    """Commands carrying a Telegram-style @botname suffix still bypass."""
+
+    @pytest.mark.asyncio
+    async def test_stop_with_botname(self):
+        """/stop@MyHermesBot is dispatched directly, not queued."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()  # mark the session active
+
+        await stub.handle_message(_make_event("/stop@MyHermesBot"))
+
+        assert key not in stub._pending_messages, (
+            "/stop@MyHermesBot was queued instead of bypassing"
+        )
+        assert any("handled:stop" in reply for reply in stub.sent_responses)
+
+    @pytest.mark.asyncio
+    async def test_new_with_botname(self):
+        """/new@MyHermesBot is dispatched directly, not queued."""
+        stub = _make_adapter()
+        key = _session_key()
+        stub._active_sessions[key] = asyncio.Event()
+
+        await stub.handle_message(_make_event("/new@MyHermesBot"))
+
+        assert key not in stub._pending_messages
+        assert any("handled:new" in reply for reply in stub.sent_responses)
@@ -699,6 +699,147 @@ class TestReactions:
        assert remove_calls[0].kwargs["name"] == "eyes"


+# ---------------------------------------------------------------------------
+# TestThreadReplyHandling
+# ---------------------------------------------------------------------------
+
+
+class TestThreadReplyHandling:
+    """Channel thread replies, with and without an explicit bot mention."""
+
+    @pytest.fixture()
+    def mock_session_store(self):
+        """Mock session store that starts with an empty ``_entries`` dict."""
+        fake_store = MagicMock()
+        fake_store._entries = {}
+        fake_store._ensure_loaded = MagicMock()
+        fake_store.config = MagicMock()
+        fake_store.config.group_sessions_per_user = True
+        return fake_store
+
+    @pytest.fixture()
+    def adapter_with_session_store(self, mock_session_store):
+        """SlackAdapter with mocked internals and the mock store attached."""
+        cfg = PlatformConfig(enabled=True, token="***")
+        slack = SlackAdapter(cfg)
+        slack._app = MagicMock()
+        slack._app.client = AsyncMock()
+        slack._bot_user_id = "U_BOT"
+        slack._team_bot_user_ids = {"T_TEAM": "U_BOT"}
+        slack._running = True
+        slack.handle_message = AsyncMock()
+        slack.set_session_store(mock_session_store)
+        return slack
+
+    @pytest.mark.asyncio
+    async def test_thread_reply_without_mention_no_session_ignored(
+        self, adapter_with_session_store, mock_session_store
+    ):
+        """An unmentioned thread reply is dropped when the store has no session."""
+        mock_session_store._entries = {}  # No active sessions
+
+        payload = {
+            "text": "Just replying in the thread",
+            "user": "U_USER",
+            "channel": "C123",
+            "ts": "123.456",
+            "thread_ts": "123.000",  # Differs from ts, so this is a reply
+            "channel_type": "channel",
+            "team": "T_TEAM",
+        }
+        await adapter_with_session_store._handle_slack_message(payload)
+        adapter_with_session_store.handle_message.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_thread_reply_without_mention_with_session_processed(
+        self, adapter_with_session_store, mock_session_store
+    ):
+        """An unmentioned thread reply is handled when its thread has a session."""
+        # Register a session entry keyed on channel C123 / thread 123.000.
+        thread_session = "agent:main:slack:group:C123:123.000:U_USER"
+        mock_session_store._entries = {thread_session: MagicMock()}
+
+        payload = {
+            "text": "Follow-up question",
+            "user": "U_USER",
+            "channel": "C123",
+            "ts": "123.456",
+            "thread_ts": "123.000",  # Reply in thread 123.000
+            "channel_type": "channel",
+            "team": "T_TEAM",
+        }
+        await adapter_with_session_store._handle_slack_message(payload)
+        adapter_with_session_store.handle_message.assert_called_once()
+
+        # No mention was present, so the text must arrive unmodified.
+        forwarded = adapter_with_session_store.handle_message.call_args.args[0]
+        assert forwarded.text == "Follow-up question"
+
+    @pytest.mark.asyncio
+    async def test_thread_reply_with_mention_strips_bot_id(
+        self, adapter_with_session_store, mock_session_store
+    ):
+        """A mentioned thread reply still has the bot ID removed from its text."""
+        # The session entry exists, yet the mention must be stripped anyway.
+        thread_session = "agent:main:slack:group:C123:123.000:U_USER"
+        mock_session_store._entries = {thread_session: MagicMock()}
+
+        payload = {
+            "text": "<@U_BOT> thanks for the help",
+            "user": "U_USER",
+            "channel": "C123",
+            "ts": "123.456",
+            "thread_ts": "123.000",
+            "channel_type": "channel",
+            "team": "T_TEAM",
+        }
+        await adapter_with_session_store._handle_slack_message(payload)
+        adapter_with_session_store.handle_message.assert_called_once()
+
+        forwarded = adapter_with_session_store.handle_message.call_args.args[0]
+        assert "<@U_BOT>" not in forwarded.text
+        assert forwarded.text == "thanks for the help"
+
+    @pytest.mark.asyncio
+    async def test_top_level_message_requires_mention_even_with_session(
+        self, adapter_with_session_store, mock_session_store
+    ):
+        """A top-level channel message needs a mention even if a session exists."""
+        # A session entry is present, but the payload carries no thread_ts.
+        thread_session = "agent:main:slack:group:C123:123.000:U_USER"
+        mock_session_store._entries = {thread_session: MagicMock()}
+
+        payload = {
+            "text": "New question without mention",
+            "user": "U_USER",
+            "channel": "C123",
+            "ts": "456.789",
+            # thread_ts deliberately omitted: this is a top-level message
+            "channel_type": "channel",
+            "team": "T_TEAM",
+        }
+        await adapter_with_session_store._handle_slack_message(payload)
+        adapter_with_session_store.handle_message.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_no_session_store_ignores_thread_replies(
+        self, adapter
+    ):
+        """Without a session store, unmentioned thread replies are dropped."""
+        # The shared ``adapter`` fixture never has a session store attached.
+        payload = {
+            "text": "Thread reply without mention",
+            "user": "U_USER",
+            "channel": "C123",
+            "ts": "123.456",
+            "thread_ts": "123.000",
+            "channel_type": "channel",
+            "team": "T_TEAM",
+        }
+        await adapter._handle_slack_message(payload)
+        adapter.handle_message.assert_not_called()
+
+
 # ---------------------------------------------------------------------------
 # TestUserNameResolution
 # ---------------------------------------------------------------------------
@@ -177,3 +177,150 @@ class TestStreamRunMediaStripping:
            assert "MEDIA:" not in sent_text, f"MEDIA: leaked into display: {sent_text!r}"

        assert consumer.already_sent
+
+
+# ── Segment break (tool boundary) tests ──────────────────────────────────
+
+
+class TestSegmentBreakOnToolBoundary:
+    """on_delta(None) must close the current message and open a new one, so
+    the final response lands below any tool-progress messages."""
+
+    @pytest.mark.asyncio
+    async def test_segment_break_creates_new_message(self):
+        """Text arriving after a None boundary goes out as a fresh message."""
+        fake = MagicMock()
+        fake.MAX_MESSAGE_LENGTH = 4096
+        first_send = SimpleNamespace(success=True, message_id="msg_1")
+        second_send = SimpleNamespace(success=True, message_id="msg_2")
+        # Each send() call consumes the next canned result, in order.
+        fake.send = AsyncMock(side_effect=[first_send, second_send])
+        fake.edit_message = AsyncMock(return_value=SimpleNamespace(success=True))
+
+        cfg = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        stream = GatewayStreamConsumer(fake, "chat_123", cfg)
+
+        # Segment one: interim text emitted ahead of the tool calls
+        stream.on_delta("Let me search for that...")
+        # Boundary marker: the model is about to invoke tools
+        stream.on_delta(None)
+        # Segment two: the answer produced once the tools are done
+        stream.on_delta("Here are the results.")
+        stream.finish()
+
+        await stream.run()
+
+        # Expect two distinct adapter.send calls (two messages), rather
+        # than a single message that was merely edited.
+        assert fake.send.call_count == 2
+        first_text = fake.send.call_args_list[0].kwargs["content"]
+        second_text = fake.send.call_args_list[1].kwargs["content"]
+        assert "search" in first_text
+        assert "results" in second_text
+
+    @pytest.mark.asyncio
+    async def test_segment_break_no_text_before(self):
+        """A None boundary that follows no text changes nothing."""
+        fake = MagicMock()
+        fake.MAX_MESSAGE_LENGTH = 4096
+        sent = SimpleNamespace(success=True, message_id="msg_1")
+        fake.send = AsyncMock(return_value=sent)
+        fake.edit_message = AsyncMock(return_value=SimpleNamespace(success=True))
+
+        cfg = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        stream = GatewayStreamConsumer(fake, "chat_123", cfg)
+
+        # The model jumped straight to tool calls: boundary first, text after
+        stream.on_delta(None)
+        stream.on_delta("Final answer.")
+        stream.finish()
+
+        await stream.run()
+
+        # Exactly one send call, carrying the final answer
+        assert fake.send.call_count == 1
+        assert "Final answer" in fake.send.call_args_list[0].kwargs["content"]
+
+    @pytest.mark.asyncio
+    async def test_segment_break_removes_cursor(self):
+        """A finalized segment must not end up showing the cursor."""
+        fake = MagicMock()
+        fake.MAX_MESSAGE_LENGTH = 4096
+        sent = SimpleNamespace(success=True, message_id="msg_1")
+        edited = SimpleNamespace(success=True)
+        fake.send = AsyncMock(return_value=sent)
+        fake.edit_message = AsyncMock(return_value=edited)
+
+        cfg = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉")
+        stream = GatewayStreamConsumer(fake, "chat_123", cfg)
+
+        stream.on_delta("Thinking...")
+        stream.on_delta(None)
+        stream.on_delta("Done.")
+        stream.finish()
+
+        await stream.run()
+
+        # Gather the content of every send, then of every edit_message call.
+        # The first segment's text shows up in at least one of them, and its
+        # last appearance is the state the segment was left in.
+        recorded_calls = [
+            *fake.send.call_args_list,
+            *fake.edit_message.call_args_list,
+        ]
+        all_texts = [c.kwargs.get("content", "") for c in recorded_calls]
+
+        # Narrow down to texts that mention "Thinking"; the finalized
+        # version among them must be free of the cursor.
+        thinking_texts = [t for t in all_texts if "Thinking" in t]
+        assert thinking_texts, "Expected at least one message with 'Thinking'"
+        # The final entry is the finalized version
+        assert "▉" not in thinking_texts[-1], (
+            f"Cursor found in finalized segment: {thinking_texts[-1]!r}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_multiple_segment_breaks(self):
+        """Each tool boundary starts another message segment."""
+        fake = MagicMock()
+        msg_ids = iter(["msg_1", "msg_2", "msg_3"])
+        fake.send = AsyncMock(
+            side_effect=lambda **_kwargs: SimpleNamespace(success=True, message_id=next(msg_ids))
+        )
+        fake.edit_message = AsyncMock(return_value=SimpleNamespace(success=True))
+        fake.MAX_MESSAGE_LENGTH = 4096
+
+        cfg = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        stream = GatewayStreamConsumer(fake, "chat_123", cfg)
+
+        stream.on_delta("Phase 1")
+        stream.on_delta(None)  # first tool boundary
+        stream.on_delta("Phase 2")
+        stream.on_delta(None)  # second tool boundary
+        stream.on_delta("Phase 3")
+        stream.finish()
+
+        await stream.run()
+
+        # One send per phase: three messages in total
+        assert fake.send.call_count == 3
+
+    @pytest.mark.asyncio
+    async def test_already_sent_stays_true_after_segment(self):
+        """already_sent is still True once a segment break has happened."""
+        fake = MagicMock()
+        fake.MAX_MESSAGE_LENGTH = 4096
+        sent = SimpleNamespace(success=True, message_id="msg_1")
+        fake.send = AsyncMock(return_value=sent)
+        fake.edit_message = AsyncMock(return_value=SimpleNamespace(success=True))
+
+        cfg = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
+        stream = GatewayStreamConsumer(fake, "chat_123", cfg)
+
+        stream.on_delta("Text")
+        stream.on_delta(None)
+        stream.finish()
+
+        await stream.run()
+
+        assert stream.already_sent
@@ -44,7 +44,62 @@ def test_get_nous_subscription_features_prefers_managed_modal_in_auto_mode(monke
    assert features.modal.direct_override is False


-def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase(monkeypatch):
+def test_get_nous_subscription_features_marks_browser_use_as_managed_when_gateway_ready(monkeypatch):
+    """A ready browser-use gateway makes the browser feature Nous-managed."""
+    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(ns, "get_env_value", lambda name: "")
+    monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
+    monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
+    monkeypatch.setattr(ns, "_has_agent_browser", lambda: True)
+    monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
+    # Only the "browser-use" vendor reports a ready managed gateway.
+    monkeypatch.setattr(
+        ns, "is_managed_tool_gateway_ready", lambda vendor: vendor == "browser-use"
+    )
+
+    browser_config = {"browser": {"cloud_provider": "browser-use"}}
+    features = ns.get_nous_subscription_features(browser_config)
+
+    browser = features.browser
+    assert browser.available is True
+    assert browser.active is True
+    assert browser.managed_by_nous is True
+    assert browser.direct_override is False
+    assert browser.current_provider == "Browser Use"
+
+
+def test_get_nous_subscription_features_uses_direct_browserbase_when_no_managed_gateway(monkeypatch):
+    """With direct Browserbase keys in the environment, no managed gateway and
+    no configured provider, the browser feature reports direct Browserbase."""
+    direct_keys = {
+        "BROWSERBASE_API_KEY": "bb-key",
+        "BROWSERBASE_PROJECT_ID": "bb-project",
+    }
+
+    monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
+    monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
+    monkeypatch.setattr(ns, "get_env_value", lambda name: direct_keys.get(name, ""))
+    monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
+    monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
+    monkeypatch.setattr(ns, "_has_agent_browser", lambda: True)
+    monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
+    # No vendor has a managed gateway available in this scenario.
+    monkeypatch.setattr(
+        ns, "is_managed_tool_gateway_ready", lambda vendor: False
+    )
+
+    features = ns.get_nous_subscription_features({})
+    browser = features.browser
+
+    assert browser.available is True
+    assert browser.active is True
+    assert browser.managed_by_nous is False
+    assert browser.direct_override is True
+    assert browser.current_provider == "Browserbase"
+
+
+def test_get_nous_subscription_features_prefers_camofox_over_managed_browser_use(monkeypatch):
    env = {"CAMOFOX_URL": "http://localhost:9377"}

    monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
@@ -57,11 +112,11 @@ def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase
    monkeypatch.setattr(
        ns,
        "is_managed_tool_gateway_ready",
-        lambda vendor: vendor == "browserbase",
+        lambda vendor: vendor == "browser-use",
    )

    features = ns.get_nous_subscription_features(
-        {"browser": {"cloud_provider": "browserbase"}}
+        {"browser": {"cloud_provider": "browser-use"}}
    )

    assert features.browser.available is True
@@ -88,7 +88,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path
                "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
                "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
                "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
-                "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"),
+                "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
                "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
            },
        ),
@@ -330,7 +330,7 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):

    assert config["web"]["backend"] == "firecrawl"
    assert config["tts"]["provider"] == "openai"
-    assert config["browser"]["cloud_provider"] == "browserbase"
+    assert config["browser"]["cloud_provider"] == "browser-use"
    assert configured == []

 # ── Platform / toolset consistency ────────────────────────────────────────────
@@ -0,0 +1,257 @@
+import json
+import threading
+
+import pytest
+
+from plugins.memory.supermemory import (
+    SupermemoryMemoryProvider,
+    _clean_text_for_capture,
+    _format_prefetch_context,
+    _load_supermemory_config,
+    _save_supermemory_config,
+)
+
+
+class FakeClient:
+    """In-memory stand-in for the Supermemory client used by these tests.
+
+    It records the calls made to it and returns canned responses, which a
+    test can overwrite through the attributes set in ``__init__``.
+    """
+
+    def __init__(self, api_key: str, timeout: float, container_tag: str):
+        # Constructor arguments are stored as-is so tests can inspect them.
+        self.api_key, self.timeout, self.container_tag = api_key, timeout, container_tag
+        # Call logs filled in by the methods below.
+        self.add_calls = []
+        self.ingest_calls = []
+        self.forgotten_ids = []
+        # Canned responses returned verbatim by the query methods.
+        self.search_results = []
+        self.profile_response = {"static": [], "dynamic": [], "search_results": []}
+        self.forget_by_query_response = {"success": True, "message": "Forgot"}
+
+    def add_memory(self, content, metadata=None, *, entity_context=""):
+        """Log the call in ``add_calls`` and return a fixed memory id."""
+        record = dict(content=content, metadata=metadata, entity_context=entity_context)
+        self.add_calls.append(record)
+        return {"id": "mem_123"}
+
+    def search_memories(self, query, *, limit=5):
+        """Return ``search_results`` regardless of the query or limit."""
+        return self.search_results
+
+    def get_profile(self, query=None):
+        """Return ``profile_response`` regardless of the query."""
+        return self.profile_response
+
+    def forget_memory(self, memory_id):
+        """Log the forgotten id in ``forgotten_ids``."""
+        self.forgotten_ids.append(memory_id)
+
+    def forget_by_query(self, query):
+        """Return ``forget_by_query_response`` regardless of the query."""
+        return self.forget_by_query_response
+
+    def ingest_conversation(self, session_id, messages):
+        """Log the session id and messages in ``ingest_calls``."""
+        self.ingest_calls.append(dict(session_id=session_id, messages=messages))
+
+
+@pytest.fixture
+def provider(monkeypatch, tmp_path):
+    """Return an initialized provider whose client class is ``FakeClient``."""
+    # Swap the client class first so initialize() builds the fake.
+    monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient)
+    monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key")
+    instance = SupermemoryMemoryProvider()
+    instance.initialize("session-1", hermes_home=str(tmp_path), platform="cli")
+    return instance
+
+
+def test_is_available_false_without_api_key(monkeypatch):
+    """The provider reports unavailable when SUPERMEMORY_API_KEY is unset."""
+    monkeypatch.delenv("SUPERMEMORY_API_KEY", raising=False)
+    assert SupermemoryMemoryProvider().is_available() is False
+
+
+def test_is_available_false_when_import_missing(monkeypatch):
+    """With a key set, the provider is still unavailable if ``supermemory`` cannot be imported."""
+    import builtins
+
+    monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key")
+    original_import = builtins.__import__
+
+    def blocking_import(name, *args, **kwargs):
+        # Only the top-level "supermemory" import fails; everything else
+        # is delegated to the real import machinery.
+        if name != "supermemory":
+            return original_import(name, *args, **kwargs)
+        raise ImportError("missing")
+
+    monkeypatch.setattr(builtins, "__import__", blocking_import)
+    assert SupermemoryMemoryProvider().is_available() is False
+
+
+def test_load_and_save_config_round_trip(tmp_path):
+    """A saved config loads back with its saved values plus ``auto_recall``.
+
+    NOTE(review): the tag is saved as "demo-tag" and expected back as
+    "demo_tag" — presumably the save or load helper normalizes tags; confirm
+    against the plugin.
+    """
+    home = str(tmp_path)
+    _save_supermemory_config({"container_tag": "demo-tag", "auto_capture": False}, home)
+    loaded = _load_supermemory_config(home)
+    assert loaded["container_tag"] == "demo_tag"
+    assert loaded["auto_capture"] is False
+    # auto_recall was never saved, so this value comes from the loader.
+    assert loaded["auto_recall"] is True
+
+
+def test_clean_text_for_capture_strips_injected_context():
+    """A ``<supermemory-context>`` block is removed from captured text."""
+    raw = "hello\n<supermemory-context>ignore me</supermemory-context>\nworld"
+    cleaned = _clean_text_for_capture(raw)
+    assert cleaned == "hello\nworld"
+
+
+def test_format_prefetch_context_deduplicates_overlap():
+    """Facts repeated across the static, dynamic and search inputs appear once."""
+    short_answers = "Jordan prefers short answers"
+    uses_hermes = "Uses Hermes"
+    rendered = _format_prefetch_context(
+        static_facts=[short_answers],
+        dynamic_facts=[short_answers, uses_hermes],
+        search_results=[{"memory": uses_hermes, "similarity": 0.9}],
+        max_results=10,
+    )
+    for fact in (short_answers, uses_hermes):
+        assert rendered.count(fact) == 1
+    assert "<supermemory-context>" in rendered
+
+
+def test_prefetch_includes_profile_on_first_turn(provider):
+    """On turn 1 the prefetch output carries all three section headings."""
+    provider._client.profile_response = dict(
+        static=["Jordan prefers short answers"],
+        dynamic=["Current project is Supermemory provider"],
+        search_results=[{"memory": "Working on Hermes memory provider", "similarity": 0.88}],
+    )
+    provider.on_turn_start(1, "start")
+    context = provider.prefetch("what am I working on?")
+    for heading in ("User Profile (Persistent)", "Recent Context", "Relevant Memories"):
+        assert heading in context
+
+
+def test_prefetch_skips_profile_between_frequency(provider):
+    """On turn 2 the prefetch output has relevant memories but no persistent profile."""
+    provider._client.profile_response = dict(
+        static=["Jordan prefers short answers"],
+        dynamic=["Current project is Supermemory provider"],
+        search_results=[{"memory": "Working on Hermes memory provider", "similarity": 0.88}],
+    )
+    provider.on_turn_start(2, "next")
+    context = provider.prefetch("what am I working on?")
+    assert "Relevant Memories" in context
+    assert "User Profile (Persistent)" not in context
+
+
+def test_sync_turn_skips_trivial_message(provider):
+    """A trivial "ok" / "sure" exchange is not written to the client."""
+    provider.sync_turn("ok", "sure", session_id="session-1")
+    recorded = provider._client.add_calls
+    assert recorded == []
+
+
+def test_sync_turn_persists_cleaned_exchange(provider):
+    """A substantive exchange is stored once, role-tagged and without injected context."""
+    user_text = "Please remember this\n<supermemory-context>ignore</supermemory-context>"
+    provider.sync_turn(user_text, "Got it, storing the context", session_id="session-1")
+    # The write happens on a background thread; wait for it before asserting.
+    provider._sync_thread.join(timeout=1)
+    calls = provider._client.add_calls
+    assert len(calls) == 1
+    stored = calls[0]["content"]
+    assert "ignore" not in stored
+    for marker in ("[role: user]", "[role: assistant]"):
+        assert marker in stored
+
+
+def test_on_session_end_ingests_clean_messages(provider):
+    """Session end ingests the user and assistant messages and drops the system one."""
+    provider.on_session_end([
+        {"role": "system", "content": "skip"},
+        {"role": "user", "content": "hello"},
+        {"role": "assistant", "content": "hi there"},
+    ])
+    calls = provider._client.ingest_calls
+    assert len(calls) == 1
+    ingested = calls[0]
+    assert ingested["session_id"] == "session-1"
+    expected_messages = [
+        {"role": "user", "content": "hello"},
+        {"role": "assistant", "content": "hi there"},
+    ]
+    assert ingested["messages"] == expected_messages
+
+
+def test_on_memory_write_tracks_thread(provider):
+    """An explicit memory write runs on a tracked thread and is tagged ``explicit_memory``."""
+    provider.on_memory_write("add", "memory", "Jordan likes concise docs")
+    writer = provider._write_thread
+    assert writer is not None
+    writer.join(timeout=1)
+    calls = provider._client.add_calls
+    assert len(calls) == 1
+    assert calls[0]["metadata"]["type"] == "explicit_memory"
+
+
+def test_shutdown_joins_and_clears_threads(provider, monkeypatch):
+    """``shutdown`` lets in-flight writes finish and resets every thread handle to None."""
+    entered = threading.Event()  # set when a write has reached add_memory
+    unblock = threading.Event()  # set to let blocked writes continue
+
+    def slow_add_memory(content, metadata=None, *, entity_context=""):
+        # Signal that a write is in flight, then hold it until released.
+        entered.set()
+        unblock.wait(timeout=1)
+        record = dict(content=content, metadata=metadata, entity_context=entity_context)
+        provider._client.add_calls.append(record)
+        return {"id": "mem_slow"}
+
+    monkeypatch.setattr(provider._client, "add_memory", slow_add_memory)
+
+    # First in-flight write: the turn sync.
+    provider.sync_turn(
+        "Please remember this request in long-term memory",
+        "Absolutely, I will keep that in long-term memory.",
+        session_id="session-1",
+    )
+    assert entered.wait(timeout=1)
+    assert provider._sync_thread is not None
+
+    # Second in-flight write: the explicit memory write.
+    entered.clear()
+    provider.on_memory_write("add", "memory", "Jordan likes concise docs")
+    assert entered.wait(timeout=1)
+    assert provider._write_thread is not None
+
+    unblock.set()
+    provider.shutdown()
+
+    for handle in ("_sync_thread", "_write_thread", "_prefetch_thread"):
+        assert getattr(provider, handle) is None
+    assert len(provider._client.add_calls) == 2
+
+
+def test_store_tool_returns_saved_payload(provider):
+    """``supermemory_store`` returns JSON with ``saved`` true and the client's memory id."""
+    raw = provider.handle_tool_call("supermemory_store", {"content": "Jordan likes concise docs"})
+    payload = json.loads(raw)
+    assert payload["saved"] is True
+    assert payload["id"] == "mem_123"
+
+
+def test_search_tool_formats_results(provider):
+    """``supermemory_search`` reports the hit count and a similarity of 92 for a 0.92 score."""
+    hit = {"id": "m1", "memory": "Jordan likes concise docs", "similarity": 0.92}
+    provider._client.search_results = [hit]
+    raw = provider.handle_tool_call("supermemory_search", {"query": "concise docs"})
+    payload = json.loads(raw)
+    assert payload["count"] == 1
+    assert payload["results"][0]["similarity"] == 92
+
+
+def test_forget_tool_by_id(provider):
+    """``supermemory_forget`` with an id forwards it to the client and confirms it."""
+    raw = provider.handle_tool_call("supermemory_forget", {"id": "m1"})
+    assert json.loads(raw) == {"forgotten": True, "id": "m1"}
+    assert provider._client.forgotten_ids == ["m1"]
+
+
+def test_forget_tool_by_query(provider):
+    """``supermemory_forget`` with a query surfaces the client's success flag and id."""
+    canned = {"success": True, "message": "Forgot one", "id": "m7"}
+    provider._client.forget_by_query_response = canned
+    raw = provider.handle_tool_call("supermemory_forget", {"query": "that thing"})
+    payload = json.loads(raw)
+    assert payload["success"] is True
+    assert payload["id"] == "m7"
+
+
+def test_profile_tool_formats_sections(provider):
+    """``supermemory_profile`` reports per-section counts and a rendered profile."""
+    provider._client.profile_response = dict(
+        static=["Jordan prefers concise docs"],
+        dynamic=["Working on Supermemory provider"],
+        search_results=[],
+    )
+    payload = json.loads(provider.handle_tool_call("supermemory_profile", {}))
+    assert payload["static_count"] == 1
+    assert payload["dynamic_count"] == 1
+    assert "User Profile (Persistent)" in payload["profile"]
+
+
+def test_handle_tool_call_returns_error_when_unconfigured(monkeypatch):
+    """A tool call on a never-initialized provider returns JSON with an ``error`` key."""
+    monkeypatch.delenv("SUPERMEMORY_API_KEY", raising=False)
+    uninitialized = SupermemoryMemoryProvider()
+    raw = uninitialized.handle_tool_call("supermemory_search", {"query": "x"})
+    assert "error" in json.loads(raw)
@@ -350,6 +350,7 @@ class TestResolveApiKeyProviderCredentials:

    def test_resolve_zai_with_key(self, monkeypatch):
        monkeypatch.setenv("GLM_API_KEY", "glm-secret-key")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
        creds = resolve_api_key_provider_credentials("zai")
        assert creds["provider"] == "zai"
        assert creds["api_key"] == "glm-secret-key"
@@ -471,6 +472,7 @@ class TestResolveApiKeyProviderCredentials:
        """GLM_API_KEY takes priority over ZAI_API_KEY."""
        monkeypatch.setenv("GLM_API_KEY", "primary")
        monkeypatch.setenv("ZAI_API_KEY", "secondary")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
        creds = resolve_api_key_provider_credentials("zai")
        assert creds["api_key"] == "primary"
        assert creds["source"] == "GLM_API_KEY"
@@ -478,6 +480,7 @@ class TestResolveApiKeyProviderCredentials:
    def test_zai_key_fallback(self, monkeypatch):
        """ZAI_API_KEY used when GLM_API_KEY not set."""
        monkeypatch.setenv("ZAI_API_KEY", "secondary")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
        creds = resolve_api_key_provider_credentials("zai")
        assert creds["api_key"] == "secondary"
        assert creds["source"] == "ZAI_API_KEY"
@@ -830,11 +833,58 @@ class TestKimiCodeCredentialAutoDetect:

    def test_non_kimi_providers_unaffected(self, monkeypatch):
        """Ensure the auto-detect logic doesn't leak to other providers."""
-        monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt")
+        monkeypatch.setenv("GLM_API_KEY", "sk-kim...isnt")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
        creds = resolve_api_key_provider_credentials("zai")
        assert creds["base_url"] == "https://api.z.ai/api/paas/v4"


+class TestZaiEndpointAutoDetect:
+    """Test that resolve_api_key_provider_credentials auto-detects Z.AI endpoints."""
+
+    def test_probe_success_returns_detected_url(self, monkeypatch):
+        """A successful probe's ``base_url`` is used in the resolved credentials."""
+        monkeypatch.setenv("GLM_API_KEY", "glm-coding-key")
+        monkeypatch.setattr(
+            "hermes_cli.auth.detect_zai_endpoint",
+            lambda *a, **kw: {
+                "id": "coding-global",
+                "base_url": "https://api.z.ai/api/coding/paas/v4",
+                "model": "glm-4.7",
+                "label": "Global (Coding Plan)",
+            },
+        )
+        creds = resolve_api_key_provider_credentials("zai")
+        assert creds["base_url"] == "https://api.z.ai/api/coding/paas/v4"
+
+    def test_probe_failure_falls_back_to_default(self, monkeypatch):
+        """When the probe returns None the default Z.AI base URL is used."""
+        monkeypatch.setenv("GLM_API_KEY", "glm-key")
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None)
+        creds = resolve_api_key_provider_credentials("zai")
+        assert creds["base_url"] == "https://api.z.ai/api/paas/v4"
+
+    def test_env_override_skips_probe(self, monkeypatch):
+        """GLM_BASE_URL should always win without probing."""
+        monkeypatch.setenv("GLM_API_KEY", "glm-key")
+        monkeypatch.setenv("GLM_BASE_URL", "https://custom.example/v4")
+        probe_called = False
+
+        def _never_called(*a, **kw):
+            nonlocal probe_called
+            probe_called = True
+            return None
+
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _never_called)
+        creds = resolve_api_key_provider_credentials("zai")
+        assert creds["base_url"] == "https://custom.example/v4"
+        assert not probe_called
+
+    def test_no_key_skips_probe(self, monkeypatch):
+        """Without an API key, no probe should occur."""
+        # Clear the key variables so a key leaking in from the developer's
+        # shell or CI cannot satisfy the lookup. Z_AI_API_KEY is presumably
+        # a third accepted alias; clearing it is harmless if it is unused.
+        for var in ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"):
+            monkeypatch.delenv(var, raising=False)
+        probe_called = False
+
+        def _never_called(*a, **kw):
+            nonlocal probe_called
+            probe_called = True
+            return None
+
+        monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _never_called)
+        creds = resolve_api_key_provider_credentials("zai")
+        assert creds["api_key"] == ""
+        # Check the behaviour the test name promises, as
+        # test_env_override_skips_probe does.
+        assert not probe_called
+
+
 # =============================================================================
 # Kimi / Moonshot model list isolation tests
 # =============================================================================
@@ -947,7 +947,7 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch):
                        "auth_type": "api_key",
                        "priority": 0,
                        "source": "manual",
-                        "access_token": "sk-ant-xxx",
+                        "access_token": "***",
                    }
                ],
                "custom:together.ai": [
@@ -957,7 +957,7 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch):
                        "auth_type": "api_key",
                        "priority": 0,
                        "source": "manual",
-                        "access_token": "sk-tog-xxx",
+                        "access_token": "***",
                    }
                ],
                "custom:fireworks": [
@@ -967,7 +967,7 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch):
                        "auth_type": "api_key",
                        "priority": 0,
                        "source": "manual",
-                        "access_token": "sk-fw-xxx",
+                        "access_token": "***",
                    }
                ],
                "custom:empty": [],
@@ -980,3 +980,78 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch):
    result = list_custom_pool_providers()
    assert result == ["custom:fireworks", "custom:together.ai"]
    # "custom:empty" not included because it's empty
+
+
+
+def test_acquire_lease_prefers_unleased_entry(tmp_path, monkeypatch):
+    """Two successive leases go to two different entries, each leased once."""
+
+    def _manual_entry(cred_id, label, priority):
+        # One manually-added API-key credential as stored in the auth file.
+        return {
+            "id": cred_id,
+            "label": label,
+            "auth_type": "api_key",
+            "priority": priority,
+            "source": "manual",
+            "access_token": "***",
+        }
+
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    store = {
+        "version": 1,
+        "credential_pool": {
+            "openrouter": [
+                _manual_entry("cred-1", "primary", 0),
+                _manual_entry("cred-2", "secondary", 1),
+            ]
+        },
+    }
+    _write_auth_store(tmp_path, store)
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    first_lease = pool.acquire_lease()
+    second_lease = pool.acquire_lease()
+
+    assert first_lease == "cred-1"
+    assert second_lease == "cred-2"
+    for cred_id in ("cred-1", "cred-2"):
+        assert pool.active_lease_count(cred_id) == 1
+
+
+
+def test_release_lease_decrements_counter(tmp_path, monkeypatch):
+    """Releasing a lease brings that entry's active lease count back to zero."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    only_entry = {
+        "id": "cred-1",
+        "label": "primary",
+        "auth_type": "api_key",
+        "priority": 0,
+        "source": "manual",
+        "access_token": "***",
+    }
+    store = {"version": 1, "credential_pool": {"openrouter": [only_entry]}}
+    _write_auth_store(tmp_path, store)
+
+    from agent.credential_pool import load_pool
+
+    pool = load_pool("openrouter")
+    lease_id = pool.acquire_lease()
+    assert lease_id == "cred-1"
+    assert pool.active_lease_count("cred-1") == 1
+
+    pool.release_lease("cred-1")
+    assert pool.active_lease_count("cred-1") == 0
@@ -996,6 +996,89 @@ def test_custom_provider_no_key_gets_placeholder(monkeypatch):
    assert resolved["base_url"] == "http://localhost:8080/v1"


+def test_auto_detected_nous_auth_failure_falls_through_to_openrouter(monkeypatch):
+    """When auto-detect picks Nous but credentials are revoked, fall through to OpenRouter."""
+    from hermes_cli.auth import AuthError
+
+    class _EmptyPool:
+        # Reports no credentials so the direct credential resolution is hit.
+        def has_credentials(self):
+            return False
+
+    def _revoked(**kw):
+        # Nous credential resolution fails with a revoked token.
+        raise AuthError(
+            "Refresh session has been revoked",
+            provider="nous",
+            code="invalid_grant",
+            relogin_required=True,
+        )
+
+    monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key")
+    for var in ("OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENROUTER_BASE_URL"):
+        monkeypatch.delenv(var, raising=False)
+    monkeypatch.setattr(rp, "load_config", lambda: {})
+    # resolve_provider returns "nous" (stale active_provider in auth.json)
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous")
+    monkeypatch.setattr(rp, "load_pool", lambda p: _EmptyPool())
+    monkeypatch.setattr(rp, "resolve_nous_runtime_credentials", _revoked)
+
+    # With requested="auto", should fall through to OpenRouter
+    outcome = rp.resolve_runtime_provider(requested="auto")
+    assert outcome["provider"] == "openrouter"
+    assert outcome["api_key"] == "test-or-key"
+
+
+def test_auto_detected_codex_auth_failure_falls_through_to_openrouter(monkeypatch):
+    """When auto-detect picks Codex but credentials are revoked, fall through to OpenRouter."""
+    from hermes_cli.auth import AuthError
+
+    class _EmptyPool:
+        # Reports no credentials for the patched load_pool.
+        def has_credentials(self):
+            return False
+
+    def _revoked(**kw):
+        # Codex credential resolution fails with a revoked session.
+        raise AuthError(
+            "Codex token refresh failed: session revoked",
+            provider="openai-codex",
+            code="invalid_grant",
+            relogin_required=True,
+        )
+
+    monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key")
+    for var in ("OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENROUTER_BASE_URL"):
+        monkeypatch.delenv(var, raising=False)
+    monkeypatch.setattr(rp, "load_config", lambda: {})
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex")
+    monkeypatch.setattr(rp, "load_pool", lambda p: _EmptyPool())
+    monkeypatch.setattr(rp, "resolve_codex_runtime_credentials", _revoked)
+
+    outcome = rp.resolve_runtime_provider(requested="auto")
+    assert outcome["provider"] == "openrouter"
+    assert outcome["api_key"] == "test-or-key"
+
+
+def test_explicit_nous_auth_failure_still_raises(monkeypatch):
+    """When user explicitly requests Nous and auth fails, the error should propagate."""
+    from hermes_cli.auth import AuthError
+    import pytest
+
+    class _EmptyPool:
+        # Reports no credentials for the patched load_pool.
+        def has_credentials(self):
+            return False
+
+    def _revoked(**kw):
+        # Nous credential resolution fails with a revoked token.
+        raise AuthError(
+            "Refresh session has been revoked",
+            provider="nous",
+            code="invalid_grant",
+            relogin_required=True,
+        )
+
+    monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key")
+    monkeypatch.setattr(rp, "load_config", lambda: {})
+    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous")
+    monkeypatch.setattr(rp, "load_pool", lambda p: _EmptyPool())
+    monkeypatch.setattr(rp, "resolve_nous_runtime_credentials", _revoked)
+
+    # With explicit "nous", should raise — don't silently switch providers
+    with pytest.raises(AuthError, match="Refresh session has been revoked"):
+        rp.resolve_runtime_provider(requested="nous")
+
+
 def test_openrouter_provider_not_affected_by_custom_fix(monkeypatch):
    """Fixing custom must not change openrouter behavior."""
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
@@ -45,3 +45,35 @@ class TestResolveCdpOverride:

        with patch("tools.browser_tool.requests.get", side_effect=RuntimeError("boom")):
            assert _resolve_cdp_override(HTTP_URL) == HTTP_URL
+
+    def test_normalizes_provider_returned_http_cdp_url_when_creating_session(self, monkeypatch):
+        """An HTTP CDP URL returned by the cloud provider is resolved to the websocket URL."""
+        import tools.browser_tool as browser_tool
+
+        http_cdp_url = "https://cdp.browser-use.example/session"
+        provider = Mock()
+        provider.create_session.return_value = {
+            "session_name": "cloud-session",
+            "bb_session_id": "bu_123",
+            "cdp_url": http_cdp_url,
+            "features": {"browser_use": True},
+        }
+
+        # Canned discovery response pointing at the websocket endpoint.
+        version_response = Mock()
+        version_response.raise_for_status.return_value = None
+        version_response.json.return_value = {"webSocketDebuggerUrl": WS_URL}
+
+        # Replace module session state and helpers; no CDP override is set,
+        # so the session comes from the mocked cloud provider.
+        replacements = {
+            "_active_sessions": {},
+            "_session_last_activity": {},
+            "_start_browser_cleanup_thread": lambda: None,
+            "_update_session_activity": lambda task_id: None,
+            "_get_cdp_override": lambda: "",
+            "_get_cloud_provider": lambda: provider,
+        }
+        for attr, replacement in replacements.items():
+            monkeypatch.setattr(browser_tool, attr, replacement)
+
+        with patch("tools.browser_tool.requests.get", return_value=version_response) as mock_get:
+            session_info = browser_tool._get_session_info("task-browser-use")
+
+        assert session_info["cdp_url"] == WS_URL
+        provider.create_session.assert_called_once_with("task-browser-use")
+        mock_get.assert_called_once_with(f"{http_cdp_url}/json/version", timeout=10)
@@ -65,18 +65,6 @@ class TestBrowserCleanup:
        mock_stop.assert_called_once_with("task-1")
        mock_run.assert_called_once_with("task-1", "close", [], timeout=10)

-    def test_browser_close_delegates_to_cleanup_browser(self):
-        import json
-
-        browser_tool = self.browser_tool
-        browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"}
-
-        with patch("tools.browser_tool.cleanup_browser") as mock_cleanup:
-            result = json.loads(browser_tool.browser_close("task-2"))
-
-        assert result == {"success": True, "closed": True}
-        mock_cleanup.assert_called_once_with("task-2")
-
    def test_emergency_cleanup_clears_all_tracking_state(self):
        browser_tool = self.browser_tool
        browser_tool._cleanup_done = False
@@ -26,6 +26,7 @@ from tools.delegate_tool import (
    _build_child_agent,
    _build_child_system_prompt,
    _strip_blocked_tools,
+    _resolve_child_credential_pool,
    _resolve_delegation_credentials,
 )

@@ -930,5 +931,126 @@ class TestDelegationProviderIntegration(unittest.TestCase):
            self.assertEqual(kwargs["base_url"], parent.base_url)


+class TestChildCredentialPoolResolution(unittest.TestCase):
+    """Which credential pool a delegated child agent receives."""
+
+    def test_same_provider_shares_parent_pool(self):
+        """Requesting "openrouter" returns the parent's own pool object.
+
+        NOTE(review): presumably "openrouter" is the provider that
+        _make_mock_parent configures — confirm against that helper.
+        """
+        parent_agent = _make_mock_parent()
+        shared_pool = parent_agent._credential_pool = MagicMock()
+
+        self.assertIs(_resolve_child_credential_pool("openrouter", parent_agent), shared_pool)
+
+    def test_no_provider_inherits_parent_pool(self):
+        """With no provider given, the parent's pool object is returned."""
+        parent_agent = _make_mock_parent()
+        shared_pool = parent_agent._credential_pool = MagicMock()
+
+        self.assertIs(_resolve_child_credential_pool(None, parent_agent), shared_pool)
+
+    def test_different_provider_loads_own_pool(self):
+        """A different provider gets the pool returned by ``load_pool`` when it has credentials."""
+        parent_agent = _make_mock_parent()
+        parent_agent._credential_pool = MagicMock()
+        loaded_pool = MagicMock(**{"has_credentials.return_value": True})
+
+        with patch("agent.credential_pool.load_pool", return_value=loaded_pool):
+            resolved = _resolve_child_credential_pool("anthropic", parent_agent)
+
+        self.assertIs(resolved, loaded_pool)
+
+    def test_different_provider_empty_pool_returns_none(self):
+        """A different provider whose loaded pool has no credentials yields None."""
+        parent_agent = _make_mock_parent()
+        parent_agent._credential_pool = MagicMock()
+        loaded_pool = MagicMock(**{"has_credentials.return_value": False})
+
+        with patch("agent.credential_pool.load_pool", return_value=loaded_pool):
+            resolved = _resolve_child_credential_pool("anthropic", parent_agent)
+
+        self.assertIsNone(resolved)
+
+    def test_different_provider_load_failure_returns_none(self):
+        """An exception raised by ``load_pool`` is absorbed and None is returned."""
+        parent_agent = _make_mock_parent()
+        parent_agent._credential_pool = MagicMock()
+
+        with patch("agent.credential_pool.load_pool", side_effect=Exception("disk error")):
+            resolved = _resolve_child_credential_pool("anthropic", parent_agent)
+
+        self.assertIsNone(resolved)
+
+    def test_build_child_agent_assigns_parent_pool_when_shared(self):
+        """``_build_child_agent`` puts the parent's pool on the constructed child."""
+        parent_agent = _make_mock_parent()
+        shared_pool = parent_agent._credential_pool = MagicMock()
+
+        with patch("run_agent.AIAgent") as agent_cls:
+            built_child = agent_cls.return_value = MagicMock()
+
+            _build_child_agent(
+                task_index=0,
+                goal="Test pool assignment",
+                context=None,
+                toolsets=["terminal"],
+                model=None,
+                max_iterations=10,
+                parent_agent=parent_agent,
+            )
+
+            self.assertEqual(built_child._credential_pool, shared_pool)
+
+
+class TestChildCredentialLeasing(unittest.TestCase):
+    """Lease handling around a single delegated child run."""
+
+    def test_run_single_child_acquires_and_releases_lease(self):
+        """A successful run acquires one lease, swaps to its entry, then releases it."""
+        from tools.delegate_tool import _run_single_child
+
+        entry = MagicMock()
+        entry.id = "cred-b"
+
+        pool = MagicMock()
+        pool.acquire_lease.return_value = "cred-b"
+        pool.current.return_value = entry
+
+        child = MagicMock()
+        child._credential_pool = pool
+        child.run_conversation.return_value = {
+            "final_response": "done",
+            "completed": True,
+            "interrupted": False,
+            "api_calls": 1,
+            "messages": [],
+        }
+
+        outcome = _run_single_child(
+            task_index=0,
+            goal="Investigate rate limits",
+            child=child,
+            parent_agent=_make_mock_parent(),
+        )
+
+        self.assertEqual(outcome["status"], "completed")
+        pool.acquire_lease.assert_called_once_with()
+        child._swap_credential.assert_called_once_with(entry)
+        pool.release_lease.assert_called_once_with("cred-b")
+
+    def test_run_single_child_releases_lease_after_failure(self):
+        """The lease is released even when the child's conversation raises."""
+        from tools.delegate_tool import _run_single_child
+
+        pool = MagicMock()
+        pool.acquire_lease.return_value = "cred-a"
+        pool.current.return_value = MagicMock(id="cred-a")
+
+        child = MagicMock()
+        child._credential_pool = pool
+        child.run_conversation.side_effect = RuntimeError("boom")
+
+        outcome = _run_single_child(
+            task_index=1,
+            goal="Trigger failure",
+            child=child,
+            parent_agent=_make_mock_parent(),
+        )
+
+        self.assertEqual(outcome["status"], "error")
+        pool.release_lease.assert_called_once_with("cred-a")
+
+
 if __name__ == "__main__":
    unittest.main()
@@ -113,16 +113,15 @@ def _install_fake_tools_package():
    sys.modules["tools.environments.managed_modal"] = types.SimpleNamespace(ManagedModalEnvironment=_DummyEnvironment)


-def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path):
+def test_browser_use_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path):
    _install_fake_tools_package()
    (tmp_path / "config.yaml").write_text("browser:\n  cloud_provider: local\n", encoding="utf-8")
    env = os.environ.copy()
-    env.pop("BROWSERBASE_API_KEY", None)
-    env.pop("BROWSERBASE_PROJECT_ID", None)
+    env.pop("BROWSER_USE_API_KEY", None)
    env.update({
        "HERMES_HOME": str(tmp_path),
        "TOOL_GATEWAY_USER_TOKEN": "nous-token",
-        "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
+        "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
    })

    with patch.dict(os.environ, env, clear=True):
@@ -135,7 +134,7 @@ def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_i
    assert provider is None


-def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_call_id():
+def test_browserbase_does_not_use_gateway_only_configuration():
    _install_fake_tools_package()
    env = os.environ.copy()
    env.pop("BROWSERBASE_API_KEY", None)
@@ -145,104 +144,124 @@ def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_
        "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
    })

-    class _Response:
-        status_code = 200
-        ok = True
-        text = ""
-        headers = {"x-external-call-id": "call-browserbase-1"}
-
-        def json(self):
-            return {
-                "id": "bb_local_session_1",
-                "connectUrl": "wss://connect.browserbase.example/session",
-            }
-
-    with patch.dict(os.environ, env, clear=True):
-        browserbase_module = _load_tool_module(
-            "tools.browser_providers.browserbase",
-            "browser_providers/browserbase.py",
-        )
-
-        with patch.object(browserbase_module.requests, "post", return_value=_Response()) as post:
-            provider = browserbase_module.BrowserbaseProvider()
-            session = provider.create_session("task-browserbase-managed")
-
-    sent_headers = post.call_args.kwargs["headers"]
-    assert sent_headers["X-BB-API-Key"] == "nous-token"
-    assert sent_headers["X-Idempotency-Key"].startswith("browserbase-session-create:")
-    assert session["external_call_id"] == "call-browserbase-1"
-
-
-def test_browserbase_managed_gateway_reuses_pending_idempotency_key_after_timeout():
-    _install_fake_tools_package()
-    env = os.environ.copy()
-    env.pop("BROWSERBASE_API_KEY", None)
-    env.pop("BROWSERBASE_PROJECT_ID", None)
-    env.update({
-        "TOOL_GATEWAY_USER_TOKEN": "nous-token",
-        "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
-    })
-
-    class _Response:
-        status_code = 200
-        ok = True
-        text = ""
-        headers = {"x-external-call-id": "call-browserbase-2"}
-
-        def json(self):
-            return {
-                "id": "bb_local_session_2",
-                "connectUrl": "wss://connect.browserbase.example/session2",
-            }
-
    with patch.dict(os.environ, env, clear=True):
        browserbase_module = _load_tool_module(
            "tools.browser_providers.browserbase",
            "browser_providers/browserbase.py",
        )
        provider = browserbase_module.BrowserbaseProvider()
-        timeout = browserbase_module.requests.Timeout("timed out")
+
+    assert provider.is_configured() is False
+
+
+def test_browser_use_managed_gateway_adds_idempotency_key_and_persists_external_call_id():
+    """Managed mode sends the gateway token, an idempotency key and the short-session payload.
+
+    With no direct ``BROWSER_USE_API_KEY`` and a gateway URL plus user token in
+    the environment, ``create_session`` must POST the token as the API key
+    header, attach an idempotency key, send ``timeout``/``proxyCountryCode``,
+    and surface the ``x-external-call-id`` response header on the session.
+    """
+    _install_fake_tools_package()
+    env = {name: value for name, value in os.environ.items() if name != "BROWSER_USE_API_KEY"}
+    env["TOOL_GATEWAY_USER_TOKEN"] = "nous-token"
+    env["BROWSER_USE_GATEWAY_URL"] = "http://127.0.0.1:3009"
+
+    class _GatewayResponse:
+        """Minimal stand-in for a successful gateway HTTP response."""
+
+        status_code = 200
+        ok = True
+        text = ""
+        headers = {"x-external-call-id": "call-browser-use-1"}
+
+        def json(self):
+            return {
+                "id": "bu_local_session_1",
+                "connectUrl": "wss://connect.browser-use.example/session",
+            }
+
+    with patch.dict(os.environ, env, clear=True):
+        browser_use_module = _load_tool_module(
+            "tools.browser_providers.browser_use",
+            "browser_providers/browser_use.py",
+        )
+
+        with patch.object(
+            browser_use_module.requests,
+            "post",
+            return_value=_GatewayResponse(),
+        ) as post:
+            provider = browser_use_module.BrowserUseProvider()
+            session = provider.create_session("task-browser-use-managed")
+
+    call_kwargs = post.call_args.kwargs
+    sent_headers = call_kwargs["headers"]
+    assert sent_headers["X-Browser-Use-API-Key"] == "nous-token"
+    assert sent_headers["X-Idempotency-Key"].startswith("browser-use-session-create:")
+    sent_payload = call_kwargs["json"]
+    assert sent_payload["timeout"] == 5
+    assert sent_payload["proxyCountryCode"] == "us"
+    assert session["external_call_id"] == "call-browser-use-1"
+
+
+def test_browser_use_managed_gateway_reuses_pending_idempotency_key_after_timeout():
+    """A retry after a timed-out create reuses the same idempotency key.
+
+    The first ``requests.post`` raises ``Timeout`` and the second succeeds;
+    both calls use the same task id, and the ``X-Idempotency-Key`` header sent
+    on each attempt must be identical.
+    """
+    _install_fake_tools_package()
+    env = os.environ.copy()
+    env.pop("BROWSER_USE_API_KEY", None)
+    env.update({
+        "TOOL_GATEWAY_USER_TOKEN": "nous-token",
+        "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
+    })
+
+    class _Response:
+        status_code = 200
+        ok = True
+        text = ""
+        headers = {"x-external-call-id": "call-browser-use-2"}
+
+        def json(self):
+            return {
+                "id": "bu_local_session_2",
+                "connectUrl": "wss://connect.browser-use.example/session2",
+            }
+
+    with patch.dict(os.environ, env, clear=True):
+        browser_use_module = _load_tool_module(
+            "tools.browser_providers.browser_use",
+            "browser_providers/browser_use.py",
+        )
+        provider = browser_use_module.BrowserUseProvider()
+        timeout = browser_use_module.requests.Timeout("timed out")

+        # side_effect order: the first POST raises Timeout, the retry returns _Response.
        with patch.object(
-            browserbase_module.requests,
+            browser_use_module.requests,
            "post",
            side_effect=[timeout, _Response()],
        ) as post:
            try:
-                provider.create_session("task-browserbase-timeout")
-            except browserbase_module.requests.Timeout:
+                provider.create_session("task-browser-use-timeout")
+            except browser_use_module.requests.Timeout:
                pass
            else:
-                raise AssertionError("Expected Browserbase create_session to propagate timeout")
+                raise AssertionError("Expected Browser Use create_session to propagate timeout")

-            provider.create_session("task-browserbase-timeout")
+            provider.create_session("task-browser-use-timeout")

    first_headers = post.call_args_list[0].kwargs["headers"]
    second_headers = post.call_args_list[1].kwargs["headers"]
    assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"]


-def test_browserbase_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts():
+def test_browser_use_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts():
    _install_fake_tools_package()
    env = os.environ.copy()
-    env.pop("BROWSERBASE_API_KEY", None)
-    env.pop("BROWSERBASE_PROJECT_ID", None)
+    env.pop("BROWSER_USE_API_KEY", None)
    env.update({
        "TOOL_GATEWAY_USER_TOKEN": "nous-token",
-        "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
+        "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
    })

    class _ConflictResponse:
        status_code = 409
        ok = False
-        text = '{"error":{"code":"CONFLICT","message":"Managed Browserbase session creation is already in progress for this idempotency key"}}'
+        text = '{"error":{"code":"CONFLICT","message":"Managed Browser Use session creation is already in progress for this idempotency key"}}'
        headers = {}

        def json(self):
            return {
                "error": {
                    "code": "CONFLICT",
-                    "message": "Managed Browserbase session creation is already in progress for this idempotency key",
+                    "message": "Managed Browser Use session creation is already in progress for this idempotency key",
                }
            }

@@ -250,72 +269,71 @@ def test_browserbase_managed_gateway_preserves_pending_idempotency_key_for_in_pr
        status_code = 200
        ok = True
        text = ""
-        headers = {"x-external-call-id": "call-browserbase-4"}
+        headers = {"x-external-call-id": "call-browser-use-4"}

        def json(self):
            return {
-                "id": "bb_local_session_4",
-                "connectUrl": "wss://connect.browserbase.example/session4",
+                "id": "bu_local_session_4",
+                "connectUrl": "wss://connect.browser-use.example/session4",
            }

    with patch.dict(os.environ, env, clear=True):
-        browserbase_module = _load_tool_module(
-            "tools.browser_providers.browserbase",
-            "browser_providers/browserbase.py",
+        browser_use_module = _load_tool_module(
+            "tools.browser_providers.browser_use",
+            "browser_providers/browser_use.py",
        )
-        provider = browserbase_module.BrowserbaseProvider()
+        provider = browser_use_module.BrowserUseProvider()

        with patch.object(
-            browserbase_module.requests,
+            browser_use_module.requests,
            "post",
            side_effect=[_ConflictResponse(), _SuccessResponse()],
        ) as post:
            try:
-                provider.create_session("task-browserbase-conflict")
+                provider.create_session("task-browser-use-conflict")
            except RuntimeError:
                pass
            else:
-                raise AssertionError("Expected Browserbase create_session to propagate the in-progress conflict")
+                raise AssertionError("Expected Browser Use create_session to propagate the in-progress conflict")

-            provider.create_session("task-browserbase-conflict")
+            provider.create_session("task-browser-use-conflict")

    first_headers = post.call_args_list[0].kwargs["headers"]
    second_headers = post.call_args_list[1].kwargs["headers"]
    assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"]


-def test_browserbase_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success():
+def test_browser_use_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success():
    _install_fake_tools_package()
    env = os.environ.copy()
-    env.pop("BROWSERBASE_API_KEY", None)
-    env.pop("BROWSERBASE_PROJECT_ID", None)
+    env.pop("BROWSER_USE_API_KEY", None)
    env.update({
        "TOOL_GATEWAY_USER_TOKEN": "nous-token",
-        "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
+        "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
    })

    class _Response:
        status_code = 200
        ok = True
        text = ""
-        headers = {"x-external-call-id": "call-browserbase-3"}
+        headers = {"x-external-call-id": "call-browser-use-3"}

        def json(self):
            return {
-                "id": "bb_local_session_3",
-                "connectUrl": "wss://connect.browserbase.example/session3",
+                "id": "bu_local_session_3",
+                "connectUrl": "wss://connect.browser-use.example/session3",
            }

    with patch.dict(os.environ, env, clear=True):
-        browserbase_module = _load_tool_module(
-            "tools.browser_providers.browserbase",
-            "browser_providers/browserbase.py",
+        browser_use_module = _load_tool_module(
+            "tools.browser_providers.browser_use",
+            "browser_providers/browser_use.py",
        )
-        provider = browserbase_module.BrowserbaseProvider()
+        provider = browser_use_module.BrowserUseProvider()

-        with patch.object(browserbase_module.requests, "post", side_effect=[_Response(), _Response()]) as post:
-            provider.create_session("task-browserbase-new")
-            provider.create_session("task-browserbase-new")
+        with patch.object(browser_use_module.requests, "post", side_effect=[_Response(), _Response()]) as post:
+            provider.create_session("task-browser-use-new")
+            provider.create_session("task-browser-use-new")

    first_headers = post.call_args_list[0].kwargs["headers"]
    second_headers = post.call_args_list[1].kwargs["headers"]
@@ -40,17 +40,17 @@ def test_resolve_managed_tool_gateway_uses_vendor_specific_override():
        os.environ,
        {
            "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1",
-            "BROWSERBASE_GATEWAY_URL": "http://browserbase-gateway.localhost:3009/",
+            "BROWSER_USE_GATEWAY_URL": "http://browser-use-gateway.localhost:3009/",
        },
        clear=False,
    ):
        result = resolve_managed_tool_gateway(
-            "browserbase",
+            "browser-use",
            token_reader=lambda: "nous-token",
        )

    assert result is not None
-    assert result.gateway_origin == "http://browserbase-gateway.localhost:3009"
+    assert result.gateway_origin == "http://browser-use-gateway.localhost:3009"


 def test_resolve_managed_tool_gateway_is_inactive_without_nous_token():
@@ -0,0 +1,247 @@
+"""Tests for notify_on_complete background process feature.
+
+Covers:
+  - ProcessSession.notify_on_complete field
+  - ProcessRegistry.completion_queue population on _move_to_finished()
+  - Checkpoint persistence of notify_on_complete
+  - Terminal tool schema includes notify_on_complete
+  - Terminal tool handler passes notify_on_complete through
+  - code_execution_tool lists notify_on_complete in _TERMINAL_BLOCKED_PARAMS
+"""
+
+import json
+import os
+import queue
+import time
+import pytest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+from tools.process_registry import (
+    ProcessRegistry,
+    ProcessSession,
+)
+
+
+@pytest.fixture()
+def registry():
+    """Return a newly constructed ``ProcessRegistry`` for the requesting test."""
+    return ProcessRegistry()
+
+
+def _make_session(
+    sid="proc_test_notify",
+    command="echo hello",
+    task_id="t1",
+    exited=False,
+    exit_code=None,
+    output="",
+    notify_on_complete=False,
+) -> ProcessSession:
+    """Build a ``ProcessSession`` for tests with overridable fields.
+
+    ``sid`` is passed as the session ``id`` and ``output`` as its
+    ``output_buffer``; ``started_at`` is the current time.  All other
+    arguments are forwarded under their own names.
+    """
+    fields = {
+        "id": sid,
+        "command": command,
+        "task_id": task_id,
+        "started_at": time.time(),
+        "exited": exited,
+        "exit_code": exit_code,
+        "output_buffer": output,
+        "notify_on_complete": notify_on_complete,
+    }
+    return ProcessSession(**fields)
+
+
+# =========================================================================
+# ProcessSession field
+# =========================================================================
+
+class TestProcessSessionField:
+    """``ProcessSession.notify_on_complete`` is False by default and settable to True."""
+
+    def test_default_false(self):
+        """Omitting the argument leaves the flag False."""
+        session = ProcessSession(id="proc_1", command="echo hi")
+        assert session.notify_on_complete is False
+
+    def test_set_true(self):
+        """Passing ``notify_on_complete=True`` is reflected on the instance."""
+        session = ProcessSession(
+            id="proc_1",
+            command="echo hi",
+            notify_on_complete=True,
+        )
+        assert session.notify_on_complete is True
+
+
+# =========================================================================
+# Completion queue
+# =========================================================================
+
+class TestCompletionQueue:
+    """``_move_to_finished`` feeds ``completion_queue`` only for notify sessions."""
+
+    @staticmethod
+    def _finish(registry, session, exit_code=0):
+        """Mark *session* exited, register it as running, then move it to finished.
+
+        ``_write_checkpoint`` is patched out while ``_move_to_finished`` runs.
+        """
+        session.exited = True
+        session.exit_code = exit_code
+        registry._running[session.id] = session
+        with patch.object(registry, "_write_checkpoint"):
+            registry._move_to_finished(session)
+
+    def test_queue_exists(self, registry):
+        """A new registry exposes an empty ``completion_queue``."""
+        assert hasattr(registry, "completion_queue")
+        assert registry.completion_queue.empty()
+
+    def test_move_to_finished_no_notify(self, registry):
+        """Processes without notify_on_complete don't enqueue."""
+        session = _make_session(notify_on_complete=False, output="done")
+        self._finish(registry, session)
+        assert registry.completion_queue.empty()
+
+    def test_move_to_finished_with_notify(self, registry):
+        """Processes with notify_on_complete push to queue."""
+        session = _make_session(
+            notify_on_complete=True,
+            output="build succeeded",
+            exit_code=0,
+        )
+        self._finish(registry, session)
+
+        assert not registry.completion_queue.empty()
+        record = registry.completion_queue.get_nowait()
+        assert record["session_id"] == session.id
+        assert record["command"] == "echo hello"
+        assert record["exit_code"] == 0
+        assert "build succeeded" in record["output"]
+
+    def test_move_to_finished_nonzero_exit(self, registry):
+        """Nonzero exit codes are captured correctly."""
+        session = _make_session(
+            notify_on_complete=True,
+            output="FAILED",
+            exit_code=1,
+        )
+        self._finish(registry, session, exit_code=1)
+
+        record = registry.completion_queue.get_nowait()
+        assert record["exit_code"] == 1
+        assert "FAILED" in record["output"]
+
+    def test_output_truncated_to_2000(self, registry):
+        """Long output is truncated to last 2000 chars."""
+        session = _make_session(
+            notify_on_complete=True,
+            output="x" * 5000,
+        )
+        self._finish(registry, session)
+
+        record = registry.completion_queue.get_nowait()
+        assert len(record["output"]) == 2000
+
+    def test_multiple_completions_queued(self, registry):
+        """Multiple notify processes all push to the same queue."""
+        for index in range(3):
+            session = _make_session(
+                sid=f"proc_{index}",
+                notify_on_complete=True,
+                output=f"output_{index}",
+            )
+            self._finish(registry, session)
+
+        drained = []
+        while not registry.completion_queue.empty():
+            drained.append(registry.completion_queue.get_nowait())
+        assert len(drained) == 3
+        seen_ids = {record["session_id"] for record in drained}
+        assert seen_ids == {"proc_0", "proc_1", "proc_2"}
+
+
+# =========================================================================
+# Checkpoint persistence
+# =========================================================================
+
+class TestCheckpointNotify:
+    """``notify_on_complete`` is written to and read back from the checkpoint file."""
+
+    def test_checkpoint_includes_notify(self, registry, tmp_path):
+        """A running notify session is checkpointed with the flag set to True."""
+        checkpoint = tmp_path / "procs.json"
+        with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
+            session = _make_session(notify_on_complete=True)
+            registry._running[session.id] = session
+            registry._write_checkpoint()
+
+            entries = json.loads(checkpoint.read_text())
+            assert len(entries) == 1
+            assert entries[0]["notify_on_complete"] is True
+
+    def test_checkpoint_without_notify(self, registry, tmp_path):
+        """A running non-notify session is checkpointed with the flag set to False."""
+        checkpoint = tmp_path / "procs.json"
+        with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
+            session = _make_session(notify_on_complete=False)
+            registry._running[session.id] = session
+            registry._write_checkpoint()
+
+            entries = json.loads(checkpoint.read_text())
+            assert entries[0]["notify_on_complete"] is False
+
+    def test_recover_preserves_notify(self, registry, tmp_path):
+        """Recovery restores ``notify_on_complete=True`` from a checkpoint entry."""
+        # The entry uses this test process's own pid.
+        entry = {
+            "session_id": "proc_live",
+            "command": "sleep 999",
+            "pid": os.getpid(),
+            "task_id": "t1",
+            "notify_on_complete": True,
+        }
+        checkpoint = tmp_path / "procs.json"
+        checkpoint.write_text(json.dumps([entry]))
+        with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
+            assert registry.recover_from_checkpoint() == 1
+            restored = registry.get("proc_live")
+            assert restored.notify_on_complete is True
+
+    def test_recover_defaults_false(self, registry, tmp_path):
+        """Old checkpoint entries without the field default to False."""
+        entry = {
+            "session_id": "proc_live",
+            "command": "sleep 999",
+            "pid": os.getpid(),
+            "task_id": "t1",
+        }
+        checkpoint = tmp_path / "procs.json"
+        checkpoint.write_text(json.dumps([entry]))
+        with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
+            assert registry.recover_from_checkpoint() == 1
+            restored = registry.get("proc_live")
+            assert restored.notify_on_complete is False
+
+
+# =========================================================================
+# Terminal tool schema
+# =========================================================================
+
+class TestTerminalSchema:
+    """The terminal tool declares ``notify_on_complete`` and its handler forwards it."""
+
+    def test_schema_has_notify_on_complete(self):
+        """The schema lists the parameter as a boolean defaulting to False."""
+        from tools.terminal_tool import TERMINAL_SCHEMA
+        properties = TERMINAL_SCHEMA["parameters"]["properties"]
+        assert "notify_on_complete" in properties
+        notify_spec = properties["notify_on_complete"]
+        assert notify_spec["type"] == "boolean"
+        assert notify_spec["default"] is False
+
+    def test_handler_passes_notify(self):
+        """_handle_terminal passes notify_on_complete to terminal_tool."""
+        from tools.terminal_tool import _handle_terminal
+        tool_args = {"command": "echo hi", "background": True, "notify_on_complete": True}
+        with patch("tools.terminal_tool.terminal_tool", return_value='{"ok":true}') as mock_tt:
+            _handle_terminal(tool_args, task_id="t1")
+            # call_args is an (args, kwargs) pair; index 1 is the keyword dict.
+            forwarded = mock_tt.call_args[1]
+            assert forwarded["notify_on_complete"] is True
+
+
+# =========================================================================
+# Code execution blocked params
+# =========================================================================
+
+class TestCodeExecutionBlocked:
+    """``notify_on_complete`` appears in the code-execution tool's blocked terminal params."""
+
+    def test_notify_on_complete_blocked_in_sandbox(self):
+        """``_TERMINAL_BLOCKED_PARAMS`` contains ``notify_on_complete``."""
+        from tools.code_execution_tool import _TERMINAL_BLOCKED_PARAMS
+        assert "notify_on_complete" in _TERMINAL_BLOCKED_PARAMS
@@ -871,3 +871,7 @@ def check_all_command_guards(command: str, env_type: str,

    return {"approved": True, "message": None,
            "user_approved": True, "description": combined_desc}
+
+
+# Load permanent allowlist from config on module import
+load_permanent_allowlist()
@@ -240,6 +240,25 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
                "Browser is visible via VNC. "
                "Share this link with the user so they can watch the browser live."
            )
+
+        # Auto-take a compact snapshot so the model can act immediately
+        try:
+            snap_data = _get(
+                f"/tabs/{session['tab_id']}/snapshot",
+                params={"userId": session["user_id"]},
+            )
+            snapshot_text = snap_data.get("snapshot", "")
+            from tools.browser_tool import (
+                SNAPSHOT_SUMMARIZE_THRESHOLD,
+                _truncate_snapshot,
+            )
+            if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
+                snapshot_text = _truncate_snapshot(snapshot_text)
+            result["snapshot"] = snapshot_text
+            result["element_count"] = snap_data.get("refsCount", 0)
+        except Exception:
+            pass  # Navigation succeeded; snapshot is a bonus
+
        return json.dumps(result)
    except requests.HTTPError as e:
        return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
@@ -2,16 +2,62 @@

 import logging
 import os
+import threading
 import uuid
-from typing import Dict
+from typing import Any, Dict, Optional

 import requests

 from tools.browser_providers.base import CloudBrowserProvider
+from tools.managed_tool_gateway import resolve_managed_tool_gateway
+from tools.tool_backend_helpers import managed_nous_tools_enabled

 logger = logging.getLogger(__name__)
+_pending_create_keys: Dict[str, str] = {}
+_pending_create_keys_lock = threading.Lock()

-_BASE_URL = "https://api.browser-use.com/api/v2"
+_BASE_URL = "https://api.browser-use.com/api/v3"
+_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5
+_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us"
+
+
+def _get_or_create_pending_create_key(task_id: str) -> str:
+    """Return the idempotency key pending for *task_id*, minting one if absent.
+
+    A new key has the form ``browser-use-session-create:<uuid4 hex>`` and is
+    stored in ``_pending_create_keys``.  The lookup and the insert both happen
+    while holding ``_pending_create_keys_lock``.
+    """
+    with _pending_create_keys_lock:
+        key = _pending_create_keys.get(task_id)
+        if not key:
+            key = f"browser-use-session-create:{uuid.uuid4().hex}"
+            _pending_create_keys[task_id] = key
+        return key
+
+
+def _clear_pending_create_key(task_id: str) -> None:
+    """Drop the pending idempotency key for *task_id*; a missing entry is ignored."""
+    with _pending_create_keys_lock:
+        _pending_create_keys.pop(task_id, None)
+
+
+def _should_preserve_pending_create_key(response: requests.Response) -> bool:
+    """Decide whether a failed create response should keep the pending key.
+
+    Returns True for a status of 500 or above, and for a 409 whose JSON body
+    is shaped like ``{"error": {"message": ...}}`` with "already in progress"
+    in the message (case-insensitive).  Every other response returns False.
+    """
+    status = response.status_code
+    if status >= 500:
+        return True
+    if status != 409:
+        return False
+
+    try:
+        body = response.json()
+    except Exception:
+        # An unparseable 409 body cannot be recognised as an in-progress conflict.
+        return False
+
+    error = body.get("error") if isinstance(body, dict) else None
+    if not isinstance(error, dict):
+        return False
+
+    return "already in progress" in str(error.get("message") or "").lower()


 class BrowserUseProvider(CloudBrowserProvider):
@@ -21,55 +67,120 @@ class BrowserUseProvider(CloudBrowserProvider):
        return "Browser Use"

    def is_configured(self) -> bool:
+        """Return True when ``_get_config_or_none`` yields a configuration."""
-        return bool(os.environ.get("BROWSER_USE_API_KEY"))
+        return self._get_config_or_none() is not None
+
+    # ------------------------------------------------------------------
+    # Config resolution (direct API key OR managed Nous gateway)
+    # ------------------------------------------------------------------
+
+    def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
+        """Resolve connection settings, or return None when nothing is configured.
+
+        A non-empty ``BROWSER_USE_API_KEY`` environment variable takes
+        precedence and yields direct mode against ``_BASE_URL``.  Otherwise the
+        "browser-use" managed gateway is looked up via
+        ``resolve_managed_tool_gateway``; its user token is used as the API key
+        and its origin (trailing slashes stripped) as the base URL.
+        """
+        direct_key = os.environ.get("BROWSER_USE_API_KEY")
+        if direct_key:
+            return {
+                "api_key": direct_key,
+                "base_url": _BASE_URL,
+                "managed_mode": False,
+            }
+
+        gateway = resolve_managed_tool_gateway("browser-use")
+        if gateway is not None:
+            return {
+                "api_key": gateway.nous_user_token,
+                "base_url": gateway.gateway_origin.rstrip("/"),
+                "managed_mode": True,
+            }
+        return None
+
+    def _get_config(self) -> Dict[str, Any]:
+        """Return the resolved configuration, raising ``ValueError`` if there is none.
+
+        The error text mentions the managed gateway option only when
+        ``managed_nous_tools_enabled()`` is true.
+        """
+        config = self._get_config_or_none()
+        if config is not None:
+            return config
+
+        if managed_nous_tools_enabled():
+            raise ValueError(
+                "Browser Use requires either a direct BROWSER_USE_API_KEY "
+                "credential or a managed Browser Use gateway configuration."
+            )
+        raise ValueError(
+            "Browser Use requires a direct BROWSER_USE_API_KEY credential."
+        )

    # ------------------------------------------------------------------
    # Session lifecycle
    # ------------------------------------------------------------------

-    def _headers(self) -> Dict[str, str]:
-        api_key = os.environ.get("BROWSER_USE_API_KEY")
-        if not api_key:
-            raise ValueError(
-                "BROWSER_USE_API_KEY environment variable is required. "
-                "Get your key at https://browser-use.com"
-            )
-        return {
+    def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
+        """Build the JSON request headers, using ``config["api_key"]`` as the API key.
+
+        A new dict is returned on every call.
+        """
+        headers = {
            "Content-Type": "application/json",
-            "X-Browser-Use-API-Key": api_key,
+            "X-Browser-Use-API-Key": config["api_key"],
        }
+        return headers

    def create_session(self, task_id: str) -> Dict[str, object]:
+        """Create a Browser Use session by POSTing to ``<base_url>/browsers``.
+
+        In managed (gateway) mode the request also carries an
+        ``X-Idempotency-Key`` header taken from the per-task pending key and a
+        payload with ``timeout`` and ``proxyCountryCode``; in direct mode the
+        payload is empty.
+
+        Returns:
+            A dict with ``session_name``, ``bb_session_id``, ``cdp_url`` (from
+            ``cdpUrl``, else ``connectUrl``, else ``""``), ``features`` and
+            ``external_call_id`` (the ``x-external-call-id`` response header in
+            managed mode, otherwise None).
+
+        Raises:
+            ValueError: from ``_get_config`` when no configuration is available.
+            RuntimeError: when the response is not ``ok``.
+        """
+        config = self._get_config()
+        managed_mode = bool(config.get("managed_mode"))
+
+        headers = self._headers(config)
+        if managed_mode:
+            headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)
+
+        # Keep gateway-backed sessions short so billing authorization does not
+        # default to a long Browser-Use timeout when Hermes only needs a task-
+        # scoped ephemeral browser.
+        payload = (
+            {
+                "timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES,
+                "proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE,
+            }
+            if managed_mode
+            else {}
+        )
+
+        # An exception raised by the POST itself (e.g. a timeout) propagates
+        # before any clearing below, so the pending key is kept for a retry.
        response = requests.post(
-            f"{_BASE_URL}/browsers",
-            headers=self._headers(),
-            json={},
+            f"{config['base_url']}/browsers",
+            headers=headers,
+            json=payload,
            timeout=30,
        )

        if not response.ok:
+            # Drop the pending key unless the failure is one where it is
+            # preserved (see _should_preserve_pending_create_key).
+            if managed_mode and not _should_preserve_pending_create_key(response):
+                _clear_pending_create_key(task_id)
            raise RuntimeError(
                f"Failed to create Browser Use session: "
                f"{response.status_code} {response.text}"
            )

        session_data = response.json()
+        # The create succeeded, so the pending key for this task is cleared.
+        if managed_mode:
+            _clear_pending_create_key(task_id)
        session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
+        external_call_id = response.headers.get("x-external-call-id") if managed_mode else None

        logger.info("Created Browser Use session %s", session_name)

+        # Accept either response key for the CDP endpoint; fall back to "".
+        cdp_url = session_data.get("cdpUrl") or session_data.get("connectUrl") or ""
+
        return {
            "session_name": session_name,
            "bb_session_id": session_data["id"],
-            "cdp_url": session_data["cdpUrl"],
+            "cdp_url": cdp_url,
            "features": {"browser_use": True},
+            "external_call_id": external_call_id,
        }

    def close_session(self, session_id: str) -> bool:
+        try:
+            config = self._get_config()
+        except ValueError:
+            logger.warning("Cannot close Browser Use session %s — missing credentials", session_id)
+            return False
+
        try:
            response = requests.patch(
-                f"{_BASE_URL}/browsers/{session_id}",
-                headers=self._headers(),
+                f"{config['base_url']}/browsers/{session_id}",
+                headers=self._headers(config),
                json={"action": "stop"},
                timeout=10,
            )
@@ -89,17 +200,14 @@ class BrowserUseProvider(CloudBrowserProvider):
            return False

    def emergency_cleanup(self, session_id: str) -> None:
-        api_key = os.environ.get("BROWSER_USE_API_KEY")
-        if not api_key:
+        config = self._get_config_or_none()
+        if config is None:
            logger.warning("Cannot emergency-cleanup Browser Use session %s — missing credentials", session_id)
            return
        try:
            requests.patch(
-                f"{_BASE_URL}/browsers/{session_id}",
-                headers={
-                    "Content-Type": "application/json",
-                    "X-Browser-Use-API-Key": api_key,
-                },
+                f"{config['base_url']}/browsers/{session_id}",
+                headers=self._headers(config),
                json={"action": "stop"},
                timeout=5,
            )
@@ -1,63 +1,24 @@
-"""Browserbase cloud browser provider."""
+"""Browserbase cloud browser provider (direct credentials only)."""

 import logging
 import os
-import threading
 import uuid
 from typing import Any, Dict, Optional

 import requests

 from tools.browser_providers.base import CloudBrowserProvider
-from tools.managed_tool_gateway import resolve_managed_tool_gateway
-from tools.tool_backend_helpers import managed_nous_tools_enabled

 logger = logging.getLogger(__name__)
-_pending_create_keys: Dict[str, str] = {}
-_pending_create_keys_lock = threading.Lock()
-
-
-def _get_or_create_pending_create_key(task_id: str) -> str:
-    with _pending_create_keys_lock:
-        existing = _pending_create_keys.get(task_id)
-        if existing:
-            return existing
-
-        created = f"browserbase-session-create:{uuid.uuid4().hex}"
-        _pending_create_keys[task_id] = created
-        return created
-
-
-def _clear_pending_create_key(task_id: str) -> None:
-    with _pending_create_keys_lock:
-        _pending_create_keys.pop(task_id, None)
-
-
-def _should_preserve_pending_create_key(response: requests.Response) -> bool:
-    if response.status_code >= 500:
-        return True
-
-    if response.status_code != 409:
-        return False
-
-    try:
-        payload = response.json()
-    except Exception:
-        return False
-
-    if not isinstance(payload, dict):
-        return False
-
-    error = payload.get("error")
-    if not isinstance(error, dict):
-        return False
-
-    message = str(error.get("message") or "").lower()
-    return "already in progress" in message


 class BrowserbaseProvider(CloudBrowserProvider):
-    """Browserbase (https://browserbase.com) cloud browser backend."""
+    """Browserbase (https://browserbase.com) cloud browser backend.
+
+    This provider requires direct BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID
+    credentials.  Managed Nous gateway support has been removed — the Nous
+    subscription now routes through Browser Use instead.
+    """

    def provider_name(self) -> str:
        return "Browserbase"
@@ -77,37 +38,20 @@ class BrowserbaseProvider(CloudBrowserProvider):
                "api_key": api_key,
                "project_id": project_id,
                "base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"),
-                "managed_mode": False,
            }
-
-        managed = resolve_managed_tool_gateway("browserbase")
-        if managed is None:
-            return None
-
-        return {
-            "api_key": managed.nous_user_token,
-            "project_id": "managed",
-            "base_url": managed.gateway_origin.rstrip("/"),
-            "managed_mode": True,
-        }
+        return None

    def _get_config(self) -> Dict[str, Any]:
        config = self._get_config_or_none()
        if config is None:
-            message = (
-                "Browserbase requires direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID credentials."
+            raise ValueError(
+                "Browserbase requires BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID "
+                "environment variables."
            )
-            if managed_nous_tools_enabled():
-                message = (
-                    "Browserbase requires either direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID "
-                    "credentials or a managed Browserbase gateway configuration."
-                )
-            raise ValueError(message)
        return config

    def create_session(self, task_id: str) -> Dict[str, object]:
        config = self._get_config()
-        managed_mode = bool(config.get("managed_mode"))

        # Optional env-var knobs
        enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false"
@@ -147,8 +91,6 @@ class BrowserbaseProvider(CloudBrowserProvider):
            "Content-Type": "application/json",
            "X-BB-API-Key": config["api_key"],
        }
-        if managed_mode:
-            headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)

        response = requests.post(
            f"{config['base_url']}/v1/sessions",
@@ -161,7 +103,7 @@ class BrowserbaseProvider(CloudBrowserProvider):
        keepalive_fallback = False

        # Handle 402 — paid features unavailable
-        if response.status_code == 402 and not managed_mode:
+        if response.status_code == 402:
            if enable_keep_alive:
                keepalive_fallback = True
                logger.warning(
@@ -191,18 +133,13 @@ class BrowserbaseProvider(CloudBrowserProvider):
                )

        if not response.ok:
-            if managed_mode and not _should_preserve_pending_create_key(response):
-                _clear_pending_create_key(task_id)
            raise RuntimeError(
                f"Failed to create Browserbase session: "
                f"{response.status_code} {response.text}"
            )

        session_data = response.json()
-        if managed_mode:
-            _clear_pending_create_key(task_id)
        session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
-        external_call_id = response.headers.get("x-external-call-id") if managed_mode else None

        if enable_proxies and not proxies_fallback:
            features_enabled["proxies"] = True
@@ -221,7 +158,6 @@ class BrowserbaseProvider(CloudBrowserProvider):
            "bb_session_id": session_data["id"],
            "cdp_url": session_data["connectUrl"],
            "features": features_enabled,
-            "external_call_id": external_call_id,
        }

    def close_session(self, session_id: str) -> bool:
@@ -3,10 +3,10 @@
 Browser Tool Module

 This module provides browser automation tools using agent-browser CLI.  It
-supports two backends — **Browserbase** (cloud) and **local Chromium** — with
-identical agent-facing behaviour.  The backend is auto-detected: if
-``BROWSERBASE_API_KEY`` is set the cloud service is used; otherwise a local
-headless Chromium instance is launched automatically.
+supports multiple backends — **Browser Use** (cloud, default for Nous
+subscribers), **Browserbase** (cloud, direct credentials), and **local
+Chromium** — with identical agent-facing behaviour.  The backend is
+auto-detected from config and available credentials.

 The tool uses agent-browser's accessibility tree (ariaSnapshot) for text-based
 page representation, making it ideal for LLM agents without vision capabilities.
@@ -17,8 +17,7 @@ Features:
  ``agent-browser install`` (downloads Chromium) or
  ``agent-browser install --with-deps`` (also installs system libraries for
  Debian/Ubuntu/Docker).
- **Cloud mode**: Browserbase cloud execution with stealth features, proxies,
-  and CAPTCHA solving.  Activated when BROWSERBASE_API_KEY is set.
+- **Cloud mode**: Browserbase or Browser Use cloud execution when configured.
 - Session isolation per task ID
 - Text-based page snapshots using accessibility tree
 - Element interaction via ref selectors (@e1, @e2, etc.)
@@ -26,8 +25,9 @@ Features:
 - Automatic cleanup of browser sessions

 Environment Variables:
- BROWSERBASE_API_KEY: API key for Browserbase (enables cloud mode)
- BROWSERBASE_PROJECT_ID: Project ID for Browserbase (required for cloud mode)
+- BROWSERBASE_API_KEY: API key for direct Browserbase cloud mode
+- BROWSERBASE_PROJECT_ID: Project ID for direct Browserbase cloud mode
+- BROWSER_USE_API_KEY: API key for direct Browser Use cloud mode
 - BROWSERBASE_PROXIES: Enable/disable residential proxies (default: "true")
 - BROWSERBASE_ADVANCED_STEALTH: Enable advanced stealth mode with custom Chromium,
  requires Scale Plan (default: "false")
@@ -280,23 +280,19 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
        logger.debug("Could not read cloud_provider from config: %s", e)

    if _cached_cloud_provider is None:
-        fallback_provider = BrowserbaseProvider()
+        # Prefer Browser Use (managed Nous gateway or direct API key),
+        # fall back to Browserbase (direct credentials only).
+        fallback_provider = BrowserUseProvider()
        if fallback_provider.is_configured():
            _cached_cloud_provider = fallback_provider
+        else:
+            fallback_provider = BrowserbaseProvider()
+            if fallback_provider.is_configured():
+                _cached_cloud_provider = fallback_provider

    return _cached_cloud_provider


-def _get_browserbase_config_or_none() -> Optional[Dict[str, Any]]:
-    """Return Browserbase direct or managed config, or None when unavailable."""
-    return BrowserbaseProvider()._get_config_or_none()
-
-
-def _get_browserbase_config() -> Dict[str, Any]:
-    """Return Browserbase config or raise when neither direct nor managed mode is available."""
-    return BrowserbaseProvider()._get_config()
-
-
 def _is_local_mode() -> bool:
    """Return True when the browser tool will use a local browser backend."""
    if _get_cdp_override():
@@ -518,7 +514,7 @@ atexit.register(_stop_browser_cleanup_thread)
 BROWSER_TOOL_SCHEMAS = [
    {
        "name": "browser_navigate",
-        "description": "Navigate to a URL in the browser. Initializes the session and loads the page. Must be called before other browser tools. For simple information retrieval, prefer web_search or web_extract (faster, cheaper). Use browser tools when you need to interact with a page (click, fill forms, dynamic content).",
+        "description": "Navigate to a URL in the browser. Initializes the session and loads the page. Must be called before other browser tools. For simple information retrieval, prefer web_search or web_extract (faster, cheaper). Use browser tools when you need to interact with a page (click, fill forms, dynamic content). Returns a compact page snapshot with interactive elements and ref IDs — no need to call browser_snapshot separately after navigating.",
        "parameters": {
            "type": "object",
            "properties": {
@@ -532,7 +528,7 @@ BROWSER_TOOL_SCHEMAS = [
    },
    {
        "name": "browser_snapshot",
-        "description": "Get a text-based snapshot of the current page's accessibility tree. Returns interactive elements with ref IDs (like @e1, @e2) for browser_click and browser_type. full=false (default): compact view with interactive elements. full=true: complete page content. Snapshots over 8000 chars are truncated or LLM-summarized. Requires browser_navigate first.",
+        "description": "Get a text-based snapshot of the current page's accessibility tree. Returns interactive elements with ref IDs (like @e1, @e2) for browser_click and browser_type. full=false (default): compact view with interactive elements. full=true: complete page content. Snapshots over 8000 chars are truncated or LLM-summarized. Requires browser_navigate first. Note: browser_navigate already returns a compact snapshot — use this to refresh after interactions that change the page, or with full=true for complete content.",
        "parameters": {
            "type": "object",
            "properties": {
@@ -617,7 +613,7 @@ BROWSER_TOOL_SCHEMAS = [
    },
    {
        "name": "browser_close",
-        "description": "Close the browser session and release resources. Call this when done with browser tasks to free up Browserbase session quota.",
+        "description": "Close the browser session and release resources. Call this when done with browser tasks to free up cloud browser session quota.",
        "parameters": {
            "type": "object",
            "properties": {},
@@ -744,6 +740,11 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
            session_info = _create_local_session(task_id)
        else:
            session_info = provider.create_session(task_id)
+            if session_info.get("cdp_url"):
+                # Some cloud providers (including Browser-Use v3) return an HTTP
+                # CDP discovery URL instead of a raw websocket endpoint.
+                session_info = dict(session_info)
+                session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"]))
    
    with _cleanup_lock:
        # Double-check: another thread may have created a session while we
@@ -1229,7 +1230,22 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
                    "Consider upgrading Browserbase plan for proxy support."
                )
            response["stealth_features"] = active_features
-        
+
+        # Auto-take a compact snapshot so the model can act immediately
+        # without a separate browser_snapshot call.
+        try:
+            snap_result = _run_browser_command(effective_task_id, "snapshot", ["-c"])
+            if snap_result.get("success"):
+                snap_data = snap_result.get("data", {})
+                snapshot_text = snap_data.get("snapshot", "")
+                refs = snap_data.get("refs", {})
+                if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
+                    snapshot_text = _truncate_snapshot(snapshot_text)
+                response["snapshot"] = snapshot_text
+                response["element_count"] = len(refs) if refs else 0
+        except Exception as e:
+            logger.debug("Auto-snapshot after navigate failed: %s", e)
+
        return json.dumps(response, ensure_ascii=False)
    else:
        return json.dumps({
@@ -1376,31 +1392,40 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
    Returns:
        JSON string with scroll result
    """
-    if _is_camofox_mode():
-        from tools.browser_camofox import camofox_scroll
-        return camofox_scroll(direction, task_id)
-
-    effective_task_id = task_id or "default"
-    
    # Validate direction
    if direction not in ["up", "down"]:
        return json.dumps({
            "success": False,
            "error": f"Invalid direction '{direction}'. Use 'up' or 'down'."
        }, ensure_ascii=False)
-    
-    result = _run_browser_command(effective_task_id, "scroll", [direction])
-    
-    if result.get("success"):
-        return json.dumps({
-            "success": True,
-            "scrolled": direction
-        }, ensure_ascii=False)
-    else:
-        return json.dumps({
-            "success": False,
-            "error": result.get("error", f"Failed to scroll {direction}")
-        }, ensure_ascii=False)
+
+    # Repeat the scroll 5 times to get meaningful page movement.
+    # Most backends scroll ~100px per call, which is barely visible.
+    # 5x gives roughly half a viewport of travel, backend-agnostic.
+    _SCROLL_REPEATS = 5
+
+    if _is_camofox_mode():
+        from tools.browser_camofox import camofox_scroll
+        result = None
+        for _ in range(_SCROLL_REPEATS):
+            result = camofox_scroll(direction, task_id)
+        return result
+
+    effective_task_id = task_id or "default"
+
+    result = None
+    for _ in range(_SCROLL_REPEATS):
+        result = _run_browser_command(effective_task_id, "scroll", [direction])
+        if not result.get("success"):
+            return json.dumps({
+                "success": False,
+                "error": result.get("error", f"Failed to scroll {direction}")
+            }, ensure_ascii=False)
+
+    return json.dumps({
+        "success": True,
+        "scrolled": direction
+    }, ensure_ascii=False)


 def browser_back(task_id: Optional[str] = None) -> str:
@@ -1463,33 +1488,7 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
        }, ensure_ascii=False)


-def browser_close(task_id: Optional[str] = None) -> str:
-    """
-    Close the browser session.

-    Args:
-        task_id: Task identifier for session isolation
-
-    Returns:
-        JSON string with close result
-    """
-    if _is_camofox_mode():
-        from tools.browser_camofox import camofox_close
-        return camofox_close(task_id)
-
-    effective_task_id = task_id or "default"
-    with _cleanup_lock:
-        had_session = effective_task_id in _active_sessions
-
-    cleanup_browser(effective_task_id)
-
-    response = {
-        "success": True,
-        "closed": True,
-    }
-    if not had_session:
-        response["warning"] = "Session may not have been active"
-    return json.dumps(response, ensure_ascii=False)


 def browser_console(clear: bool = False, expression: Optional[str] = None, task_id: Optional[str] = None) -> str:
@@ -1942,7 +1941,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
    Clean up browser session for a task.
    
    Called automatically when a task completes or when inactivity timeout is reached.
-    Closes both the agent-browser session and the Browserbase session.
+    Closes both the agent-browser/Browserbase session and Camofox sessions.
    
    Args:
        task_id: Task identifier to clean up
@@ -1950,6 +1949,14 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
    if task_id is None:
        task_id = "default"
    
+    # Also clean up Camofox session if running in Camofox mode
+    if _is_camofox_mode():
+        try:
+            from tools.browser_camofox import camofox_close
+            camofox_close(task_id)
+        except Exception as e:
+            logger.debug("Camofox cleanup for task %s: %s", task_id, e)
+
    logger.debug("cleanup_browser called for task_id: %s", task_id)
    logger.debug("Active sessions: %s", list(_active_sessions.keys()))
    
@@ -2168,14 +2175,7 @@ registry.register(
    check_fn=check_browser_requirements,
    emoji="⌨️",
 )
-registry.register(
-    name="browser_close",
-    toolset="browser",
-    schema=_BROWSER_SCHEMA_MAP["browser_close"],
-    handler=lambda args, **kw: browser_close(task_id=kw.get("task_id")),
-    check_fn=check_browser_requirements,
-    emoji="🚪",
-)
+
 registry.register(
    name="browser_get_images",
    toolset="browser",
@@ -300,7 +300,7 @@ def _call(tool_name, args):
 # ---------------------------------------------------------------------------

 # Terminal parameters that must not be used from ephemeral sandbox scripts
-_TERMINAL_BLOCKED_PARAMS = {"background", "check_interval", "pty"}
+_TERMINAL_BLOCKED_PARAMS = {"background", "check_interval", "pty", "notify_on_complete"}


 def _rpc_server_loop(
@@ -103,6 +103,32 @@ def _canonical_skills(skill: Optional[str] = None, skills: Optional[Any] = None)



+
+def _resolve_model_override(model_obj: Any) -> tuple:
+    """Resolve a model override into ``(provider, model)`` for job storage.
+
+    Accepts ``{"model": ..., "provider": ...}``; non-string or blank fields count
+    as missing.  A model without a provider is pinned to the main provider from
+    config (best-effort) so the job does not drift when the default changes.
+    A bare string (legacy schema) is returned as the model, provider unpinned.
+    """
+    def _clean(value: Any) -> Optional[str]:
+        return (value.strip() or None) if isinstance(value, str) else None
+
+    if not isinstance(model_obj, dict):
+        return (None, _clean(model_obj))
+    model_name, provider_name = _clean(model_obj.get("model")), _clean(model_obj.get("provider"))
+    if model_name and not provider_name:
+        try:
+            from hermes_cli.config import load_config
+            model_cfg = load_config().get("model", {})
+            if isinstance(model_cfg, dict):
+                provider_name = _clean(model_cfg.get("provider"))
+        except Exception:
+            pass  # Best-effort; provider stays None
+    return (provider_name, model_name)
+
+
 def _normalize_optional_job_value(value: Optional[Any], *, strip_trailing_slash: bool = False) -> Optional[str]:
    if value is None:
        return None
@@ -392,14 +418,9 @@ Use action='list' to inspect jobs.
 Use action='update', 'pause', 'resume', 'remove', or 'run' to manage an existing job.

 Jobs run in a fresh session with no current-chat context, so prompts must be self-contained.
-If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
+If skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction.
 On update, passing skills=[] clears attached skills.

-If script is provided on create, the referenced Python script runs before each agent turn.
-Its stdout is injected into the prompt as context. Use this for data collection and change
-detection — the script handles gathering data, the agent analyzes and reports.
-On update, pass script="" to clear an attached script.
-
 NOTE: The agent's final response is auto-delivered to the target. Put the primary
 user-facing content in the final response. Cron jobs run autonomously with no user
 present — they cannot ask questions or request clarification.
@@ -418,7 +439,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
            },
            "prompt": {
                "type": "string",
-                "description": "For create: the full self-contained prompt. If skill or skills are also provided, this becomes the task instruction paired with those skills."
+                "description": "For create: the full self-contained prompt. If skills are also provided, this becomes the task instruction paired with those skills."
            },
            "schedule": {
                "type": "string",
@@ -436,39 +457,30 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
                "type": "string",
                "description": "Delivery target: origin, local, telegram, discord, slack, whatsapp, signal, matrix, mattermost, homeassistant, dingtalk, feishu, wecom, email, sms, or platform:chat_id or platform:chat_id:thread_id for Telegram topics. Examples: 'origin', 'local', 'telegram', 'telegram:-1001234567890:17585', 'discord:#engineering'"
            },
-            "model": {
-                "type": "string",
-                "description": "Optional per-job model override used when the cron job runs"
-            },
-            "provider": {
-                "type": "string",
-                "description": "Optional per-job provider override used when resolving runtime credentials"
-            },
-            "base_url": {
-                "type": "string",
-                "description": "Optional per-job base URL override paired with provider/model routing"
-            },
-            "include_disabled": {
-                "type": "boolean",
-                "description": "For list: include paused/completed jobs"
-            },
-            "skill": {
-                "type": "string",
-                "description": "Optional single skill name to load before executing the cron prompt"
-            },
            "skills": {
                "type": "array",
                "items": {"type": "string"},
-                "description": "Optional ordered list of skills to load before executing the cron prompt. On update, pass an empty array to clear attached skills."
+                "description": "Optional ordered list of skill names to load before executing the cron prompt. On update, pass an empty array to clear attached skills."
            },
-            "reason": {
-                "type": "string",
-                "description": "Optional pause reason"
+            "model": {
+                "type": "object",
+                "description": "Optional per-job model override. If provider is omitted, the current main provider is pinned at creation time so the job stays stable.",
+                "properties": {
+                    "provider": {
+                        "type": "string",
+                        "description": "Provider name (e.g. 'openrouter', 'anthropic'). Omit to use and pin the current provider."
+                    },
+                    "model": {
+                        "type": "string",
+                        "description": "Model name (e.g. 'anthropic/claude-sonnet-4', 'claude-sonnet-4')"
+                    }
+                },
+                "required": ["model"]
            },
            "script": {
                "type": "string",
                "description": "Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under ~/.hermes/scripts/. On update, pass empty string to clear."
-            }
+            },
        },
        "required": ["action"]
    }
@@ -502,7 +514,7 @@ registry.register(
    name="cronjob",
    toolset="cronjob",
    schema=CRONJOB_SCHEMA,
-    handler=lambda args, **kw: cronjob(
+    handler=lambda args, **kw: (lambda _mo=_resolve_model_override(args.get("model")): cronjob(
        action=args.get("action", ""),
        job_id=args.get("job_id"),
        prompt=args.get("prompt"),
@@ -510,16 +522,16 @@ registry.register(
        name=args.get("name"),
        repeat=args.get("repeat"),
        deliver=args.get("deliver"),
-        include_disabled=args.get("include_disabled", False),
+        include_disabled=args.get("include_disabled", True),
        skill=args.get("skill"),
        skills=args.get("skills"),
-        model=args.get("model"),
-        provider=args.get("provider"),
+        model=_mo[1],
+        provider=_mo[0] or args.get("provider"),
        base_url=args.get("base_url"),
        reason=args.get("reason"),
        script=args.get("script"),
        task_id=kw.get("task_id"),
-    ),
+    ))(),
    check_fn=check_cronjob_requirements,
    emoji="⏰",
 )
@@ -45,7 +45,12 @@ def check_delegate_requirements() -> bool:
    return True


-def _build_child_system_prompt(goal: str, context: Optional[str] = None) -> str:
+def _build_child_system_prompt(
+    goal: str,
+    context: Optional[str] = None,
+    *,
+    workspace_path: Optional[str] = None,
+) -> str:
    """Build a focused system prompt for a child agent."""
    parts = [
        "You are a focused subagent working on a specific delegated task.",
@@ -54,6 +59,12 @@ def _build_child_system_prompt(goal: str, context: Optional[str] = None) -> str:
    ]
    if context and context.strip():
        parts.append(f"\nCONTEXT:\n{context}")
+    if workspace_path and str(workspace_path).strip():
+        parts.append(
+            "\nWORKSPACE PATH:\n"
+            f"{workspace_path}\n"
+            "Use this exact path for local repository/workdir operations unless the task explicitly says otherwise."
+        )
    parts.append(
        "\nComplete this task using the tools available to you. "
        "When finished, provide a clear, concise summary of:\n"
@@ -61,12 +72,39 @@ def _build_child_system_prompt(goal: str, context: Optional[str] = None) -> str:
        "- What you found or accomplished\n"
        "- Any files you created or modified\n"
        "- Any issues encountered\n\n"
+        "Important workspace rule: Never assume a repository lives at /workspace/... or any other container-style path unless the task/context explicitly gives that path. "
+        "If no exact local path is provided, discover it first before issuing git/workdir-specific commands.\n\n"
        "Be thorough but concise -- your response is returned to the "
        "parent agent as a summary."
    )
    return "\n".join(parts)


+def _resolve_workspace_hint(parent_agent) -> Optional[str]:
+    """Return a verified local directory to advertise to a child, or None.
+
+    Sources are tried in order: the TERMINAL_CWD environment variable, then the
+    parent's ``_subdirectory_hints.working_dir``, ``terminal_cwd`` and ``cwd``.
+    The first one that normalizes to an existing directory wins, so a child
+    is never handed a path that is not actually present on this machine.
+    """
+    hints = getattr(parent_agent, "_subdirectory_hints", None)
+    sources = (
+        os.getenv("TERMINAL_CWD"),
+        getattr(hints, "working_dir", None),
+        getattr(parent_agent, "terminal_cwd", None),
+        getattr(parent_agent, "cwd", None),
+    )
+    for raw in filter(None, sources):
+        try:
+            resolved = os.path.abspath(os.path.expanduser(str(raw)))
+        except Exception:
+            continue  # unusable value (e.g. str() failed); try the next source
+        if os.path.isabs(resolved) and os.path.isdir(resolved):
+            return resolved
+    return None
+
+
 def _strip_blocked_tools(toolsets: List[str]) -> List[str]:
    """Remove toolsets that contain only blocked tools."""
    blocked_toolset_names = {
@@ -210,7 +248,8 @@ def _build_child_agent(
    else:
        child_toolsets = _strip_blocked_tools(DEFAULT_TOOLSETS)

-    child_prompt = _build_child_system_prompt(goal, context)
+    workspace_hint = _resolve_workspace_hint(parent_agent)
+    child_prompt = _build_child_system_prompt(goal, context, workspace_path=workspace_hint)
    # Extract parent's API key so subagents inherit auth (e.g. Nous Portal).
    parent_api_key = getattr(parent_agent, "api_key", None)
    if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"):
@@ -279,6 +318,12 @@ def _build_child_agent(
    # Set delegation depth so children can't spawn grandchildren
    child._delegate_depth = getattr(parent_agent, '_delegate_depth', 0) + 1

+    # Share a credential pool with the child when possible so subagents can
+    # rotate credentials on rate limits instead of getting pinned to one key.
+    child_pool = _resolve_child_credential_pool(effective_provider, parent_agent)
+    if child_pool is not None:
+        child._credential_pool = child_pool
+
    # Register child for interrupt propagation
    if hasattr(parent_agent, '_active_children'):
        lock = getattr(parent_agent, '_active_children_lock', None)
@@ -312,6 +357,18 @@ def _run_single_child(
    _saved_tool_names = getattr(child, "_delegate_saved_tool_names",
                                list(model_tools._last_resolved_tool_names))

+    child_pool = getattr(child, '_credential_pool', None)
+    leased_cred_id = None
+    if child_pool is not None:
+        leased_cred_id = child_pool.acquire_lease()
+        if leased_cred_id is not None:
+            try:
+                leased_entry = child_pool.current()
+                if leased_entry is not None and hasattr(child, '_swap_credential'):
+                    child._swap_credential(leased_entry)
+            except Exception as exc:
+                logger.debug("Failed to bind child to leased credential: %s", exc)
+
    try:
        result = child.run_conversation(user_message=goal)

@@ -422,6 +479,12 @@ def _run_single_child(
        }

    finally:
+        if child_pool is not None and leased_cred_id is not None:
+            try:
+                child_pool.release_lease(leased_cred_id)
+            except Exception as exc:
+                logger.debug("Failed to release credential lease: %s", exc)
+
        # Restore the parent's tool names so the process-global is correct
        # for any subsequent execute_code calls or other consumers.
        import model_tools
@@ -430,6 +493,8 @@ def _run_single_child(
        if isinstance(saved_tool_names, list):
            model_tools._last_resolved_tool_names = list(saved_tool_names)

+        # Remove child from active tracking
+
        # Unregister child from interrupt propagation
        if hasattr(parent_agent, '_active_children'):
            try:
@@ -626,6 +691,38 @@ def delegate_task(
    }, ensure_ascii=False)


+def _resolve_child_credential_pool(effective_provider: Optional[str], parent_agent):
+    """Pick the credential pool a child agent should rotate through.
+
+    Resolution order:
+    - No provider given: hand back whatever pool the parent holds (may be None).
+    - Provider equals the parent's and the parent has a pool: reuse that same
+      pool object, so parent and child see one shared rotation/cooldown state.
+    - Otherwise: load the provider's own pool and use it only if it reports
+      credentials.  Load failures are logged at debug level and swallowed.
+
+    Returns None when no pool applies; the child then keeps the single
+    credential it inherited.
+    """
+    inherited_pool = getattr(parent_agent, "_credential_pool", None)
+    if not effective_provider:
+        return inherited_pool
+
+    parent_provider = getattr(parent_agent, "provider", None) or ""
+    if inherited_pool is not None and effective_provider == parent_provider:
+        return inherited_pool
+
+    try:
+        from agent.credential_pool import load_pool
+        loaded = load_pool(effective_provider)
+        return loaded if loaded is not None and loaded.has_credentials() else None
+    except Exception as exc:
+        logger.debug(
+            "Could not load credential pool for child provider '%s': %s", effective_provider, exc,
+        )
+        return None
+
+
 def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict:
    """Resolve credentials for subagent delegation.

@@ -81,6 +81,7 @@ class ProcessSession:
    watcher_chat_id: str = ""
    watcher_thread_id: str = ""
    watcher_interval: int = 0                   # 0 = no watcher configured
+    notify_on_complete: bool = False             # Queue agent notification on exit
    _lock: threading.Lock = field(default_factory=threading.Lock)
    _reader_thread: Optional[threading.Thread] = field(default=None, repr=False)
    _pty: Any = field(default=None, repr=False)  # ptyprocess handle (when use_pty=True)
@@ -112,6 +113,12 @@ class ProcessRegistry:
        # Side-channel for check_interval watchers (gateway reads after agent run)
        self.pending_watchers: List[Dict[str, Any]] = []

+        # Completion notifications — processes with notify_on_complete push here
+        # on exit.  CLI process_loop and gateway drain this after each agent turn
+        # to auto-trigger a new agent turn with the process results.
+        import queue as _queue_mod
+        self.completion_queue: _queue_mod.Queue = _queue_mod.Queue()
+
    @staticmethod
    def _clean_shell_noise(text: str) -> str:
        """Strip shell startup warnings from the beginning of output."""
@@ -415,6 +422,18 @@ class ProcessRegistry:
            self._finished[session.id] = session
        self._write_checkpoint()

+        # If the caller requested agent notification, enqueue the completion
+        # so the CLI/gateway can auto-trigger a new agent turn.
+        if session.notify_on_complete:
+            from tools.ansi_strip import strip_ansi
+            output_tail = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
+            self.completion_queue.put({
+                "session_id": session.id,
+                "command": session.command,
+                "exit_code": session.exit_code,
+                "output": output_tail,
+            })
+
    # ----- Query Methods -----

    def get(self, session_id: str) -> Optional[ProcessSession]:
@@ -721,6 +740,7 @@ class ProcessRegistry:
                            "watcher_chat_id": s.watcher_chat_id,
                            "watcher_thread_id": s.watcher_thread_id,
                            "watcher_interval": s.watcher_interval,
+                            "notify_on_complete": s.notify_on_complete,
                        })
            
            # Atomic write to avoid corruption on crash
@@ -771,6 +791,7 @@ class ProcessRegistry:
                    watcher_chat_id=entry.get("watcher_chat_id", ""),
                    watcher_thread_id=entry.get("watcher_thread_id", ""),
                    watcher_interval=entry.get("watcher_interval", 0),
+                    notify_on_complete=entry.get("notify_on_complete", False),
                )
                with self._lock:
                    self._running[session.id] = session
@@ -421,9 +421,11 @@ Do NOT use sed/awk to edit files — use patch instead.
 Do NOT use echo/cat heredoc to create files — use write_file instead.
 Reserve terminal for: builds, installs, git, processes, scripts, network, package managers, and anything that needs a shell.

-Foreground (default): Commands return INSTANTLY when done, even if the timeout is high. Set timeout=300 for long builds/scripts — you'll still get the result in seconds if it's fast. Prefer foreground for everything that finishes.
-Background: ONLY for long-running servers, watchers, or processes that never exit. Set background=true to get a session_id, then use process(action="wait") to block until done — it returns instantly on completion, same as foreground. Use process(action="poll") only when you need a progress check without blocking.
-Do NOT use background for scripts, builds, or installs — foreground with a generous timeout is always better (fewer tool calls, instant results).
+Foreground (default): Commands return INSTANTLY when done, even if the timeout is high. Set timeout=300 for long builds/scripts — you'll still get the result in seconds if it's fast. Prefer foreground for short commands.
+Background: Set background=true to get a session_id. Two patterns:
+  (1) Long-lived processes that never exit (servers, watchers).
+  (2) Long-running tasks with notify_on_complete=true — you can keep working on other things and the system auto-notifies you when the task finishes. Great for test suites, builds, deployments, or anything that takes more than a minute.
+Use process(action="poll") for progress checks, process(action="wait") to block until done.
 Working directory: Use 'workdir' for per-command cwd.
 PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL).

@@ -1009,6 +1011,7 @@ def terminal_tool(
    workdir: Optional[str] = None,
    check_interval: Optional[int] = None,
    pty: bool = False,
+    notify_on_complete: bool = False,
 ) -> str:
    """
    Execute a command in the configured terminal environment.
@@ -1022,6 +1025,7 @@ def terminal_tool(
        workdir: Working directory for this command (optional, uses session cwd if not set)
        check_interval: Seconds between auto-checks for background processes (gateway only, min 30)
        pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
+        notify_on_complete: If True and background=True, auto-notify the agent when the process exits

    Returns:
        str: JSON string with output, exit_code, and error fields
@@ -1254,6 +1258,32 @@ def terminal_tool(
                        f"configured limit of {max_timeout}s"
                    )

+                # Mark for agent notification on completion
+                if notify_on_complete and background:
+                    proc_session.notify_on_complete = True
+                    result_data["notify_on_complete"] = True
+
+                    # In gateway mode, auto-register a fast watcher so the
+                    # gateway can detect completion and trigger a new agent
+                    # turn.  CLI mode uses the completion_queue directly.
+                    _gw_platform = os.getenv("HERMES_SESSION_PLATFORM", "")
+                    if _gw_platform and not check_interval:
+                        _gw_chat_id = os.getenv("HERMES_SESSION_CHAT_ID", "")
+                        _gw_thread_id = os.getenv("HERMES_SESSION_THREAD_ID", "")
+                        proc_session.watcher_platform = _gw_platform
+                        proc_session.watcher_chat_id = _gw_chat_id
+                        proc_session.watcher_thread_id = _gw_thread_id
+                        proc_session.watcher_interval = 5
+                        process_registry.pending_watchers.append({
+                            "session_id": proc_session.id,
+                            "check_interval": 5,
+                            "session_key": session_key,
+                            "platform": _gw_platform,
+                            "chat_id": _gw_chat_id,
+                            "thread_id": _gw_thread_id,
+                            "notify_on_complete": True,
+                        })
+
                # Register check_interval watcher (gateway picks this up after agent run)
                if check_interval and background:
                    effective_interval = max(30, check_interval)
@@ -1550,7 +1580,7 @@ TERMINAL_SCHEMA = {
            },
            "background": {
                "type": "boolean",
-                "description": "ONLY for servers/watchers that never exit. For scripts, builds, installs — use foreground with timeout instead (it returns instantly when done).",
+                "description": "Run the command in the background. Two patterns: (1) Long-lived processes that never exit (servers, watchers). (2) Long-running tasks paired with notify_on_complete=true — you can keep working and get notified when the task finishes. For short commands, prefer foreground with a generous timeout instead.",
                "default": False
            },
            "timeout": {
@@ -1571,6 +1601,11 @@ TERMINAL_SCHEMA = {
                "type": "boolean",
                "description": "Run in pseudo-terminal (PTY) mode for interactive CLI tools like Codex, Claude Code, or Python REPL. Only works with local and SSH backends. Default: false.",
                "default": False
+            },
+            "notify_on_complete": {
+                "type": "boolean",
+                "description": "When true (and background=true), you'll be automatically notified when the process finishes — no polling needed. Use this for tasks that take a while (tests, builds, deployments) so you can keep working on other things in the meantime.",
+                "default": False
            }
        },
        "required": ["command"]
@@ -1587,6 +1622,7 @@ def _handle_terminal(args, **kw):
        workdir=args.get("workdir"),
        check_interval=args.get("check_interval"),
        pty=args.get("pty", False),
+        notify_on_complete=args.get("notify_on_complete", False),
    )


@@ -37,14 +37,12 @@ _HERMES_CORE_TOOLS = [
    "read_file", "write_file", "patch", "search_files",
    # Vision + image generation
    "vision_analyze", "image_generate",
-    # MoA
-    "mixture_of_agents",
    # Skills
    "skills_list", "skill_view", "skill_manage",
    # Browser automation
    "browser_navigate", "browser_snapshot", "browser_click",
    "browser_type", "browser_scroll", "browser_back",
-    "browser_press", "browser_close", "browser_get_images",
+    "browser_press", "browser_get_images",
    "browser_vision", "browser_console",
    # Text-to-speech
    "text_to_speech",
@@ -116,7 +114,7 @@ TOOLSETS = {
        "tools": [
            "browser_navigate", "browser_snapshot", "browser_click",
            "browser_type", "browser_scroll", "browser_back",
-            "browser_press", "browser_close", "browser_get_images",
+            "browser_press", "browser_get_images",
            "browser_vision", "browser_console", "web_search"
        ],
        "includes": []
@@ -214,7 +212,7 @@ TOOLSETS = {
    
    "safe": {
        "description": "Safe toolkit without terminal access",
-        "tools": ["mixture_of_agents"],
+        "tools": [],
        "includes": ["web", "vision", "image_gen"]
    },
    
@@ -235,7 +233,7 @@ TOOLSETS = {
            "skills_list", "skill_view", "skill_manage",
            "browser_navigate", "browser_snapshot", "browser_click",
            "browser_type", "browser_scroll", "browser_back",
-            "browser_press", "browser_close", "browser_get_images",
+            "browser_press", "browser_get_images",
            "browser_vision", "browser_console",
            "todo", "memory",
            "session_search",
@@ -255,14 +253,12 @@ TOOLSETS = {
            "read_file", "write_file", "patch", "search_files",
            # Vision + image generation
            "vision_analyze", "image_generate",
-            # MoA
-            "mixture_of_agents",
            # Skills
            "skills_list", "skill_view", "skill_manage",
            # Browser automation
            "browser_navigate", "browser_snapshot", "browser_click",
            "browser_type", "browser_scroll", "browser_back",
-            "browser_press", "browser_close", "browser_get_images",
+            "browser_press", "browser_get_images",
            "browser_vision", "browser_console",
            # Planning & memory
            "todo", "memory",
@@ -45,6 +45,20 @@ Already up to date.  (or: Updating abc1234..def5678)
 ✅ Hermes Agent updated successfully!
 ```

+### Recommended Post-Update Validation
+
+`hermes update` handles the main update path, but a quick validation confirms everything landed cleanly:
+
+1. `git status --short` — if the tree is unexpectedly dirty, inspect before continuing
+2. `hermes doctor` — checks config, dependencies, and service health
+3. `hermes --version` — confirm the version bumped as expected
+4. If you use the gateway: `hermes gateway status`
+5. If `doctor` reports npm audit issues: run `npm audit fix` in the flagged directory
+
+:::warning Dirty working tree after update
+If `git status --short` shows unexpected changes after `hermes update`, stop and inspect them before continuing. This usually means local modifications were reapplied on top of the updated code, or a dependency step refreshed lockfiles.
+:::
+
 ### Checking your current version

 ```bash
@@ -0,0 +1,261 @@
+---
+sidebar_position: 11
+title: "Automate Anything with Cron"
+description: "Real-world automation patterns using Hermes cron — monitoring, reports, pipelines, and multi-skill workflows"
+---
+
+# Automate Anything with Cron
+
+The [daily briefing bot tutorial](/docs/guides/daily-briefing-bot) covers the basics. This guide goes further — five real-world automation patterns you can adapt for your own workflows.
+
+For the full feature reference, see [Scheduled Tasks (Cron)](/docs/user-guide/features/cron).
+
+:::info Key Concept
+Cron jobs run in fresh agent sessions with no memory of your current chat. Prompts must be **completely self-contained** — include everything the agent needs to know.
+:::
+
+---
+
+## Pattern 1: Website Change Monitor
+
+Watch a URL for changes and get notified only when something is different.
+
+The `script` parameter is the secret weapon here. A Python script runs before each execution, and its stdout becomes context for the agent. The script handles the mechanical work (fetching, diffing); the agent handles the reasoning (is this change interesting?).
+
+Create the monitoring script:
+
+```bash
+mkdir -p ~/.hermes/scripts
+```
+
+```python title="~/.hermes/scripts/watch-site.py"
+import hashlib, json, os, urllib.request
+
+URL = "https://example.com/pricing"
+STATE_FILE = os.path.expanduser("~/.hermes/scripts/.watch-site-state.json")
+
+# Fetch current content
+req = urllib.request.Request(URL, headers={"User-Agent": "Hermes-Monitor/1.0"})
+content = urllib.request.urlopen(req, timeout=30).read().decode()
+current_hash = hashlib.sha256(content.encode()).hexdigest()
+
+# Load previous state
+prev_hash = None
+if os.path.exists(STATE_FILE):
+    with open(STATE_FILE) as f:
+        prev_hash = json.load(f).get("hash")
+
+# Save current state
+with open(STATE_FILE, "w") as f:
+    json.dump({"hash": current_hash, "url": URL}, f)
+
+# Output for the agent
+if prev_hash and prev_hash != current_hash:
+    print(f"CHANGE DETECTED on {URL}")
+    print(f"Previous hash: {prev_hash}")
+    print(f"Current hash: {current_hash}")
+    print(f"\nCurrent content (first 2000 chars):\n{content[:2000]}")
+else:
+    print("NO_CHANGE")
+```
+
+Set up the cron job:
+
+```bash
+/cron add "every 1h" "If the script output says CHANGE DETECTED, summarize what changed on the page and why it might matter. If it says NO_CHANGE, respond with just [SILENT]." --script ~/.hermes/scripts/watch-site.py --name "Pricing monitor" --deliver telegram
+```
+
+:::tip The [SILENT] Trick
+When the agent's final response contains `[SILENT]`, delivery is suppressed. This means you only get notified when something actually happens — no spam during quiet hours.
+:::
+
+---
+
+## Pattern 2: Weekly Report
+
+Compile information from multiple sources into a formatted summary. This runs once a week and delivers to your home channel.
+
+```bash
+/cron add "0 9 * * 1" "Generate a weekly report covering:
+
+1. Search the web for the top 5 AI news stories from the past week
+2. Search GitHub for trending repositories in the 'machine-learning' topic
+3. Check Hacker News for the most discussed AI/ML posts
+
+Format as a clean summary with sections for each source. Include links.
+Keep it under 500 words — highlight only what matters." --name "Weekly AI digest" --deliver telegram
+```
+
+From the CLI:
+
+```bash
+hermes cron create "0 9 * * 1" \
+  "Generate a weekly report covering the top AI news, trending ML GitHub repos, and most-discussed HN posts. Format with sections, include links, keep under 500 words." \
+  --name "Weekly AI digest" \
+  --deliver telegram
+```
+
+The `0 9 * * 1` is a standard cron expression: 9:00 AM every Monday.
+
+---
+
+## Pattern 3: GitHub Repository Watcher
+
+Monitor a repository for new issues, PRs, or releases.
+
+```bash
+/cron add "every 6h" "Check the GitHub repository NousResearch/hermes-agent for:
+- New issues opened in the last 6 hours
+- New PRs opened or merged in the last 6 hours
+- Any new releases
+
+Use the terminal to run gh commands:
+  gh issue list --repo NousResearch/hermes-agent --state open --json number,title,author,createdAt --limit 10
+  gh pr list --repo NousResearch/hermes-agent --state all --json number,title,author,createdAt,mergedAt --limit 10
+
+Filter to only items from the last 6 hours. If nothing new, respond with [SILENT].
+Otherwise, provide a concise summary of the activity." --name "Repo watcher" --deliver discord
+```
+
+:::warning Self-Contained Prompts
+Notice how the prompt includes the exact `gh` commands. The cron agent has no memory of previous runs or your preferences — spell everything out.
+:::
+
+---
+
+## Pattern 4: Data Collection Pipeline
+
+Scrape data at regular intervals, save to files, and detect trends over time. This pattern combines a script (for collection) with the agent (for analysis).
+
+```python title="~/.hermes/scripts/collect-prices.py"
+import json, os, urllib.request
+from datetime import datetime
+
+DATA_DIR = os.path.expanduser("~/.hermes/data/prices")
+os.makedirs(DATA_DIR, exist_ok=True)
+
+# Fetch current data (example: crypto prices)
+url = "https://api.coingecko.com/api/v3/simple/price?ids=bitcoin,ethereum&vs_currencies=usd"
+data = json.loads(urllib.request.urlopen(url, timeout=30).read())
+
+# Append to history file
+entry = {"timestamp": datetime.now().isoformat(), "prices": data}
+history_file = os.path.join(DATA_DIR, "history.jsonl")
+with open(history_file, "a") as f:
+    f.write(json.dumps(entry) + "\n")
+
+# Load recent history for analysis
+lines = open(history_file).readlines()
+recent = [json.loads(l) for l in lines[-24:]]  # Last 24 data points
+
+# Output for the agent
+print(f"Current: BTC=${data['bitcoin']['usd']}, ETH=${data['ethereum']['usd']}")
+print(f"Data points collected: {len(lines)} total, showing last {len(recent)}")
+print(f"\nRecent history:")
+for r in recent[-6:]:
+    print(f"  {r['timestamp']}: BTC=${r['prices']['bitcoin']['usd']}, ETH=${r['prices']['ethereum']['usd']}")
+```
+
+```bash
+/cron add "every 1h" "Analyze the price data from the script output. Report:
+1. Current prices
+2. Trend direction over the last 6 data points (up/down/flat)
+3. Any notable movements (>5% change)
+
+If prices are flat and nothing notable, respond with [SILENT].
+If there's a significant move, explain what happened." \
+  --script ~/.hermes/scripts/collect-prices.py \
+  --name "Price tracker" \
+  --deliver telegram
+```
+
+The script does the mechanical collection; the agent adds the reasoning layer.
+
+---
+
+## Pattern 5: Multi-Skill Workflow
+
+Chain skills together for complex scheduled tasks. Skills are loaded in order before the prompt executes.
+
+```bash
+# Use the arxiv skill to find papers, then the obsidian skill to save notes
+/cron add "0 8 * * *" "Search arXiv for the 3 most interesting papers on 'language model reasoning' from the past day. For each paper, create an Obsidian note with the title, authors, abstract summary, and key contribution." \
+  --skill arxiv \
+  --skill obsidian \
+  --name "Paper digest"
+```
+
+From the tool directly:
+
+```python
+cronjob(
+    action="create",
+    skills=["arxiv", "obsidian"],
+    prompt="Search arXiv for papers on 'language model reasoning' from the past day. Save the top 3 as Obsidian notes.",
+    schedule="0 8 * * *",
+    name="Paper digest",
+    deliver="local"
+)
+```
+
+Skills are loaded in order — `arxiv` first (teaches the agent how to search papers), then `obsidian` (teaches how to write notes). The prompt ties them together.
+
+---
+
+## Managing Your Jobs
+
+```bash
+# List all active jobs
+/cron list
+
+# Trigger a job immediately (for testing)
+/cron run <job_id>
+
+# Pause a job without deleting it
+/cron pause <job_id>
+
+# Edit a running job's schedule or prompt
+/cron edit <job_id> --schedule "every 4h"
+/cron edit <job_id> --prompt "Updated task description"
+
+# Add or remove skills from an existing job
+/cron edit <job_id> --skill arxiv --skill obsidian
+/cron edit <job_id> --clear-skills
+
+# Remove a job permanently
+/cron remove <job_id>
+```
+
+---
+
+## Delivery Targets
+
+The `--deliver` flag controls where results go:
+
+| Target | Example | Use case |
+|--------|---------|----------|
+| `origin` | `--deliver origin` | Same chat that created the job (default) |
+| `local` | `--deliver local` | Save to local file only |
+| `telegram` | `--deliver telegram` | Your Telegram home channel |
+| `discord` | `--deliver discord` | Your Discord home channel |
+| `slack` | `--deliver slack` | Your Slack home channel |
+| Specific chat | `--deliver telegram:-1001234567890` | A specific Telegram group |
+| Threaded | `--deliver telegram:-1001234567890:17585` | A specific Telegram topic thread |
+
+---
+
+## Tips
+
+**Make prompts self-contained.** The agent in a cron job has no memory of your conversations. Include URLs, repo names, format preferences, and delivery instructions directly in the prompt.
+
+**Use `[SILENT]` liberally.** For monitoring jobs, always include instructions like "if nothing changed, respond with `[SILENT]`." This prevents notification noise.
+
+**Use scripts for data collection.** The `script` parameter lets a Python script handle the boring parts (HTTP requests, file I/O, state tracking). The agent only sees the script's stdout and applies reasoning to it. This is cheaper and more reliable than having the agent do the fetching itself.
+
+**Test with `/cron run`.** Before waiting for the schedule to trigger, use `/cron run <job_id>` to execute immediately and verify the output looks right.
+
+**Schedule expressions.** Human-readable formats like `every 2h`, `30m`, and `daily at 9am` all work alongside standard cron expressions like `0 9 * * *`.
+
+---
+
+*For the complete cron reference — all parameters, edge cases, and internals — see [Scheduled Tasks (Cron)](/docs/user-guide/features/cron).*
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 8
+sidebar_position: 9
 sidebar_label: "Build a Plugin"
 title: "Build a Hermes Plugin"
 description: "Step-by-step guide to building a complete Hermes plugin with tools, hooks, data files, and skills"
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 2
+sidebar_position: 3
 title: "Tutorial: Daily Briefing Bot"
 description: "Build an automated daily briefing bot that researches topics, summarizes findings, and delivers them to Telegram or Discord every morning"
 ---
@@ -0,0 +1,239 @@
+---
+sidebar_position: 13
+title: "Delegation & Parallel Work"
+description: "When and how to use subagent delegation — patterns for parallel research, code review, and multi-file work"
+---
+
+# Delegation & Parallel Work
+
+Hermes can spawn isolated child agents to work on tasks in parallel. Each subagent gets its own conversation, terminal session, and toolset. Only the final summary comes back — intermediate tool calls never enter your context window.
+
+For the full feature reference, see [Subagent Delegation](/docs/user-guide/features/delegation).
+
+---
+
+## When to Delegate
+
+**Good candidates for delegation:**
+- Reasoning-heavy subtasks (debugging, code review, research synthesis)
+- Tasks that would flood your context with intermediate data
+- Parallel independent workstreams (research A and B simultaneously)
+- Fresh-context tasks where you want the agent to approach without bias
+
+**Use something else:**
+- Single tool call → just use the tool directly
+- Mechanical multi-step work with logic between steps → `execute_code`
+- Tasks needing user interaction → subagents can't use `clarify`
+- Quick file edits → do them directly
+
+---
+
+## Pattern: Parallel Research
+
+Research three topics simultaneously and get structured summaries back:
+
+```
+Research these three topics in parallel:
+1. Current state of WebAssembly outside the browser
+2. RISC-V server chip adoption in 2025
+3. Practical quantum computing applications
+
+Focus on recent developments and key players.
+```
+
+Behind the scenes, Hermes uses:
+
+```python
+delegate_task(tasks=[
+    {
+        "goal": "Research WebAssembly outside the browser in 2025",
+        "context": "Focus on: runtimes (Wasmtime, Wasmer), cloud/edge use cases, WASI progress",
+        "toolsets": ["web"]
+    },
+    {
+        "goal": "Research RISC-V server chip adoption",
+        "context": "Focus on: server chips shipping, cloud providers adopting, software ecosystem",
+        "toolsets": ["web"]
+    },
+    {
+        "goal": "Research practical quantum computing applications",
+        "context": "Focus on: error correction breakthroughs, real-world use cases, key companies",
+        "toolsets": ["web"]
+    }
+])
+```
+
+All three run concurrently. Each subagent searches the web independently and returns a summary. The parent agent then synthesizes them into a coherent briefing.
+
+---
+
+## Pattern: Code Review
+
+Delegate a security review to a fresh-context subagent that approaches the code without preconceptions:
+
+```
+Review the authentication module at src/auth/ for security issues.
+Check for SQL injection, JWT validation problems, password handling,
+and session management. Fix anything you find and run the tests.
+```
+
+The key is the `context` field — it must include everything the subagent needs:
+
+```python
+delegate_task(
+    goal="Review src/auth/ for security issues and fix any found",
+    context="""Project at /home/user/webapp. Python 3.11, Flask, PyJWT, bcrypt.
+    Auth files: src/auth/login.py, src/auth/jwt.py, src/auth/middleware.py
+    Test command: pytest tests/auth/ -v
+    Focus on: SQL injection, JWT validation, password hashing, session management.
+    Fix issues found and verify tests pass.""",
+    toolsets=["terminal", "file"]
+)
+```
+
+:::warning The Context Problem
+Subagents know **absolutely nothing** about your conversation. They start completely fresh. If you delegate "fix the bug we were discussing," the subagent has no idea what bug you mean. Always pass file paths, error messages, project structure, and constraints explicitly.
+:::
+
+---
+
+## Pattern: Compare Alternatives
+
+Evaluate multiple approaches to the same problem in parallel, then pick the best:
+
+```
+I need to add full-text search to our Django app. Evaluate three approaches
+in parallel:
+1. PostgreSQL tsvector (built-in)
+2. Elasticsearch via django-elasticsearch-dsl
+3. Meilisearch via meilisearch-python
+
+For each: setup complexity, query capabilities, resource requirements,
+and maintenance overhead. Compare them and recommend one.
+```
+
+Each subagent researches one option independently. Because they're isolated, there's no cross-contamination — each evaluation stands on its own merits. The parent agent gets all three summaries and makes the comparison.
+
+---
+
+## Pattern: Multi-File Refactoring
+
+Split a large refactoring task across parallel subagents, each handling a different part of the codebase:
+
+```python
+delegate_task(tasks=[
+    {
+        "goal": "Refactor all API endpoint handlers to use the new response format",
+        "context": """Project at /home/user/api-server.
+        Files: src/handlers/users.py, src/handlers/auth.py, src/handlers/billing.py
+        Old format: return {"data": result, "status": "ok"}
+        New format: return APIResponse(data=result, status=200).to_dict()
+        Import: from src.responses import APIResponse
+        Run tests after: pytest tests/handlers/ -v""",
+        "toolsets": ["terminal", "file"]
+    },
+    {
+        "goal": "Update all client SDK methods to handle the new response format",
+        "context": """Project at /home/user/api-server.
+        Files: sdk/python/client.py, sdk/python/models.py
+        Old parsing: result = response.json()["data"]
+        New parsing: result = response.json()["data"] (same key, but add status code checking)
+        Also update sdk/python/tests/test_client.py""",
+        "toolsets": ["terminal", "file"]
+    },
+    {
+        "goal": "Update API documentation to reflect the new response format",
+        "context": """Project at /home/user/api-server.
+        Docs at: docs/api/. Format: Markdown with code examples.
+        Update all response examples from old format to new format.
+        Add a 'Response Format' section to docs/api/overview.md explaining the schema.""",
+        "toolsets": ["terminal", "file"]
+    }
+])
+```
+
+:::tip
+Each subagent gets its own terminal session. They can work on the same project directory without stepping on each other — as long as they're editing different files. If two subagents might touch the same file, handle that file yourself after the parallel work completes.
+:::
+
+---
+
+## Pattern: Gather Then Analyze
+
+Use `execute_code` for mechanical data gathering, then delegate the reasoning-heavy analysis:
+
+```python
+# Step 1: Mechanical gathering (execute_code is better here — no reasoning needed)
+execute_code("""
+from hermes_tools import web_search, web_extract
+
+results = []
+for query in ["AI funding Q1 2026", "AI startup acquisitions 2026", "AI IPOs 2026"]:
+    r = web_search(query, limit=5)
+    for item in r["data"]["web"]:
+        results.append({"title": item["title"], "url": item["url"], "desc": item["description"]})
+
+# Extract full content from top 5 most relevant
+urls = [r["url"] for r in results[:5]]
+content = web_extract(urls)
+
+# Save for the analysis step
+import json
+with open("/tmp/ai-funding-data.json", "w") as f:
+    json.dump({"search_results": results, "extracted": content["results"]}, f)
+print(f"Collected {len(results)} results, extracted {len(content['results'])} pages")
+""")
+
+# Step 2: Reasoning-heavy analysis (delegation is better here)
+delegate_task(
+    goal="Analyze AI funding data and write a market report",
+    context="""Raw data at /tmp/ai-funding-data.json contains search results and
+    extracted web pages about AI funding, acquisitions, and IPOs in Q1 2026.
+    Write a structured market report: key deals, trends, notable players,
+    and outlook. Focus on deals over $100M.""",
+    toolsets=["terminal", "file"]
+)
+```
+
+This is often the most efficient pattern: `execute_code` handles the 10+ sequential tool calls cheaply, then a subagent does the single expensive reasoning task with a clean context.
+
+---
+
+## Toolset Selection
+
+Choose toolsets based on what the subagent needs:
+
+| Task type | Toolsets | Why |
+|-----------|----------|-----|
+| Web research | `["web"]` | web_search + web_extract only |
+| Code work | `["terminal", "file"]` | Shell access + file operations |
+| Full-stack | `["terminal", "file", "web"]` | Everything except messaging |
+| Read-only analysis | `["file"]` | Can only read files, no shell |
+
+Restricting toolsets keeps the subagent focused and prevents accidental side effects (like a research subagent running shell commands).
+
+---
+
+## Constraints
+
+- **Max 3 parallel tasks** — batches are capped at 3 concurrent subagents
+- **No nesting** — subagents cannot call `delegate_task`, `clarify`, `memory`, `send_message`, or `execute_code`
+- **Separate terminals** — each subagent gets its own terminal session with separate working directory and state
+- **No conversation history** — subagents see only what you put in `goal` and `context`
+- **Default 50 iterations** — set `max_iterations` lower for simple tasks to save cost
+
+---
+
+## Tips
+
+**Be specific in goals.** "Fix the bug" is too vague. "Fix the TypeError in api/handlers.py line 47 where process_request() receives None from parse_body()" gives the subagent enough to work with.
+
+**Include file paths.** Subagents don't know your project structure. Always include absolute paths to relevant files, the project root, and the test command.
+
+**Use delegation for context isolation.** Sometimes you want a fresh perspective. Delegating forces you to articulate the problem clearly, and the subagent approaches it without the assumptions that built up in your conversation.
+
+**Check results.** Subagent summaries are just that — summaries. If a subagent says "fixed the bug and tests pass," verify by running the tests yourself or reading the diff.
+
+---
+
+*For the complete delegation reference — all parameters, ACP integration, and advanced configuration — see [Subagent Delegation](/docs/user-guide/features/delegation).*
@@ -0,0 +1,219 @@
+---
+sidebar_position: 2
+title: "Run Local LLMs on Mac"
+description: "Set up a local OpenAI-compatible LLM server on macOS with llama.cpp or MLX, including model selection, memory optimization, and real benchmarks on Apple Silicon"
+---
+
+# Run Local LLMs on Mac
+
+This guide walks you through running a local LLM server on macOS with an OpenAI-compatible API. You get full privacy, zero API costs, and surprisingly good performance on Apple Silicon.
+
+We cover two backends:
+
+| Backend | Install | Best at | Format |
+|---------|---------|---------|--------|
+| **llama.cpp** | `brew install llama.cpp` | Fastest time-to-first-token, quantized KV cache for low memory | GGUF |
+| **omlx** | [omlx.ai](https://omlx.ai) | Fastest token generation, native Metal optimization | MLX (safetensors) |
+
+Both expose an OpenAI-compatible `/v1/chat/completions` endpoint. Hermes works with either one — just point it at `http://localhost:8080` (llama.cpp) or `http://localhost:8000` (omlx).
+
+:::info Apple Silicon only
+This guide targets Macs with Apple Silicon (M1 and later). Intel Macs will work with llama.cpp but without GPU acceleration — expect significantly slower performance.
+:::
+
+---
+
+## Choosing a model
+
+For getting started, we recommend **Qwen3.5-9B** — it's a strong reasoning model that fits comfortably in 8GB+ of unified memory with quantization.
+
+| Variant | Size on disk | RAM needed (128K context) | Backend |
+|---------|-------------|---------------------------|---------|
+| Qwen3.5-9B-Q4_K_M (GGUF) | 5.3 GB | ~10 GB with quantized KV cache | llama.cpp |
+| Qwen3.5-9B-mlx-lm-mxfp4 (MLX) | ~5 GB | ~12 GB | omlx |
+
+**Memory rule of thumb:** model size + KV cache. A 9B Q4 model is ~5 GB. The KV cache at 128K context with Q4 quantization adds ~4-5 GB. With default (f16) KV cache, that balloons to ~16 GB. The quantized KV cache flags in llama.cpp are the key trick for memory-constrained systems.
+
+For larger models (27B, 35B), you'll need 32 GB+ of unified memory. The 9B is the sweet spot for 8-16 GB machines.
+
+---
+
+## Option A: llama.cpp
+
+llama.cpp is the most portable local LLM runtime. On macOS it uses Metal for GPU acceleration out of the box.
+
+### Install
+
+```bash
+brew install llama.cpp
+```
+
+This gives you the `llama-server` command globally.
+
+### Download the model
+
+You need a GGUF-format model. The easiest source is Hugging Face via the `huggingface-cli`:
+
+```bash
+brew install huggingface-cli
+```
+
+Then download:
+
+```bash
+huggingface-cli download unsloth/Qwen3.5-9B-GGUF Qwen3.5-9B-Q4_K_M.gguf --local-dir ~/models
+```
+
+:::tip Gated models
+Some models on Hugging Face require authentication. Run `huggingface-cli login` first if you get a 401 or 404 error.
+:::
+
+### Start the server
+
+```bash
+llama-server -m ~/models/Qwen3.5-9B-Q4_K_M.gguf \
+  -ngl 99 \
+  -c 131072 \
+  -np 1 \
+  -fa on \
+  --cache-type-k q4_0 \
+  --cache-type-v q4_0 \
+  --host 0.0.0.0
+```
+
+Here's what each flag does:
+
+| Flag | Purpose |
+|------|---------|
+| `-ngl 99` | Offload all layers to GPU (Metal). Use a high number to ensure nothing stays on CPU. |
+| `-c 131072` | Context window size (128K tokens). Reduce this if you're low on memory. |
+| `-np 1` | Number of parallel slots. Keep at 1 for single-user use — more slots split your memory budget. |
+| `-fa on` | Flash attention. Reduces memory usage and speeds up long-context inference. |
+| `--cache-type-k q4_0` | Quantize the key cache to 4-bit. **This is the big memory saver.** |
+| `--cache-type-v q4_0` | Quantize the value cache to 4-bit. Together with the above, this cuts KV cache memory by ~75% vs f16. |
+| `--host 0.0.0.0` | Listen on all interfaces. Use `127.0.0.1` if you don't need network access. |
+
+The server is ready when you see:
+
+```
+main: server is listening on http://0.0.0.0:8080
+srv  update_slots: all slots are idle
+```
+
+### Memory optimization for constrained systems
+
+The `--cache-type-k q4_0 --cache-type-v q4_0` flags are the most important optimization for systems with limited memory. Here's the impact at 128K context:
+
+| KV cache type | KV cache memory (128K ctx, 9B model) |
+|---------------|--------------------------------------|
+| f16 (default) | ~16 GB |
+| q8_0 | ~8 GB |
+| **q4_0** | **~4 GB** |
+
+On an 8 GB Mac, use `q4_0` KV cache and reduce context to `-c 32768` (32K). On 16 GB, you can comfortably do 128K context. On 32 GB+, you can run larger models or multiple parallel slots.
+
+If you're still running out of memory, reduce context size first (`-c`), then try a smaller quantization (Q3_K_M instead of Q4_K_M).
+
+### Test it
+
+```bash
+curl -s http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen3.5-9B-Q4_K_M.gguf",
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "max_tokens": 50
+  }' | jq '.choices[0].message.content'
+```
+
+### Get the model name
+
+If you forget the model name, query the models endpoint:
+
+```bash
+curl -s http://localhost:8080/v1/models | jq '.data[].id'
+```
+
+---
+
+## Option B: MLX via omlx
+
+[omlx](https://omlx.ai) is a macOS-native app that manages and serves MLX models. MLX is Apple's own machine learning framework, optimized specifically for Apple Silicon's unified memory architecture.
+
+### Install
+
+Download and install from [omlx.ai](https://omlx.ai). It provides a GUI for model management and a built-in server.
+
+### Download the model
+
+Use the omlx app to browse and download models. Search for `Qwen3.5-9B-mlx-lm-mxfp4` and download it. Models are stored locally (typically in `~/.omlx/models/`).
+
+### Start the server
+
+omlx serves models on `http://127.0.0.1:8000` by default. Start serving from the app UI, or use the CLI if available.
+
+### Test it
+
+```bash
+curl -s http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen3.5-9B-mlx-lm-mxfp4",
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "max_tokens": 50
+  }' | jq '.choices[0].message.content'
+```
+
+### List available models
+
+omlx can serve multiple models simultaneously:
+
+```bash
+curl -s http://127.0.0.1:8000/v1/models | jq '.data[].id'
+```
+
+---
+
+## Benchmarks: llama.cpp vs MLX
+
+Both backends tested on the same machine (Apple M5 Max, 128 GB unified memory) running the same model (Qwen3.5-9B) at comparable quantization levels (Q4_K_M for GGUF, mxfp4 for MLX). Five diverse prompts, three runs each, backends tested sequentially to avoid resource contention.
+
+### Results
+
+| Metric | llama.cpp (Q4_K_M) | MLX (mxfp4) | Winner |
+|--------|-------------------|-------------|--------|
+| **TTFT (avg)** | **67 ms** | 289 ms | llama.cpp (4.3x faster) |
+| **TTFT (p50)** | **66 ms** | 286 ms | llama.cpp (4.3x faster) |
+| **Generation (avg)** | 70 tok/s | **96 tok/s** | MLX (37% faster) |
+| **Generation (p50)** | 70 tok/s | **96 tok/s** | MLX (37% faster) |
+| **Total time (512 tokens)** | 7.3s | **5.5s** | MLX (25% faster) |
+
+### What this means
+
+- **llama.cpp** excels at prompt processing — its flash attention + quantized KV cache pipeline gets you the first token in ~66ms. If you're building interactive applications where perceived responsiveness matters (chatbots, autocomplete), this is a meaningful advantage.
+
+- **MLX** generates tokens ~37% faster once it gets going. For batch workloads, long-form generation, or any task where total completion time matters more than initial latency, MLX finishes sooner.
+
+- Both backends are **extremely consistent** — variance across runs was negligible. You can rely on these numbers.
+
+### Which one should you pick?
+
+| Use case | Recommendation |
+|----------|---------------|
+| Interactive chat, low-latency tools | llama.cpp |
+| Long-form generation, bulk processing | MLX (omlx) |
+| Memory-constrained (8-16 GB) | llama.cpp (quantized KV cache is unmatched) |
+| Serving multiple models simultaneously | omlx (built-in multi-model support) |
+| Maximum compatibility (Linux too) | llama.cpp |
+
+---
+
+## Connect to Hermes
+
+Once your local server is running:
+
+```bash
+hermes model
+```
+
+Select **Custom endpoint** and follow the prompts. It will ask for the base URL and model name — use the values from whichever backend you set up above.
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 7
+sidebar_position: 10
 title: "Migrate from OpenClaw"
 description: "Complete guide to migrating your OpenClaw / Clawdbot setup to Hermes Agent — what gets migrated, how config maps, and what to check after."
 ---
@@ -166,7 +166,7 @@ These are saved to `~/.hermes/migration/openclaw/<timestamp>/archive/` for manua
 | `HEARTBEAT.md` | `archive/workspace/HEARTBEAT.md` | Use cron jobs for periodic tasks |
 | `BOOTSTRAP.md` | `archive/workspace/BOOTSTRAP.md` | Use context files or skills |
 | Cron jobs | `archive/cron-config.json` | Recreate with `hermes cron create` |
-| Plugins | `archive/plugins-config.json` | See [plugins guide](../user-guide/features/hooks.md) |
+| Plugins | `archive/plugins-config.json` | See [plugins guide](/docs/user-guide/features/hooks) |
 | Hooks/webhooks | `archive/hooks-config.json` | Use `hermes webhook` or gateway hooks |
 | Memory backend | `archive/memory-backend-config.json` | Configure via `hermes honcho` |
 | Skills registry | `archive/skills-registry-config.json` | Use `hermes skills config` |
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 4
+sidebar_position: 5
 title: "Using Hermes as a Python Library"
 description: "Embed AIAgent in your own Python scripts, web apps, or automation pipelines — no CLI required"
 ---
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 3
+sidebar_position: 4
 title: "Tutorial: Team Telegram Assistant"
 description: "Step-by-step guide to setting up a Telegram bot that your whole team can use for code help, research, system admin, and more"
 ---
@@ -24,7 +24,7 @@ A Telegram bot that:

 Before starting, make sure you have:

- **Hermes Agent installed** on a server or VPS (not your laptop — the bot needs to stay running). Follow the [installation guide](/getting-started/learning-path) if you haven't yet.
+- **Hermes Agent installed** on a server or VPS (not your laptop — the bot needs to stay running). Follow the [installation guide](/docs/getting-started/installation) if you haven't yet.
 - **A Telegram account** for yourself (the bot owner)
 - **An LLM provider configured** — at minimum, an API key for OpenAI, Anthropic, or another supported provider in `~/.hermes/.env`

@@ -428,13 +428,13 @@ hermes gateway stop && hermes gateway start

 You've got a working team Telegram assistant. Here are some next steps:

- **[Security Guide](/user-guide/security)** — deep dive into authorization, container isolation, and command approval
- **[Messaging Gateway](/user-guide/messaging)** — full reference for gateway architecture, session management, and chat commands
- **[Telegram Setup](/user-guide/messaging/telegram)** — platform-specific details including voice messages and TTS
- **[Scheduled Tasks](/user-guide/features/cron)** — advanced cron scheduling with delivery options and cron expressions
- **[Context Files](/user-guide/features/context-files)** — AGENTS.md, SOUL.md, and .cursorrules for project knowledge
- **[Personality](/user-guide/features/personality)** — built-in personality presets and custom persona definitions
- **Add more platforms** — the same gateway can simultaneously run [Discord](/user-guide/messaging/discord), [Slack](/user-guide/messaging/slack), and [WhatsApp](/user-guide/messaging/whatsapp)
+- **[Security Guide](/docs/user-guide/security)** — deep dive into authorization, container isolation, and command approval
+- **[Messaging Gateway](/docs/user-guide/messaging)** — full reference for gateway architecture, session management, and chat commands
+- **[Telegram Setup](/docs/user-guide/messaging/telegram)** — platform-specific details including voice messages and TTS
+- **[Scheduled Tasks](/docs/user-guide/features/cron)** — advanced cron scheduling with delivery options and cron expressions
+- **[Context Files](/docs/user-guide/features/context-files)** — AGENTS.md, SOUL.md, and .cursorrules for project knowledge
+- **[Personality](/docs/user-guide/features/personality)** — built-in personality presets and custom persona definitions
+- **Add more platforms** — the same gateway can simultaneously run [Discord](/docs/user-guide/messaging/discord), [Slack](/docs/user-guide/messaging/slack), and [WhatsApp](/docs/user-guide/messaging/whatsapp)

 ---

@@ -1,5 +1,5 @@
 ---
-sidebar_position: 5
+sidebar_position: 6
 title: "Use MCP with Hermes"
 description: "A practical guide to connecting MCP servers to Hermes Agent, filtering their tools, and using them safely in real workflows"
 ---
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 6
+sidebar_position: 7
 title: "Use SOUL.md with Hermes"
 description: "How to use SOUL.md to shape Hermes Agent's default voice, what belongs there, and how it differs from AGENTS.md and /personality"
 ---
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 7
+sidebar_position: 8
 title: "Use Voice Mode with Hermes"
 description: "A practical guide to setting up and using Hermes voice mode across CLI, Telegram, Discord, and Discord voice channels"
 ---
@@ -0,0 +1,268 @@
+---
+sidebar_position: 12
+title: "Working with Skills"
+description: "Find, install, use, and create skills — on-demand knowledge that teaches Hermes new workflows"
+---
+
+# Working with Skills
+
+Skills are on-demand knowledge documents that teach Hermes how to handle specific tasks — from generating ASCII art to managing GitHub PRs. This guide walks you through using them day to day.
+
+For the full technical reference, see [Skills System](/docs/user-guide/features/skills).
+
+---
+
+## Finding Skills
+
+Every Hermes installation ships with bundled skills. See what's available:
+
+```bash
+# In any chat session:
+/skills
+
+# Or from the CLI:
+hermes skills list
+```
+
+This shows a compact list with names and descriptions:
+
+```
+ascii-art         Generate ASCII art using pyfiglet, cowsay, boxes...
+arxiv             Search and retrieve academic papers from arXiv...
+github-pr-workflow Full PR lifecycle — create branches, commit...
+plan              Plan mode — inspect context, write a markdown...
+excalidraw        Create hand-drawn style diagrams using Excalidraw...
+```
+
+### Searching for a Skill
+
+```bash
+# Search by keyword
+/skills search docker
+/skills search music
+```
+
+### The Skills Hub
+
+Official optional skills (heavier or niche skills not active by default) are available via the Hub:
+
+```bash
+# Browse official optional skills
+/skills browse
+
+# Search the hub
+/skills search blockchain
+```
+
+---
+
+## Using a Skill
+
+Every installed skill is automatically a slash command. Just type its name:
+
+```bash
+# Load a skill and give it a task
+/ascii-art Make a banner that says "HELLO WORLD"
+/plan Design a REST API for a todo app
+/github-pr-workflow Create a PR for the auth refactor
+
+# Just the skill name (no task) loads it and lets you describe what you need
+/excalidraw
+```
+
+You can also trigger skills through natural conversation — ask Hermes to use a specific skill, and it will load it via the `skill_view` tool.
+
+### Progressive Disclosure
+
+Skills use a token-efficient loading pattern. The agent doesn't load everything at once:
+
+1. **`skills_list()`** — compact list of all skills (~3k tokens). Loaded at session start.
+2. **`skill_view(name)`** — full SKILL.md content for one skill. Loaded when the agent decides it needs that skill.
+3. **`skill_view(name, file_path)`** — a specific reference file within the skill. Only loaded if needed.
+
+This means skills don't cost tokens until they're actually used.
+
+---
+
+## Installing from the Hub
+
+Official optional skills ship with Hermes but aren't active by default. Install them explicitly:
+
+```bash
+# Install an official optional skill
+hermes skills install official/research/arxiv
+
+# Install from the hub in a chat session
+/skills install official/creative/songwriting-and-ai-music
+```
+
+What happens:
+1. The skill directory is copied to `~/.hermes/skills/`
+2. It appears in your `skills_list` output
+3. It becomes available as a slash command
+
+:::tip
+Installed skills take effect in new sessions. If you want a newly installed skill available in the current session, use `/reset` to start fresh, or add `--now` to the install command to invalidate the prompt cache immediately (costs more tokens on the next turn).
+:::
+
+### Verifying Installation
+
+```bash
+# Check it's there
+hermes skills list | grep arxiv
+
+# Or in chat
+/skills search arxiv
+```
+
+---
+
+## Configuring Skill Settings
+
+Some skills declare configuration they need in their frontmatter:
+
+```yaml
+metadata:
+  hermes:
+    config:
+      - key: tenor.api_key
+        description: "Tenor API key for GIF search"
+        prompt: "Enter your Tenor API key"
+        url: "https://developers.google.com/tenor/guides/quickstart"
+```
+
+When a skill with config is first loaded, Hermes prompts you for the values. They're stored in `config.yaml` under `skills.config.*`.
+
+Manage skill config from the CLI:
+
+```bash
+# Interactive config for a specific skill
+hermes skills config gif-search
+
+# View all skill config
+hermes config get skills.config
+```
+
+---
+
+## Creating Your Own Skill
+
+Skills are just markdown files with YAML frontmatter. Creating one takes under five minutes.
+
+### 1. Create the Directory
+
+```bash
+mkdir -p ~/.hermes/skills/my-category/my-skill
+```
+
+### 2. Write SKILL.md
+
+```markdown title="~/.hermes/skills/my-category/my-skill/SKILL.md"
+---
+name: my-skill
+description: Brief description of what this skill does
+version: 1.0.0
+metadata:
+  hermes:
+    tags: [my-tag, automation]
+    category: my-category
+---
+
+# My Skill
+
+## When to Use
+Use this skill when the user asks about [specific topic] or needs to [specific task].
+
+## Procedure
+1. First, check if [prerequisite] is available
+2. Run `command --with-flags`
+3. Parse the output and present results
+
+## Pitfalls
+- Common failure: [description]. Fix: [solution]
+- Watch out for [edge case]
+
+## Verification
+Run `check-command` to confirm the result is correct.
+```
+
+### 3. Add Reference Files (Optional)
+
+Skills can include supporting files the agent loads on demand:
+
+```
+my-skill/
+├── SKILL.md                    # Main skill document
+├── references/
+│   ├── api-docs.md             # API reference the agent can consult
+│   └── examples.md             # Example inputs/outputs
+├── templates/
+│   └── config.yaml             # Template files the agent can use
+└── scripts/
+    └── setup.sh                # Scripts the agent can execute
+```
+
+Reference these in your SKILL.md:
+
+```markdown
+For API details, load the reference: `skill_view("my-skill", "references/api-docs.md")`
+```
+
+### 4. Test It
+
+Start a new session and try your skill:
+
+```bash
+hermes chat -q "/my-skill help me with the thing"
+```
+
+The skill appears automatically — no registration needed. Drop it in `~/.hermes/skills/` and it's live.
+
+:::info
+The agent can also create and update skills itself using `skill_manage`. After solving a complex problem, Hermes may offer to save the approach as a skill for next time.
+:::
+
+---
+
+## Per-Platform Skill Management
+
+Control which skills are available on which platforms:
+
+```bash
+hermes skills
+```
+
+This opens an interactive TUI where you can enable or disable skills per platform (CLI, Telegram, Discord, etc.). Useful when you want certain skills only available in specific contexts — for example, keeping development skills off Telegram.
+
+---
+
+## Skills vs Memory
+
+Both are persistent across sessions, but they serve different purposes:
+
+| | Skills | Memory |
+|---|---|---|
+| **What** | Procedural knowledge — how to do things | Factual knowledge — what things are |
+| **When** | Loaded on demand, only when relevant | Injected into every session automatically |
+| **Size** | Can be large (hundreds of lines) | Should be compact (key facts only) |
+| **Cost** | Zero tokens until loaded | Small but constant token cost |
+| **Examples** | "How to deploy to Kubernetes" | "User prefers dark mode, lives in PST" |
+| **Who creates** | You, the agent, or installed from Hub | The agent, based on conversations |
+
+**Rule of thumb:** If you'd put it in a reference document, it's a skill. If you'd put it on a sticky note, it's memory.
+
+---
+
+## Tips
+
+**Keep skills focused.** A skill that tries to cover "all of DevOps" will be too long and too vague. A skill that covers "deploy a Python app to Fly.io" is specific enough to be genuinely useful.
+
+**Let the agent create skills.** After a complex multi-step task, Hermes will often offer to save the approach as a skill. Say yes — these agent-authored skills capture the exact workflow including pitfalls that were discovered along the way.
+
+**Use categories.** Organize skills into subdirectories (`~/.hermes/skills/devops/`, `~/.hermes/skills/research/`, etc.). This keeps the list manageable and helps the agent find relevant skills faster.
+
+**Update skills when they go stale.** If you use a skill and hit issues not covered by it, tell Hermes to update the skill with what you learned. Skills that aren't maintained become liabilities.
+
+---
+
+*For the complete skills reference — frontmatter fields, conditional activation, external directories, and more — see [Skills System](/docs/user-guide/features/skills).*
@@ -846,6 +846,7 @@ You can switch between providers at any time with `hermes model` — no restart
 | OpenAI TTS + voice transcription | [OpenAI](https://platform.openai.com/api-keys) | `VOICE_TOOLS_OPENAI_KEY` |
 | RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` |
 | Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` |
+| Semantic long-term memory | [Supermemory](https://supermemory.ai) | `SUPERMEMORY_API_KEY` |

 ### Self-Hosting Firecrawl

@@ -383,7 +383,7 @@ Subcommands:
 hermes memory <subcommand>
 ```

-Set up and manage external memory provider plugins. Available providers: honcho, openviking, mem0, hindsight, holographic, retaindb, byterover. Only one external provider can be active at a time. Built-in memory (MEMORY.md/USER.md) is always active.
+Set up and manage external memory provider plugins. Available providers: honcho, openviking, mem0, hindsight, holographic, retaindb, byterover, supermemory. Only one external provider can be active at a time. Built-in memory (MEMORY.md/USER.md) is always active.

 Subcommands:

@@ -98,6 +98,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
 | `GITHUB_TOKEN` | GitHub token for Skills Hub (higher API rate limits, skill publish) |
 | `HONCHO_API_KEY` | Cross-session user modeling ([honcho.dev](https://honcho.dev/)) |
 | `HONCHO_BASE_URL` | Base URL for self-hosted Honcho instances (default: Honcho cloud). No API key required for local instances |
+| `SUPERMEMORY_API_KEY` | Semantic long-term memory with profile recall and session ingest ([supermemory.ai](https://supermemory.ai)) |
 | `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) |
 | `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) |
 | `DAYTONA_API_KEY` | Daytona cloud sandboxes ([daytona.io](https://daytona.io/)) |
@@ -20,7 +20,6 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
 |------|-------------|----------------------|
 | `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — |
 | `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — |
-| `browser_close` | Close the browser session and release resources. Call this when done with browser tasks to free up Browserbase session quota. | — |
 | `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — |
 | `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — |
 | `browser_navigate` | Navigate to a URL in the browser. Initializes the session and loads the page. Must be called before other browser tools. For simple information retrieval, prefer web_search or web_extract (faster, cheaper). Use browser tools when you need… | — |
@@ -52,7 +52,7 @@ Or in-session:

 | Toolset | Tools | Purpose |
 |---------|-------|---------|
-| `browser` | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. |
+| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. |
 | `clarify` | `clarify` | Ask the user a question when the agent needs clarification. |
 | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
 | `cronjob` | `cronjob` | Schedule and manage recurring tasks. |
@@ -21,7 +21,7 @@ If this is your first time running Hermes Agent, create a data directory on the
 mkdir -p ~/.hermes
 docker run -it --rm \
  -v ~/.hermes:/opt/data \
-  nousresearch/hermes-agent
+  nousresearch/hermes-agent setup
 ```

 This drops you into the setup wizard, which will prompt you for your API keys and write them to `~/.hermes/.env`. You only need to do this once. It is highly recommended to set up a chat system for the gateway to work with at this point.
@@ -277,10 +277,6 @@ Check the browser console for any JavaScript errors

 Use `clear=True` to clear the console after reading, so subsequent calls only show new messages.

-### `browser_close`
-
-Close the browser session and release resources. Call this when done to free up Browserbase session quota.
-
 ## Practical Examples

 ### Filling Out a Web Form
@@ -295,7 +291,6 @@ Agent workflow:
 4. browser_type(ref="@e5", text="SecurePass123")
 5. browser_click(ref="@e8")  → clicks "Create Account"
 6. browser_snapshot()  → confirms success
-7. browser_close()
 ```

 ### Researching Dynamic Content
@@ -307,7 +302,6 @@ Agent workflow:
 1. browser_navigate("https://github.com/trending")
 2. browser_snapshot(full=true)  → reads trending repo list
 3. Returns formatted results
-4. browser_close()
 ```

 ## Session Recording
@@ -349,5 +343,5 @@ If paid features aren't available on your plan, Hermes automatically falls back
 - **Text-based interaction** — relies on accessibility tree, not pixel coordinates
 - **Snapshot size** — large pages may be truncated or LLM-summarized at 8000 characters
 - **Session timeout** — cloud sessions expire based on your provider's plan settings
- **Cost** — cloud sessions consume provider credits; use `browser_close` when done. Use `/browser connect` for free local browsing.
+- **Cost** — cloud sessions consume provider credits; sessions are automatically cleaned up when the conversation ends or after inactivity. Use `/browser connect` for free local browsing.
 - **No file downloads** — cannot download files from the browser
@@ -1,12 +1,12 @@
 ---
 sidebar_position: 4
 title: "Memory Providers"
-description: "External memory provider plugins — Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover"
+description: "External memory provider plugins — Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover, Supermemory"
 ---

 # Memory Providers

-Hermes Agent ships with 7 external memory provider plugins that give the agent persistent, cross-session knowledge beyond the built-in MEMORY.md and USER.md. Only **one** external provider can be active at a time — the built-in memory is always active alongside it.
+Hermes Agent ships with 8 external memory provider plugins that give the agent persistent, cross-session knowledge beyond the built-in MEMORY.md and USER.md. Only **one** external provider can be active at a time — the built-in memory is always active alongside it.

 ## Quick Start

@@ -20,7 +20,7 @@ Or set manually in `~/.hermes/config.yaml`:

 ```yaml
 memory:
-  provider: openviking   # or honcho, mem0, hindsight, holographic, retaindb, byterover
+  provider: openviking   # or honcho, mem0, hindsight, holographic, retaindb, byterover, supermemory
 ```

 ## How It Works
@@ -382,6 +382,47 @@ hermes config set memory.provider byterover

 ---

+### Supermemory
+
+Semantic long-term memory with profile recall, semantic search, explicit memory tools, and session-end conversation ingest via the Supermemory graph API.
+
+| | |
+|---|---|
+| **Best for** | Semantic recall with user profiling and session-level graph building |
+| **Requires** | `pip install supermemory` + [API key](https://supermemory.ai) |
+| **Data storage** | Supermemory Cloud |
+| **Cost** | Supermemory pricing |
+
+**Tools:** `supermemory_store` (save explicit memories), `supermemory_search` (semantic similarity search), `supermemory_forget` (forget by ID or best-match query), `supermemory_profile` (persistent profile + recent context)
+
+**Setup:**
+```bash
+hermes memory setup    # select "supermemory"
+# Or manually:
+hermes config set memory.provider supermemory
+echo 'SUPERMEMORY_API_KEY=your-key-here' >> ~/.hermes/.env
+```
+
+**Config:** `$HERMES_HOME/supermemory.json`
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `container_tag` | `hermes` | Container tag used for search and writes |
+| `auto_recall` | `true` | Inject relevant memory context before turns |
+| `auto_capture` | `true` | Store cleaned user-assistant turns after each response |
+| `max_recall_results` | `10` | Max recalled items to format into context |
+| `profile_frequency` | `50` | Include profile facts on first turn and every N turns |
+| `capture_mode` | `all` | Skip tiny or trivial turns by default |
+| `api_timeout` | `5.0` | Timeout for SDK and ingest requests |
+
+**Key features:**
+- Automatic context fencing — strips recalled memories from captured turns to prevent recursive memory pollution
+- Session-end conversation ingest for richer graph-level knowledge building
+- Profile facts injected on first turn and at configurable intervals
+- Trivial message filtering (skips "ok", "thanks", etc.)
+
+---
+
 ## Provider Comparison

 | Provider | Storage | Cost | Tools | Dependencies | Unique Feature |
@@ -393,13 +434,14 @@ hermes config set memory.provider byterover
 | **Holographic** | Local | Free | 2 | None | HRR algebra + trust scoring |
 | **RetainDB** | Cloud | $20/mo | 5 | `requests` | Delta compression |
 | **ByteRover** | Local/Cloud | Free/Paid | 3 | `brv` CLI | Pre-compression extraction |
+| **Supermemory** | Cloud | Paid | 4 | `supermemory` | Context fencing + session graph ingest |

 ## Profile Isolation

 Each provider's data is isolated per [profile](/docs/user-guide/profiles):

 - **Local storage providers** (Holographic, ByteRover) use `$HERMES_HOME/` paths which differ per profile
- **Config file providers** (Honcho, Mem0, Hindsight) store config in `$HERMES_HOME/` so each profile has its own credentials
+- **Config file providers** (Honcho, Mem0, Hindsight, Supermemory) store config in `$HERMES_HOME/` so each profile has its own credentials
 - **Cloud providers** (RetainDB) auto-derive profile-scoped project names
 - **Env var providers** (OpenViking) are configured via each profile's `.env` file

@@ -209,7 +209,7 @@ memory:

 ## External Memory Providers

-For deeper, persistent memory that goes beyond MEMORY.md and USER.md, Hermes ships with 7 external memory provider plugins — including Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, and ByteRover.
+For deeper, persistent memory that goes beyond MEMORY.md and USER.md, Hermes ships with 8 external memory provider plugins: Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover, and Supermemory.

 External providers run **alongside** built-in memory (never replacing it) and add capabilities like knowledge graphs, semantic search, automatic fact extraction, and cross-session user modeling.

@@ -25,17 +25,21 @@ The Signal adapter uses `httpx` (already a core Hermes dependency) for all commu
 ### Installing signal-cli

 ```bash
-# Linux (Debian/Ubuntu)
-sudo apt install signal-cli
-
 # macOS
 brew install signal-cli

-# Manual install (any platform)
-# Download from https://github.com/AsamK/signal-cli/releases
-# Extract and add to PATH
+# Linux (download latest release)
+VERSION=$(curl -Ls -o /dev/null -w %{url_effective} \
+  https://github.com/AsamK/signal-cli/releases/latest | sed 's/^.*\/v//')
+curl -L -O "https://github.com/AsamK/signal-cli/releases/download/v${VERSION}/signal-cli-${VERSION}.tar.gz"
+sudo tar xf "signal-cli-${VERSION}.tar.gz" -C /opt
+sudo ln -sf "/opt/signal-cli-${VERSION}/bin/signal-cli" /usr/local/bin/
 ```

+:::caution
+signal-cli is **not** available in the apt or snap repositories. The Linux commands above download it directly from [GitHub releases](https://github.com/AsamK/signal-cli/releases).
+:::
+
 ---

 ## Step 1: Link Your Signal Account
@@ -132,13 +132,17 @@ const sidebars: SidebarsConfig = {
      collapsed: true,
      items: [
        'guides/tips',
-        'guides/build-a-hermes-plugin',
+        'guides/local-llm-on-mac',
        'guides/daily-briefing-bot',
        'guides/team-telegram-assistant',
        'guides/python-library',
        'guides/use-mcp-with-hermes',
        'guides/use-soul-with-hermes',
        'guides/use-voice-mode-with-hermes',
+        'guides/build-a-hermes-plugin',
+        'guides/automate-with-cron',
+        'guides/work-with-skills',
+        'guides/delegation-patterns',
        'guides/migrate-from-openclaw',
      ],
    },