Fix TUI input field ANSI leaks and text selection issues

1. Fix ANSI escape code leakage during scroll operations:
   - Add ensureSafeAnsi utility to ensure all ANSI sequences are properly terminated
   - Modify renderWithCursor and renderWithSelection to always include reset codes
   - Add final safety check in text rendering to catch any potential leaks

2. Improve text selection in input field:
   - Add proper mouse drag event handling for text selection
   - Enhance click handlers to support selection operations
   - Fix edge cases in selection rendering
   - Ensure proper reset of ANSI codes after selections

Fixes reported issues where users couldn't copy/paste text in the input field and experienced ANSI leakage during scrolling operations.
2026-04-26 23:31:49 -05:00
330 changed files with 1966 additions and 25856 deletions
@@ -69,4 +69,3 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
-models-dev-upstream/
@@ -30,22 +30,18 @@ WORKDIR /opt/hermes
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
 COPY web/package.json web/package-lock.json web/
-COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
-COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
    (cd web && npm install --prefer-offline --no-audit) && \
-    (cd ui-tui && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

-# Build browser dashboard and terminal UI assets.
-RUN cd web && npm run build && \
-    cd ../ui-tui && npm run build
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
+RUN cd web && npm run build

 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
@@ -1,632 +0,0 @@
-"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
-
-This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
-backend. Each request starts a short-lived ACP session, sends the formatted
-conversation as a single prompt, collects text chunks, and converts the result
-back into the minimal shape Hermes expects from an OpenAI client.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import queue
-import re
-import shlex
-import subprocess
-import threading
-import time
-from collections import deque
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Any
-
-from agent.file_safety import get_read_block_error, is_write_denied
-from agent.redact import redact_sensitive_text
-
-ACP_MARKER_BASE_URL = "acp://copilot"
-_DEFAULT_TIMEOUT_SECONDS = 900.0
-
-_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
-_TOOL_CALL_JSON_RE = re.compile(
-    r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}",
-    re.DOTALL,
-)
-
-
-def _resolve_command() -> str:
-    return (
-        os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
-        or os.getenv("COPILOT_CLI_PATH", "").strip()
-        or "copilot"
-    )
-
-
-def _resolve_args() -> list[str]:
-    raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
-    if not raw:
-        return ["--acp", "--stdio"]
-    return shlex.split(raw)
-
-
-def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "error": {
-            "code": code,
-            "message": message,
-        },
-    }
-
-
-def _permission_denied(message_id: Any) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "result": {
-            "outcome": {
-                "outcome": "cancelled",
-            }
-        },
-    }
-
-
-def _format_messages_as_prompt(
-    messages: list[dict[str, Any]],
-    model: str | None = None,
-    tools: list[dict[str, Any]] | None = None,
-    tool_choice: Any = None,
-) -> str:
-    sections: list[str] = [
-        "You are being used as the active ACP agent backend for Hermes.",
-        "Use ACP capabilities to complete tasks.",
-        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
-        "If no tool is needed, answer normally.",
-    ]
-    if model:
-        sections.append(f"Hermes requested model hint: {model}")
-
-    if isinstance(tools, list) and tools:
-        tool_specs: list[dict[str, Any]] = []
-        for t in tools:
-            if not isinstance(t, dict):
-                continue
-            fn = t.get("function") or {}
-            if not isinstance(fn, dict):
-                continue
-            name = fn.get("name")
-            if not isinstance(name, str) or not name.strip():
-                continue
-            tool_specs.append(
-                {
-                    "name": name.strip(),
-                    "description": fn.get("description", ""),
-                    "parameters": fn.get("parameters", {}),
-                }
-            )
-        if tool_specs:
-            sections.append(
-                "Available tools (OpenAI function schema). "
-                "When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
-                "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
-                + json.dumps(tool_specs, ensure_ascii=False)
-            )
-
-    if tool_choice is not None:
-        sections.append(
-            f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
-        )
-
-    transcript: list[str] = []
-    for message in messages:
-        if not isinstance(message, dict):
-            continue
-        role = str(message.get("role") or "unknown").strip().lower()
-        if role == "tool":
-            role = "tool"
-        elif role not in {"system", "user", "assistant"}:
-            role = "context"
-
-        content = message.get("content")
-        rendered = _render_message_content(content)
-        if not rendered:
-            continue
-
-        label = {
-            "system": "System",
-            "user": "User",
-            "assistant": "Assistant",
-            "tool": "Tool",
-            "context": "Context",
-        }.get(role, role.title())
-        transcript.append(f"{label}:\n{rendered}")
-
-    if transcript:
-        sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
-
-    sections.append("Continue the conversation from the latest user request.")
-    return "\n\n".join(
-        section.strip() for section in sections if section and section.strip()
-    )
-
-
-def _render_message_content(content: Any) -> str:
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content.strip()
-    if isinstance(content, dict):
-        if "text" in content:
-            return str(content.get("text") or "").strip()
-        if "content" in content and isinstance(content.get("content"), str):
-            return str(content.get("content") or "").strip()
-        return json.dumps(content, ensure_ascii=True)
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict):
-                text = item.get("text")
-                if isinstance(text, str) and text.strip():
-                    parts.append(text.strip())
-        return "\n".join(parts).strip()
-    return str(content).strip()
-
-
-def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
-    if not isinstance(text, str) or not text.strip():
-        return [], ""
-
-    extracted: list[SimpleNamespace] = []
-    consumed_spans: list[tuple[int, int]] = []
-
-    def _try_add_tool_call(raw_json: str) -> None:
-        try:
-            obj = json.loads(raw_json)
-        except Exception:
-            return
-        if not isinstance(obj, dict):
-            return
-        fn = obj.get("function")
-        if not isinstance(fn, dict):
-            return
-        fn_name = fn.get("name")
-        if not isinstance(fn_name, str) or not fn_name.strip():
-            return
-        fn_args = fn.get("arguments", "{}")
-        if not isinstance(fn_args, str):
-            fn_args = json.dumps(fn_args, ensure_ascii=False)
-        call_id = obj.get("id")
-        if not isinstance(call_id, str) or not call_id.strip():
-            call_id = f"acp_call_{len(extracted) + 1}"
-
-        extracted.append(
-            SimpleNamespace(
-                id=call_id,
-                call_id=call_id,
-                response_item_id=None,
-                type="function",
-                function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
-            )
-        )
-
-    for m in _TOOL_CALL_BLOCK_RE.finditer(text):
-        raw = m.group(1)
-        _try_add_tool_call(raw)
-        consumed_spans.append((m.start(), m.end()))
-
-    # Only try bare-JSON fallback when no XML blocks were found.
-    if not extracted:
-        for m in _TOOL_CALL_JSON_RE.finditer(text):
-            raw = m.group(0)
-            _try_add_tool_call(raw)
-            consumed_spans.append((m.start(), m.end()))
-
-    if not consumed_spans:
-        return extracted, text.strip()
-
-    consumed_spans.sort()
-    merged: list[tuple[int, int]] = []
-    for start, end in consumed_spans:
-        if not merged or start > merged[-1][1]:
-            merged.append((start, end))
-        else:
-            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
-
-    parts: list[str] = []
-    cursor = 0
-    for start, end in merged:
-        if cursor < start:
-            parts.append(text[cursor:start])
-        cursor = max(cursor, end)
-    if cursor < len(text):
-        parts.append(text[cursor:])
-
-    cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
-    return extracted, cleaned
-
-
-def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
-    candidate = Path(path_text)
-    if not candidate.is_absolute():
-        raise PermissionError("ACP file-system paths must be absolute.")
-    resolved = candidate.resolve()
-    root = Path(cwd).resolve()
-    try:
-        resolved.relative_to(root)
-    except ValueError as exc:
-        raise PermissionError(
-            f"Path '{resolved}' is outside the session cwd '{root}'."
-        ) from exc
-    return resolved
-
-
-class _ACPChatCompletions:
-    def __init__(self, client: CopilotACPClient):
-        self._client = client
-
-    def create(self, **kwargs: Any) -> Any:
-        return self._client._create_chat_completion(**kwargs)
-
-
-class _ACPChatNamespace:
-    def __init__(self, client: CopilotACPClient):
-        self.completions = _ACPChatCompletions(client)
-
-
-class CopilotACPClient:
-    """Minimal OpenAI-client-compatible facade for Copilot ACP."""
-
-    def __init__(
-        self,
-        *,
-        api_key: str | None = None,
-        base_url: str | None = None,
-        default_headers: dict[str, str] | None = None,
-        acp_command: str | None = None,
-        acp_args: list[str] | None = None,
-        acp_cwd: str | None = None,
-        command: str | None = None,
-        args: list[str] | None = None,
-        **_: Any,
-    ):
-        self.api_key = api_key or "copilot-acp"
-        self.base_url = base_url or ACP_MARKER_BASE_URL
-        self._default_headers = dict(default_headers or {})
-        self._acp_command = acp_command or command or _resolve_command()
-        self._acp_args = list(acp_args or args or _resolve_args())
-        self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
-        self.chat = _ACPChatNamespace(self)
-        self.is_closed = False
-        self._active_process: subprocess.Popen[str] | None = None
-        self._active_process_lock = threading.Lock()
-
-    def close(self) -> None:
-        proc: subprocess.Popen[str] | None
-        with self._active_process_lock:
-            proc = self._active_process
-            self._active_process = None
-        self.is_closed = True
-        if proc is None:
-            return
-        try:
-            proc.terminate()
-            proc.wait(timeout=2)
-        except Exception:
-            try:
-                proc.kill()
-            except Exception:
-                pass
-
-    def _create_chat_completion(
-        self,
-        *,
-        model: str | None = None,
-        messages: list[dict[str, Any]] | None = None,
-        timeout: float | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        tool_choice: Any = None,
-        **_: Any,
-    ) -> Any:
-        prompt_text = _format_messages_as_prompt(
-            messages or [],
-            model=model,
-            tools=tools,
-            tool_choice=tool_choice,
-        )
-        # Normalise timeout: run_agent.py may pass an httpx.Timeout object
-        # (used natively by the OpenAI SDK) rather than a plain float.
-        if timeout is None:
-            _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
-        elif isinstance(timeout, (int, float)):
-            _effective_timeout = float(timeout)
-        else:
-            # httpx.Timeout or similar — pick the largest component so the
-            # subprocess has enough wall-clock time for the full response.
-            _candidates = [
-                getattr(timeout, attr, None)
-                for attr in ("read", "write", "connect", "pool", "timeout")
-            ]
-            _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
-            _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
-
-        response_text, reasoning_text = self._run_prompt(
-            prompt_text,
-            timeout_seconds=_effective_timeout,
-        )
-
-        tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
-
-        usage = SimpleNamespace(
-            prompt_tokens=0,
-            completion_tokens=0,
-            total_tokens=0,
-            prompt_tokens_details=SimpleNamespace(cached_tokens=0),
-        )
-        assistant_message = SimpleNamespace(
-            content=cleaned_text,
-            tool_calls=tool_calls,
-            reasoning=reasoning_text or None,
-            reasoning_content=reasoning_text or None,
-            reasoning_details=None,
-        )
-        finish_reason = "tool_calls" if tool_calls else "stop"
-        choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
-        return SimpleNamespace(
-            choices=[choice],
-            usage=usage,
-            model=model or "copilot-acp",
-        )
-
-    def _run_prompt(
-        self, prompt_text: str, *, timeout_seconds: float
-    ) -> tuple[str, str]:
-        try:
-            proc = subprocess.Popen(
-                [self._acp_command] + self._acp_args,
-                stdin=subprocess.PIPE,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                bufsize=1,
-                cwd=self._acp_cwd,
-            )
-        except FileNotFoundError as exc:
-            raise RuntimeError(
-                f"Could not start Copilot ACP command '{self._acp_command}'. "
-                "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
-            ) from exc
-
-        if proc.stdin is None or proc.stdout is None:
-            proc.kill()
-            raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
-
-        self.is_closed = False
-        with self._active_process_lock:
-            self._active_process = proc
-
-        inbox: queue.Queue[dict[str, Any]] = queue.Queue()
-        stderr_tail: deque[str] = deque(maxlen=40)
-
-        def _stdout_reader() -> None:
-            if proc.stdout is None:
-                return
-            for line in proc.stdout:
-                try:
-                    inbox.put(json.loads(line))
-                except Exception:
-                    inbox.put({"raw": line.rstrip("\n")})
-
-        def _stderr_reader() -> None:
-            if proc.stderr is None:
-                return
-            for line in proc.stderr:
-                stderr_tail.append(line.rstrip("\n"))
-
-        out_thread = threading.Thread(target=_stdout_reader, daemon=True)
-        err_thread = threading.Thread(target=_stderr_reader, daemon=True)
-        out_thread.start()
-        err_thread.start()
-
-        next_id = 0
-
-        def _request(
-            method: str,
-            params: dict[str, Any],
-            *,
-            text_parts: list[str] | None = None,
-            reasoning_parts: list[str] | None = None,
-        ) -> Any:
-            nonlocal next_id
-            next_id += 1
-            request_id = next_id
-            payload = {
-                "jsonrpc": "2.0",
-                "id": request_id,
-                "method": method,
-                "params": params,
-            }
-            assert proc.stdin is not None  # always set: Popen(stdin=PIPE)
-            proc.stdin.write(json.dumps(payload) + "\n")
-            proc.stdin.flush()
-
-            deadline = time.time() + timeout_seconds
-            while time.time() < deadline:
-                if proc.poll() is not None:
-                    break
-                try:
-                    msg = inbox.get(timeout=0.1)
-                except queue.Empty:
-                    continue
-
-                if self._handle_server_message(
-                    msg,
-                    process=proc,
-                    cwd=self._acp_cwd,
-                    text_parts=text_parts,
-                    reasoning_parts=reasoning_parts,
-                ):
-                    continue
-
-                if msg.get("id") != request_id:
-                    continue
-                if "error" in msg:
-                    err = msg.get("error") or {}
-                    raise RuntimeError(
-                        f"Copilot ACP {method} failed: {err.get('message') or err}"
-                    )
-                return msg.get("result")
-
-            stderr_text = "\n".join(stderr_tail).strip()
-            if proc.poll() is not None and stderr_text:
-                raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
-            raise TimeoutError(
-                f"Timed out waiting for Copilot ACP response to {method}."
-            )
-
-        try:
-            _request(
-                "initialize",
-                {
-                    "protocolVersion": 1,
-                    "clientCapabilities": {
-                        "fs": {
-                            "readTextFile": True,
-                            "writeTextFile": True,
-                        }
-                    },
-                    "clientInfo": {
-                        "name": "hermes-agent",
-                        "title": "Hermes Agent",
-                        "version": "0.0.0",
-                    },
-                },
-            )
-            session = (
-                _request(
-                    "session/new",
-                    {
-                        "cwd": self._acp_cwd,
-                        "mcpServers": [],
-                    },
-                )
-                or {}
-            )
-            session_id = str(session.get("sessionId") or "").strip()
-            if not session_id:
-                raise RuntimeError("Copilot ACP did not return a sessionId.")
-
-            text_parts: list[str] = []
-            reasoning_parts: list[str] = []
-            _request(
-                "session/prompt",
-                {
-                    "sessionId": session_id,
-                    "prompt": [
-                        {
-                            "type": "text",
-                            "text": prompt_text,
-                        }
-                    ],
-                },
-                text_parts=text_parts,
-                reasoning_parts=reasoning_parts,
-            )
-            return "".join(text_parts), "".join(reasoning_parts)
-        finally:
-            self.close()
-
-    def _handle_server_message(
-        self,
-        msg: dict[str, Any],
-        *,
-        process: subprocess.Popen[str],
-        cwd: str,
-        text_parts: list[str] | None,
-        reasoning_parts: list[str] | None,
-    ) -> bool:
-        method = msg.get("method")
-        if not isinstance(method, str):
-            return False
-
-        if method == "session/update":
-            params = msg.get("params") or {}
-            update = params.get("update") or {}
-            kind = str(update.get("sessionUpdate") or "").strip()
-            content = update.get("content") or {}
-            chunk_text = ""
-            if isinstance(content, dict):
-                chunk_text = str(content.get("text") or "")
-            if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
-                text_parts.append(chunk_text)
-            elif (
-                kind == "agent_thought_chunk"
-                and chunk_text
-                and reasoning_parts is not None
-            ):
-                reasoning_parts.append(chunk_text)
-            return True
-
-        if process.stdin is None:
-            return True
-
-        message_id = msg.get("id")
-        params = msg.get("params") or {}
-
-        if method == "session/request_permission":
-            response = _permission_denied(message_id)
-        elif method == "fs/read_text_file":
-            try:
-                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                block_error = get_read_block_error(str(path))
-                if block_error:
-                    raise PermissionError(block_error)
-                content = path.read_text() if path.exists() else ""
-                line = params.get("line")
-                limit = params.get("limit")
-                if isinstance(line, int) and line > 1:
-                    lines = content.splitlines(keepends=True)
-                    start = line - 1
-                    end = (
-                        start + limit if isinstance(limit, int) and limit > 0 else None
-                    )
-                    content = "".join(lines[start:end])
-                if content:
-                    content = redact_sensitive_text(content)
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": message_id,
-                    "result": {
-                        "content": content,
-                    },
-                }
-            except Exception as exc:
-                response = _jsonrpc_error(message_id, -32602, str(exc))
-        elif method == "fs/write_text_file":
-            try:
-                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                if is_write_denied(str(path)):
-                    raise PermissionError(
-                        f"Write denied: '{path}' is a protected system/credential file."
-                    )
-                path.parent.mkdir(parents=True, exist_ok=True)
-                path.write_text(str(params.get("content") or ""))
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": message_id,
-                    "result": None,
-                }
-            except Exception as exc:
-                response = _jsonrpc_error(message_id, -32602, str(exc))
-        else:
-            response = _jsonrpc_error(
-                message_id,
-                -32601,
-                f"ACP client method '{method}' is not supported by Hermes yet.",
-            )
-
-        process.stdin.write(json.dumps(response) + "\n")
-        process.stdin.flush()
-        return True
@@ -82,8 +82,6 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -151,31 +149,22 @@ def _fixed_temperature_for_model(
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
-def _get_aux_model_for_provider(provider_id: str) -> str:
-    """Return the cheap auxiliary model for a provider.
-
-    Reads from ProviderProfile.default_aux_model first, falling back to the
-    legacy hardcoded dict for providers that predate the profiles system.
-    """
-    try:
-        from providers import get_provider_profile
-        _p = get_provider_profile(provider_id)
-        if _p and _p.default_aux_model:
-            return _p.default_aux_model
-    except Exception:
-        pass
-    return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
-
-
-# Fallback for providers not yet migrated to ProviderProfile.default_aux_model.
-# New providers should set default_aux_model on their profile instead.
-_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
+_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
+    "gemini": "gemini-3-flash-preview",
+    "zai": "glm-4.5-flash",
+    "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
+    "kimi-coding-cn": "kimi-k2-turbo-preview",
+    "minimax": "MiniMax-M2.7",
+    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
+    "ai-gateway": "google/gemini-3-flash",
+    "opencode-zen": "gemini-3-flash",
+    "opencode-go": "glm-5",
+    "kilocode": "google/gemini-3-flash-preview",
+    "ollama-cloud": "nemotron-3-nano:30b",
 }

-# Legacy alias — callers that haven't been updated yet can still use this.
-_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
-
 # Vision-specific model overrides for direct providers.
 # When the user's main provider has a dedicated vision/multimodal model that
 # differs from their main chat model, map it here.  The vision auto-detect
@@ -876,7 +865,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            base_url = _to_openai_base_url(
                _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
            )
-            model = _get_aux_model_for_provider(provider_id) or None
+            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
            if model is None:
                continue  # skip provider if we don't know a valid aux model
            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
@@ -885,22 +874,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

                if is_native_gemini_base_url(base_url):
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
-        extra = {}
-        if base_url_host_matches(base_url, "api.kimi.com"):
-            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_cli.models import copilot_default_headers
+            extra = {}
+            if base_url_host_matches(base_url, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+                from hermes_cli.models import copilot_default_headers

-            extra["default_headers"] = copilot_default_headers()
-        else:
-            try:
-                from providers import get_provider_profile as _gpf_aux
-                _ph_aux = _gpf_aux(provider_id)
-                if _ph_aux and _ph_aux.default_headers:
-                    extra["default_headers"] = dict(_ph_aux.default_headers)
-            except Exception:
-                pass
-        return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+                extra["default_headers"] = copilot_default_headers()
+            return OpenAI(api_key=api_key, base_url=base_url, **extra), model

        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
@@ -910,7 +891,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        base_url = _to_openai_base_url(
            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
        )
-        model = _get_aux_model_for_provider(provider_id) or None
+        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
        if model is None:
            continue  # skip provider if we don't know a valid aux model
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
@@ -926,14 +907,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
-        else:
-            try:
-                from providers import get_provider_profile as _gpf_aux2
-                _ph_aux2 = _gpf_aux2(provider_id)
-                if _ph_aux2 and _ph_aux2.default_headers:
-                    extra["default_headers"] = dict(_ph_aux2.default_headers)
-            except Exception:
-                pass
        return OpenAI(api_key=api_key, base_url=base_url, **extra), model

    return None, None
@@ -1282,7 +1255,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:

    from agent.anthropic_adapter import _is_oauth_token
    is_oauth = _is_oauth_token(token)
-    model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
+    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
    try:
        real_client = build_anthropic_client(token, base_url)
@@ -1644,14 +1617,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
 # below — never look up auth env vars ad-hoc.


-def _to_async_client(sync_client, model: str, is_vision: bool = False):
-    """Convert a sync client to its async counterpart, preserving Codex routing.
-
-    When ``is_vision=True`` and the underlying base URL is Copilot, the
-    resulting async client carries the ``Copilot-Vision-Request: true``
-    header so the request is routed to Copilot's vision-capable
-    infrastructure (otherwise vision payloads silently time out).
-    """
+def _to_async_client(sync_client, model: str):
+    """Convert a sync client to its async counterpart, preserving Codex routing."""
    from openai import AsyncOpenAI

    if isinstance(sync_client, CodexAuxiliaryClient):
@@ -1666,7 +1633,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    except ImportError:
        pass
    try:
-        from acp_adapter.copilot_client import CopilotACPClient
+        from agent.copilot_acp_client import CopilotACPClient
        if isinstance(sync_client, CopilotACPClient):
            return sync_client, model
    except ImportError:
@@ -1680,11 +1647,9 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
-        from hermes_cli.copilot_auth import copilot_request_headers
+        from hermes_cli.models import copilot_default_headers

-        async_kwargs["default_headers"] = copilot_request_headers(
-            is_agent_turn=True, is_vision=is_vision
-        )
+        async_kwargs["default_headers"] = copilot_default_headers()
    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model
@@ -1711,7 +1676,6 @@ def resolve_provider_client(
    explicit_api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1795,7 +1759,7 @@ def resolve_provider_client(
                "auxiliary provider (using %r instead)", model, resolved)
            model = None
        final_model = model or resolved
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── OpenRouter ───────────────────────────────────────────────────
@@ -1808,7 +1772,7 @@ def resolve_provider_client(
            )
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
@@ -1825,7 +1789,7 @@ def resolve_provider_client(
                           "but Nous Portal not configured (run: hermes auth)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
@@ -1852,7 +1816,7 @@ def resolve_provider_client(
                           "but no Codex OAuth token found (run: hermes model)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
@@ -1881,13 +1845,11 @@ def resolve_provider_client(
            if base_url_host_matches(custom_base, "api.kimi.com"):
                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
-                from hermes_cli.copilot_auth import copilot_request_headers
-                extra["default_headers"] = copilot_request_headers(
-                    is_agent_turn=True, is_vision=is_vision
-                )
+                from hermes_cli.models import copilot_default_headers
+                extra["default_headers"] = copilot_default_headers()
            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
            client = _wrap_if_needed(client, final_model, custom_base)
-            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+            return (_to_async_client(client, final_model) if async_mode
                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
@@ -1897,7 +1859,7 @@ def resolve_provider_client(
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
                client = _wrap_if_needed(client, final_model, _cbase)
-                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
                       "but no endpoint credentials found")
@@ -1942,7 +1904,7 @@ def resolve_provider_client(
                            provider,
                        )
                        client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
-                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                        return (_to_async_client(client, final_model) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
                        real_client, final_model, custom_key, custom_base, is_oauth=False,
@@ -1961,7 +1923,7 @@ def resolve_provider_client(
                    client = CodexAuxiliaryClient(client, final_model)
                else:
                    client = _wrap_if_needed(client, final_model, custom_base)
-                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))
            logger.warning(
                "resolve_provider_client: named custom provider %r has no base_url",
@@ -1993,7 +1955,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
                return None, None
            final_model = _normalize_resolved_model(model or default_model, provider)
-            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))
+            return (_to_async_client(client, final_model) if async_mode else (client, final_model))

        creds = resolve_api_key_provider_credentials(provider)
        api_key = str(creds.get("api_key", "")).strip()
@@ -2010,7 +1972,7 @@ def resolve_provider_client(
            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
        )

-        default_model = _get_aux_model_for_provider(provider)
+        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
        final_model = _normalize_resolved_model(model or default_model, provider)

        if provider == "gemini":
@@ -2019,7 +1981,7 @@ def resolve_provider_client(
            if is_native_gemini_base_url(base_url):
                client = GeminiNativeClient(api_key=api_key, base_url=base_url)
                logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))

        # Provider-specific headers
@@ -2027,11 +1989,9 @@ def resolve_provider_client(
        if base_url_host_matches(base_url, "api.kimi.com"):
            headers["User-Agent"] = "claude-code/0.1.0"
        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_cli.copilot_auth import copilot_request_headers
+            from hermes_cli.models import copilot_default_headers

-            headers.update(copilot_request_headers(
-                is_agent_turn=True, is_vision=is_vision
-            ))
+            headers.update(copilot_default_headers())
        client = OpenAI(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))

@@ -2057,7 +2017,7 @@ def resolve_provider_client(
        client = _wrap_if_needed(client, final_model, base_url)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    if pconfig.auth_type == "external_process":
@@ -2080,7 +2040,7 @@ def resolve_provider_client(
                    "process credentials are incomplete"
                )
                return None, None
-            from acp_adapter.copilot_client import CopilotACPClient
+            from agent.copilot_acp_client import CopilotACPClient

            client = CopilotACPClient(
                api_key=api_key,
@@ -2089,7 +2049,7 @@ def resolve_provider_client(
                args=args,
            )
            logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+            return (_to_async_client(client, final_model) if async_mode
                    else (client, final_model))
        logger.warning("resolve_provider_client: external-process provider %s not "
                       "directly supported", provider)
@@ -2125,7 +2085,7 @@ def resolve_provider_client(
            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
        )
        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
@@ -2200,13 +2160,8 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
    return _normalize_aux_provider(provider)


-def _resolve_strict_vision_backend(
-    provider: str,
-    model: Optional[str] = None,
-) -> Tuple[Optional[Any], Optional[str]]:
+def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
    provider = _normalize_vision_provider(provider)
-    if provider == "copilot":
-        return resolve_provider_client("copilot", model, is_vision=True)
    if provider == "openrouter":
        return _try_openrouter()
    if provider == "nous":
@@ -2274,7 +2229,7 @@ def resolve_vision_provider_client(
            return resolved_provider, None, None
        final_model = resolved_model or default_model
        if async_mode:
-            async_client, async_model = _to_async_client(sync_client, final_model, is_vision=True)
+            async_client, async_model = _to_async_client(sync_client, final_model)
            return resolved_provider, async_client, async_model
        return resolved_provider, sync_client, final_model

@@ -2306,11 +2261,8 @@ def resolve_vision_provider_client(
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
            if main_provider == "nous":
-                sync_client, default_model = _resolve_strict_vision_backend(
-                    main_provider, vision_model
-                )
+                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
                if sync_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
@@ -2318,10 +2270,10 @@ def resolve_vision_provider_client(
                    )
                    return _finalize(main_provider, sync_client, default_model)
            else:
+                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
-                    api_mode=resolved_api_mode,
-                    is_vision=True)
+                    api_mode=resolved_api_mode)
                if rpc_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
@@ -2343,14 +2295,11 @@ def resolve_vision_provider_client(
        return None, None, None

    if requested in _VISION_AUTO_PROVIDER_ORDER:
-        sync_client, default_model = _resolve_strict_vision_backend(
-            requested, resolved_model
-        )
+        sync_client, default_model = _resolve_strict_vision_backend(requested)
        return _finalize(requested, sync_client, default_model)

    client, final_model = _get_cached_client(requested, resolved_model, async_mode,
-                                             api_mode=resolved_api_mode,
-                                             is_vision=True)
+                                             api_mode=resolved_api_mode)
    if client is None:
        return requested, None, None
    return requested, client, final_model
@@ -2414,11 +2363,10 @@ def _client_cache_key(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -2444,7 +2392,6 @@ def _refresh_nous_auxiliary_client(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
    runtime = _resolve_nous_runtime_api(force_refresh=True)
@@ -2462,7 +2409,7 @@ def _refresh_nous_auxiliary_client(
            current_loop = _aio.get_event_loop()
        except RuntimeError:
            pass
-        client, final_model = _to_async_client(sync_client, final_model or "", is_vision=is_vision)
+        client, final_model = _to_async_client(sync_client, final_model or "")
    else:
        client = sync_client

@@ -2473,7 +2420,6 @@ def _refresh_nous_auxiliary_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
-        is_vision=is_vision,
    )
    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
    return client, final_model
@@ -2585,19 +2531,12 @@ def _is_openrouter_client(client: Any) -> bool:
    return False


-def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
-    """Best-effort check for cached clients that accept ``vendor/model`` IDs."""
-    if _is_openrouter_client(client):
-        return True
-    return bool(cached_default and "/" in cached_default)
-
-
 def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
-    """Keep slash-bearing model IDs only for cached clients that support them.
+    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.

    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
    """
-    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
+    if model and "/" in model and not _is_openrouter_client(client):
        return cached_default
    return model or cached_default

@@ -2610,7 +2549,6 @@ def _get_cached_client(
    api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -2647,7 +2585,6 @@ def _get_cached_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
-        is_vision=is_vision,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -2679,7 +2616,6 @@ def _get_cached_client(
        explicit_api_key=api_key,
        api_mode=api_mode,
        main_runtime=runtime,
-        is_vision=is_vision,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -3143,7 +3079,6 @@ def call_llm(
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
                main_runtime=main_runtime,
-                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
@@ -3434,7 +3369,6 @@ async def async_call_llm(
                base_url=resolved_base_url,
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
-                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
@@ -3503,9 +3437,7 @@ async def async_call_llm(
                    extra_body=effective_extra_body,
                    base_url=str(getattr(fb_client, "base_url", "") or ""))
                # Convert sync fallback client to async
-                async_fb, async_fb_model = _to_async_client(
-                    fb_client, fb_model or "", is_vision=(task == "vision")
-                )
+                async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
                if async_fb_model and async_fb_model != fb_kwargs.get("model"):
                    fb_kwargs["model"] = async_fb_model
                return _validate_llm_response(
@@ -61,52 +61,9 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"

 # Chars per token rough estimate
 _CHARS_PER_TOKEN = 4
-# Flat token cost per attached image part.  Real cost varies by provider and
-# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
-# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
-# that keeps compression budgeting honest for multi-image conversations.
-# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
-_IMAGE_TOKEN_ESTIMATE = 1600
-# Same figure expressed in the char-budget currency the rest of the
-# compressor speaks in.  Used when accumulating message "content length"
-# for tail-cut decisions.
-_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


-def _content_length_for_budget(raw_content: Any) -> int:
-    """Return the effective char-length of a message's content for token budgeting.
-
-    Plain strings: ``len(content)``. Multimodal lists: sum of text-part
-    ``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
-    (``image_url`` / ``input_image`` / Anthropic-style ``image``). This
-    keeps the compressor from treating a turn with 5 attached images as
-    near-zero tokens just because the text part is empty.
-    """
-    if isinstance(raw_content, str):
-        return len(raw_content)
-    if not isinstance(raw_content, list):
-        return len(str(raw_content or ""))
-
-    total = 0
-    for p in raw_content:
-        if isinstance(p, str):
-            total += len(p)
-            continue
-        if not isinstance(p, dict):
-            total += len(str(p))
-            continue
-        ptype = p.get("type")
-        if ptype in {"image_url", "input_image", "image"}:
-            total += _IMAGE_CHAR_EQUIVALENT
-        else:
-            # text / input_text / tool_result-with-text / anything else with
-            # a text field.  Ignore the raw base64 payload inside image_url
-            # dicts — dimensions don't matter, only whether it's an image.
-            total += len(p.get("text", "") or "")
-    return total
-
-
 def _content_text_for_contains(content: Any) -> str:
    """Return a best-effort text view of message content.

@@ -338,10 +295,6 @@ class ContextCompressor(ContextEngine):
        self._context_probe_persistable = False
        self._previous_summary = None
        self._last_summary_error = None
-        self._last_summary_dropped_count = 0
-        self._last_summary_fallback_used = False
-        self._last_aux_model_failure_error = None
-        self._last_aux_model_failure_model = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -445,17 +398,6 @@ class ContextCompressor(ContextEngine):
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
        self._last_summary_error: Optional[str] = None
-        # When summary generation fails and a static fallback is inserted,
-        # record how many turns were unrecoverably dropped so callers
-        # (gateway hygiene, /compress) can surface a visible warning.
-        self._last_summary_dropped_count: int = 0
-        self._last_summary_fallback_used: bool = False
-        # When a user-configured summary model fails and we recover by
-        # retrying on the main model, record the failure so gateway /
-        # CLI callers can still warn the user even though compression
-        # succeeded.  Silent recovery would hide the broken config.
-        self._last_aux_model_failure_error: Optional[str] = None
-        self._last_aux_model_failure_model: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -542,7 +484,7 @@ class ContextCompressor(ContextEngine):
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
                raw_content = msg.get("content") or ""
-                content_len = _content_length_for_budget(raw_content)
+                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -915,50 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "Falling back to main model '%s' for compression.",
                    self.summary_model, e, self.model,
                )
-                # Record the aux-model failure so callers can warn the user
-                # even if the retry-on-main succeeds — a misconfigured aux
-                # model is something the user needs to fix.
-                _err_text = str(e).strip() or e.__class__.__name__
-                if len(_err_text) > 220:
-                    _err_text = _err_text[:217].rstrip() + "..."
-                self._last_aux_model_failure_error = _err_text
-                self._last_aux_model_failure_model = self.summary_model
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately

-            # Unknown-error best-effort retry on main model.  Losing N turns of
-            # context is almost always worse than one extra summary attempt, so
-            # if we haven't already fallen back and the summary model differs
-            # from the main model, try once more on main before entering
-            # cooldown.  Errors that DID match _is_model_not_found above are
-            # already handled by the fast-path retry; this branch catches
-            # everything else (400s, provider-specific "no route" strings,
-            # aggregator rejections, etc.) where auto-retry is still safer
-            # than dropping the turns.
-            if (
-                self.summary_model
-                and self.summary_model != self.model
-                and not getattr(self, "_summary_model_fallen_back", False)
-            ):
-                self._summary_model_fallen_back = True
-                logging.warning(
-                    "Summary model '%s' failed (%s). "
-                    "Retrying on main model '%s' before giving up.",
-                    self.summary_model, e, self.model,
-                )
-                # Record the aux-model failure (see 404 branch above) — user
-                # should know their configured model is broken even if main
-                # recovers the call.
-                _err_text = str(e).strip() or e.__class__.__name__
-                if len(_err_text) > 220:
-                    _err_text = _err_text[:217].rstrip() + "..."
-                self._last_aux_model_failure_error = _err_text
-                self._last_aux_model_failure_model = self.summary_model
-                self.summary_model = ""  # empty = use main model
-                self._summary_failure_cooldown_until = 0.0
-                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
-
            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
@@ -1180,9 +1082,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        for i in range(n - 1, head_end - 1, -1):
            msg = messages[i]
-            raw_content = msg.get("content") or ""
-            content_len = _content_length_for_budget(raw_content)
-            msg_tokens = content_len // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
+            content = msg.get("content") or ""
+            msg_tokens = len(content) // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
            # Include tool call arguments in estimate
            for tc in msg.get("tool_calls") or []:
                if isinstance(tc, dict):
@@ -1251,13 +1152,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                related to this topic and be more aggressive about compressing
                everything else.  Inspired by Claude Code's ``/compact``.
        """
-        # Reset per-call summary failure state — callers inspect these fields
-        # after compress() returns to decide whether to surface a warning.
-        self._last_summary_dropped_count = 0
-        self._last_summary_fallback_used = False
-        self._last_summary_error = None
-        self._last_aux_model_failure_error = None
-        self._last_aux_model_failure_model = None
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self.protect_first_n + 3 + 1
@@ -1336,13 +1230,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if not self.quiet_mode:
                logger.warning("Summary generation failed — inserting static fallback context marker")
            n_dropped = compress_end - compress_start
-            self._last_summary_dropped_count = n_dropped
-            self._last_summary_fallback_used = True
            summary = (
                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} message(s) were "
+                f"Summary generation was unavailable. {n_dropped} conversation turns were "
                f"removed to free context space but could not be summarized. The removed "
-                f"messages contained earlier work in this session. Continue based on the "
+                f"turns contained earlier work in this session. Continue based on the "
                f"recent messages below and the current state of any files or resources."
            )

@@ -1,8 +1,646 @@
-"""Backward-compatibility shim.
+"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.

-CopilotACPClient has moved to acp_adapter/copilot_client.py.
-This module re-exports it so existing callers continue to work.
+This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
+backend. Each request starts a short-lived ACP session, sends the formatted
+conversation as a single prompt, collects text chunks, and converts the result
+back into the minimal shape Hermes expects from an OpenAI client.
 """
-from acp_adapter.copilot_client import CopilotACPClient  # noqa: F401

-__all__ = ["CopilotACPClient"]
+from __future__ import annotations
+
+import json
+import os
+import queue
+import re
+import shlex
+import subprocess
+import threading
+import time
+from collections import deque
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
+ACP_MARKER_BASE_URL = "acp://copilot"
+_DEFAULT_TIMEOUT_SECONDS = 900.0
+
+_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
+_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
+
+
+def _resolve_command() -> str:
+    return (
+        os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
+        or os.getenv("COPILOT_CLI_PATH", "").strip()
+        or "copilot"
+    )
+
+
+def _resolve_args() -> list[str]:
+    raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
+    if not raw:
+        return ["--acp", "--stdio"]
+    return shlex.split(raw)
+
+
+def _resolve_home_dir() -> str:
+    """Return a stable HOME for child ACP processes."""
+
+    try:
+        from hermes_constants import get_subprocess_home
+
+        profile_home = get_subprocess_home()
+        if profile_home:
+            return profile_home
+    except Exception:
+        pass
+
+    home = os.environ.get("HOME", "").strip()
+    if home:
+        return home
+
+    expanded = os.path.expanduser("~")
+    if expanded and expanded != "~":
+        return expanded
+
+    try:
+        import pwd
+
+        resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
+        if resolved:
+            return resolved
+    except Exception:
+        pass
+
+    # Last resort: /tmp (writable on any POSIX system). Avoids crashing the
+    # subprocess with no HOME; callers can set HERMES_HOME explicitly if they
+    # need a different writable dir.
+    return "/tmp"
+
+
+def _build_subprocess_env() -> dict[str, str]:
+    env = os.environ.copy()
+    env["HOME"] = _resolve_home_dir()
+    return env
+
+
+def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "error": {
+            "code": code,
+            "message": message,
+        },
+    }
+
+
+def _permission_denied(message_id: Any) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "result": {
+            "outcome": {
+                "outcome": "cancelled",
+            }
+        },
+    }
+
+
+def _format_messages_as_prompt(
+    messages: list[dict[str, Any]],
+    model: str | None = None,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: Any = None,
+) -> str:
+    sections: list[str] = [
+        "You are being used as the active ACP agent backend for Hermes.",
+        "Use ACP capabilities to complete tasks.",
+        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
+        "If no tool is needed, answer normally.",
+    ]
+    if model:
+        sections.append(f"Hermes requested model hint: {model}")
+
+    if isinstance(tools, list) and tools:
+        tool_specs: list[dict[str, Any]] = []
+        for t in tools:
+            if not isinstance(t, dict):
+                continue
+            fn = t.get("function") or {}
+            if not isinstance(fn, dict):
+                continue
+            name = fn.get("name")
+            if not isinstance(name, str) or not name.strip():
+                continue
+            tool_specs.append(
+                {
+                    "name": name.strip(),
+                    "description": fn.get("description", ""),
+                    "parameters": fn.get("parameters", {}),
+                }
+            )
+        if tool_specs:
+            sections.append(
+                "Available tools (OpenAI function schema). "
+                "When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
+                "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+                + json.dumps(tool_specs, ensure_ascii=False)
+            )
+
+    if tool_choice is not None:
+        sections.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}")
+
+    transcript: list[str] = []
+    for message in messages:
+        if not isinstance(message, dict):
+            continue
+        role = str(message.get("role") or "unknown").strip().lower()
+        if role == "tool":
+            role = "tool"
+        elif role not in {"system", "user", "assistant"}:
+            role = "context"
+
+        content = message.get("content")
+        rendered = _render_message_content(content)
+        if not rendered:
+            continue
+
+        label = {
+            "system": "System",
+            "user": "User",
+            "assistant": "Assistant",
+            "tool": "Tool",
+            "context": "Context",
+        }.get(role, role.title())
+        transcript.append(f"{label}:\n{rendered}")
+
+    if transcript:
+        sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
+
+    sections.append("Continue the conversation from the latest user request.")
+    return "\n\n".join(section.strip() for section in sections if section and section.strip())
+
+
+def _render_message_content(content: Any) -> str:
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, dict):
+        if "text" in content:
+            return str(content.get("text") or "").strip()
+        if "content" in content and isinstance(content.get("content"), str):
+            return str(content.get("content") or "").strip()
+        return json.dumps(content, ensure_ascii=True)
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str) and text.strip():
+                    parts.append(text.strip())
+        return "\n".join(parts).strip()
+    return str(content).strip()
+
+
+def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
+    if not isinstance(text, str) or not text.strip():
+        return [], ""
+
+    extracted: list[SimpleNamespace] = []
+    consumed_spans: list[tuple[int, int]] = []
+
+    def _try_add_tool_call(raw_json: str) -> None:
+        try:
+            obj = json.loads(raw_json)
+        except Exception:
+            return
+        if not isinstance(obj, dict):
+            return
+        fn = obj.get("function")
+        if not isinstance(fn, dict):
+            return
+        fn_name = fn.get("name")
+        if not isinstance(fn_name, str) or not fn_name.strip():
+            return
+        fn_args = fn.get("arguments", "{}")
+        if not isinstance(fn_args, str):
+            fn_args = json.dumps(fn_args, ensure_ascii=False)
+        call_id = obj.get("id")
+        if not isinstance(call_id, str) or not call_id.strip():
+            call_id = f"acp_call_{len(extracted)+1}"
+
+        extracted.append(
+            SimpleNamespace(
+                id=call_id,
+                call_id=call_id,
+                response_item_id=None,
+                type="function",
+                function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
+            )
+        )
+
+    for m in _TOOL_CALL_BLOCK_RE.finditer(text):
+        raw = m.group(1)
+        _try_add_tool_call(raw)
+        consumed_spans.append((m.start(), m.end()))
+
+    # Only try the bare-JSON fallback when no <tool_call> block yielded a valid tool call.
+    if not extracted:
+        for m in _TOOL_CALL_JSON_RE.finditer(text):
+            raw = m.group(0)
+            _try_add_tool_call(raw)
+            consumed_spans.append((m.start(), m.end()))
+
+    if not consumed_spans:
+        return extracted, text.strip()
+
+    consumed_spans.sort()
+    merged: list[tuple[int, int]] = []
+    for start, end in consumed_spans:
+        if not merged or start > merged[-1][1]:
+            merged.append((start, end))
+        else:
+            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
+
+    parts: list[str] = []
+    cursor = 0
+    for start, end in merged:
+        if cursor < start:
+            parts.append(text[cursor:start])
+        cursor = max(cursor, end)
+    if cursor < len(text):
+        parts.append(text[cursor:])
+
+    cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
+    return extracted, cleaned
+
+
+
+def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
+    candidate = Path(path_text)
+    if not candidate.is_absolute():
+        raise PermissionError("ACP file-system paths must be absolute.")
+    resolved = candidate.resolve()
+    root = Path(cwd).resolve()
+    try:
+        resolved.relative_to(root)
+    except ValueError as exc:
+        raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc
+    return resolved
+
+
+class _ACPChatCompletions:
+    def __init__(self, client: "CopilotACPClient"):
+        self._client = client
+
+    def create(self, **kwargs: Any) -> Any:
+        return self._client._create_chat_completion(**kwargs)
+
+
+class _ACPChatNamespace:
+    def __init__(self, client: "CopilotACPClient"):
+        self.completions = _ACPChatCompletions(client)
+
+
+class CopilotACPClient:
+    """Minimal OpenAI-client-compatible facade for Copilot ACP."""
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        default_headers: dict[str, str] | None = None,
+        acp_command: str | None = None,
+        acp_args: list[str] | None = None,
+        acp_cwd: str | None = None,
+        command: str | None = None,
+        args: list[str] | None = None,
+        **_: Any,
+    ):
+        self.api_key = api_key or "copilot-acp"
+        self.base_url = base_url or ACP_MARKER_BASE_URL
+        self._default_headers = dict(default_headers or {})
+        self._acp_command = acp_command or command or _resolve_command()
+        self._acp_args = list(acp_args or args or _resolve_args())
+        self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
+        self.chat = _ACPChatNamespace(self)
+        self.is_closed = False
+        self._active_process: subprocess.Popen[str] | None = None
+        self._active_process_lock = threading.Lock()
+
+    def close(self) -> None:
+        proc: subprocess.Popen[str] | None
+        with self._active_process_lock:
+            proc = self._active_process
+            self._active_process = None
+        self.is_closed = True
+        if proc is None:
+            return
+        try:
+            proc.terminate()
+            proc.wait(timeout=2)
+        except Exception:
+            try:
+                proc.kill()
+            except Exception:
+                pass
+
+    def _create_chat_completion(
+        self,
+        *,
+        model: str | None = None,
+        messages: list[dict[str, Any]] | None = None,
+        timeout: float | None = None,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: Any = None,
+        **_: Any,
+    ) -> Any:
+        prompt_text = _format_messages_as_prompt(
+            messages or [],
+            model=model,
+            tools=tools,
+            tool_choice=tool_choice,
+        )
+        # Normalise timeout: run_agent.py may pass an httpx.Timeout object
+        # (used natively by the OpenAI SDK) rather than a plain float.
+        if timeout is None:
+            _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
+        elif isinstance(timeout, (int, float)):
+            _effective_timeout = float(timeout)
+        else:
+            # httpx.Timeout or similar — pick the largest component so the
+            # subprocess has enough wall-clock time for the full response.
+            _candidates = [
+                getattr(timeout, attr, None)
+                for attr in ("read", "write", "connect", "pool", "timeout")
+            ]
+            _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
+            _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
+
+        response_text, reasoning_text = self._run_prompt(
+            prompt_text,
+            timeout_seconds=_effective_timeout,
+        )
+
+        tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
+
+        usage = SimpleNamespace(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+            prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+        )
+        assistant_message = SimpleNamespace(
+            content=cleaned_text,
+            tool_calls=tool_calls,
+            reasoning=reasoning_text or None,
+            reasoning_content=reasoning_text or None,
+            reasoning_details=None,
+        )
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
+        return SimpleNamespace(
+            choices=[choice],
+            usage=usage,
+            model=model or "copilot-acp",
+        )
+
+    def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
+        try:
+            proc = subprocess.Popen(
+                [self._acp_command] + self._acp_args,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                bufsize=1,
+                cwd=self._acp_cwd,
+                env=_build_subprocess_env(),
+            )
+        except FileNotFoundError as exc:
+            raise RuntimeError(
+                f"Could not start Copilot ACP command '{self._acp_command}'. "
+                "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
+            ) from exc
+
+        if proc.stdin is None or proc.stdout is None:
+            proc.kill()
+            raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
+
+        self.is_closed = False
+        with self._active_process_lock:
+            self._active_process = proc
+
+        inbox: queue.Queue[dict[str, Any]] = queue.Queue()
+        stderr_tail: deque[str] = deque(maxlen=40)
+
+        def _stdout_reader() -> None:
+            if proc.stdout is None:
+                return
+            for line in proc.stdout:
+                try:
+                    inbox.put(json.loads(line))
+                except Exception:
+                    inbox.put({"raw": line.rstrip("\n")})
+
+        def _stderr_reader() -> None:
+            if proc.stderr is None:
+                return
+            for line in proc.stderr:
+                stderr_tail.append(line.rstrip("\n"))
+
+        out_thread = threading.Thread(target=_stdout_reader, daemon=True)
+        err_thread = threading.Thread(target=_stderr_reader, daemon=True)
+        out_thread.start()
+        err_thread.start()
+
+        next_id = 0
+
+        def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any:
+            nonlocal next_id
+            next_id += 1
+            request_id = next_id
+            payload = {
+                "jsonrpc": "2.0",
+                "id": request_id,
+                "method": method,
+                "params": params,
+            }
+            proc.stdin.write(json.dumps(payload) + "\n")
+            proc.stdin.flush()
+
+            deadline = time.time() + timeout_seconds
+            while time.time() < deadline:
+                if proc.poll() is not None:
+                    break
+                try:
+                    msg = inbox.get(timeout=0.1)
+                except queue.Empty:
+                    continue
+
+                if self._handle_server_message(
+                    msg,
+                    process=proc,
+                    cwd=self._acp_cwd,
+                    text_parts=text_parts,
+                    reasoning_parts=reasoning_parts,
+                ):
+                    continue
+
+                if msg.get("id") != request_id:
+                    continue
+                if "error" in msg:
+                    err = msg.get("error") or {}
+                    raise RuntimeError(
+                        f"Copilot ACP {method} failed: {err.get('message') or err}"
+                    )
+                return msg.get("result")
+
+            stderr_text = "\n".join(stderr_tail).strip()
+            if proc.poll() is not None and stderr_text:
+                raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
+            raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
+
+        try:
+            _request(
+                "initialize",
+                {
+                    "protocolVersion": 1,
+                    "clientCapabilities": {
+                        "fs": {
+                            "readTextFile": True,
+                            "writeTextFile": True,
+                        }
+                    },
+                    "clientInfo": {
+                        "name": "hermes-agent",
+                        "title": "Hermes Agent",
+                        "version": "0.0.0",
+                    },
+                },
+            )
+            session = _request(
+                "session/new",
+                {
+                    "cwd": self._acp_cwd,
+                    "mcpServers": [],
+                },
+            ) or {}
+            session_id = str(session.get("sessionId") or "").strip()
+            if not session_id:
+                raise RuntimeError("Copilot ACP did not return a sessionId.")
+
+            text_parts: list[str] = []
+            reasoning_parts: list[str] = []
+            _request(
+                "session/prompt",
+                {
+                    "sessionId": session_id,
+                    "prompt": [
+                        {
+                            "type": "text",
+                            "text": prompt_text,
+                        }
+                    ],
+                },
+                text_parts=text_parts,
+                reasoning_parts=reasoning_parts,
+            )
+            return "".join(text_parts), "".join(reasoning_parts)
+        finally:
+            self.close()
+
+    def _handle_server_message(
+        self,
+        msg: dict[str, Any],
+        *,
+        process: subprocess.Popen[str],
+        cwd: str,
+        text_parts: list[str] | None,
+        reasoning_parts: list[str] | None,
+    ) -> bool:
+        method = msg.get("method")
+        if not isinstance(method, str):
+            return False
+
+        if method == "session/update":
+            params = msg.get("params") or {}
+            update = params.get("update") or {}
+            kind = str(update.get("sessionUpdate") or "").strip()
+            content = update.get("content") or {}
+            chunk_text = ""
+            if isinstance(content, dict):
+                chunk_text = str(content.get("text") or "")
+            if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
+                text_parts.append(chunk_text)
+            elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None:
+                reasoning_parts.append(chunk_text)
+            return True
+
+        if process.stdin is None:
+            return True
+
+        message_id = msg.get("id")
+        params = msg.get("params") or {}
+
+        if method == "session/request_permission":
+            response = _permission_denied(message_id)
+        elif method == "fs/read_text_file":
+            try:
+                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                block_error = get_read_block_error(str(path))
+                if block_error:
+                    raise PermissionError(block_error)
+                content = path.read_text() if path.exists() else ""
+                line = params.get("line")
+                limit = params.get("limit")
+                if isinstance(line, int) and line > 1:
+                    lines = content.splitlines(keepends=True)
+                    start = line - 1
+                    end = start + limit if isinstance(limit, int) and limit > 0 else None
+                    content = "".join(lines[start:end])
+                if content:
+                    content = redact_sensitive_text(content)
+                response = {
+                    "jsonrpc": "2.0",
+                    "id": message_id,
+                    "result": {
+                        "content": content,
+                    },
+                }
+            except Exception as exc:
+                response = _jsonrpc_error(message_id, -32602, str(exc))
+        elif method == "fs/write_text_file":
+            try:
+                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                if is_write_denied(str(path)):
+                    raise PermissionError(
+                        f"Write denied: '{path}' is a protected system/credential file."
+                    )
+                path.parent.mkdir(parents=True, exist_ok=True)
+                path.write_text(str(params.get("content") or ""))
+                response = {
+                    "jsonrpc": "2.0",
+                    "id": message_id,
+                    "result": None,
+                }
+            except Exception as exc:
+                response = _jsonrpc_error(message_id, -32602, str(exc))
+        else:
+            response = _jsonrpc_error(
+                message_id,
+                -32601,
+                f"ACP client method '{method}' is not supported by Hermes yet.",
+            )
+
+        process.stdin.write(json.dumps(response) + "\n")
+        process.stdin.flush()
+        return True
@@ -42,7 +42,6 @@ class FailoverReason(enum.Enum):
    # Context / payload
    context_overflow = "context_overflow"  # Context too large — compress, not failover
    payload_too_large = "payload_too_large"  # 413 — compress payload
-    image_too_large = "image_too_large"   # Native image part exceeds provider's per-image limit — shrink and retry

    # Model
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
@@ -148,20 +147,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
    "error code: 413",
 ]

-# Image-size patterns.  Matched against 400 bodies (not 413) because most
-# providers return a 400 with a specific image-too-big message before the
-# whole request hits the 413 size limit.  Anthropic's wording is the most
-# important here (hard 5 MB per image, returned as
-# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
-_IMAGE_TOO_LARGE_PATTERNS = [
-    "image exceeds",        # Anthropic: "image exceeds 5 MB maximum"
-    "image too large",      # generic
-    "image_too_large",      # error_code variant
-    "image size exceeds",   # variant
-    # "request_too_large" on a request known to contain an image → image is
-    # the likely culprit; we still try the shrink path before giving up.
-]
-
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@@ -686,15 +671,6 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

-    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
-    # Must be checked BEFORE context_overflow because messages can trip both
-    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
-    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
-        return result_fn(
-            FailoverReason.image_too_large,
-            retryable=True,
-        )
-
    # Context overflow from 400
    if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return result_fn(
@@ -822,13 +798,6 @@ def _classify_by_message(
            should_compress=True,
        )

-    # Image-too-large patterns (from message text when no status_code)
-    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
-        return result_fn(
-            FailoverReason.image_too_large,
-            retryable=True,
-        )
-
    # Usage-limit patterns need the same disambiguation as 402: some providers
    # surface "usage limit" errors without an HTTP status code.  A transient
    # signal ("try again", "resets at", …) means it's a periodic quota, not
@@ -1,236 +0,0 @@
-"""Routing helpers for inbound user-attached images.
-
-Two modes:
-
-  native  — attach images as OpenAI-style ``image_url`` content parts on the
-            user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
-            OpenAI chat.completions) already translate these into their
-            vendor-specific multimodal formats.
-
-  text    — run ``vision_analyze`` on each image up-front and prepend the
-            description to the user's text. The model never sees the pixels;
-            it only sees a lossy text summary. This is the pre-existing
-            behaviour and still the right choice for non-vision models.
-
-The decision is made once per message turn by :func:`decide_image_input_mode`.
-It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
-| ``text``, default ``auto``) and the active model's capability metadata.
-
-In ``auto`` mode:
-  - If the user has explicitly configured ``auxiliary.vision.provider``
-    (i.e. not ``auto`` and not empty), we assume they want the text pipeline
-    regardless of the main model — they've opted in to a specific vision
-    backend for a reason (cost, quality, local-only, etc.).
-  - Otherwise, if the active model reports ``supports_vision=True`` in its
-    models.dev metadata, we attach natively.
-  - Otherwise (non-vision model, no explicit override), we fall back to text.
-
-This keeps ``vision_analyze`` surfaced as a tool in every session — skills
-and agent flows that chain it (browser screenshots, deeper inspection of
-URL-referenced images, style-gating loops) keep working. The routing only
-affects *how user-attached images on the current turn* are presented to the
-main model.
-"""
-
-from __future__ import annotations
-
-import base64
-import logging
-import mimetypes
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-logger = logging.getLogger(__name__)
-
-
-_VALID_MODES = frozenset({"auto", "native", "text"})
-
-
-def _coerce_mode(raw: Any) -> str:
-    """Normalize a config value into one of the valid modes."""
-    if not isinstance(raw, str):
-        return "auto"
-    val = raw.strip().lower()
-    if val in _VALID_MODES:
-        return val
-    return "auto"
-
-
-def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
-    """True when the user configured a specific auxiliary vision backend.
-
-    An explicit override means the user *wants* the text pipeline (they're
-    paying for a dedicated vision model), so we don't silently bypass it.
-    """
-    if not isinstance(cfg, dict):
-        return False
-    aux = cfg.get("auxiliary") or {}
-    if not isinstance(aux, dict):
-        return False
-    vision = aux.get("vision") or {}
-    if not isinstance(vision, dict):
-        return False
-
-    provider = str(vision.get("provider") or "").strip().lower()
-    model = str(vision.get("model") or "").strip()
-    base_url = str(vision.get("base_url") or "").strip()
-
-    # "auto" / "" / blank = not explicit
-    if provider in ("", "auto") and not model and not base_url:
-        return False
-    return True
-
-
-def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
-    """Return True/False if we can resolve caps, None if unknown."""
-    if not provider or not model:
-        return None
-    try:
-        from agent.models_dev import get_model_capabilities
-        caps = get_model_capabilities(provider, model)
-    except Exception as exc:  # pragma: no cover - defensive
-        logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
-        return None
-    if caps is None:
-        return None
-    return bool(caps.supports_vision)
-
-
-def decide_image_input_mode(
-    provider: str,
-    model: str,
-    cfg: Optional[Dict[str, Any]],
-) -> str:
-    """Return ``"native"`` or ``"text"`` for the given turn.
-
-    Args:
-      provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
-      model:    active model slug as it would be sent to the provider.
-      cfg:      loaded config.yaml dict, or None. When None, behaves as auto.
-    """
-    mode_cfg = "auto"
-    if isinstance(cfg, dict):
-        agent_cfg = cfg.get("agent") or {}
-        if isinstance(agent_cfg, dict):
-            mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
-
-    if mode_cfg == "native":
-        return "native"
-    if mode_cfg == "text":
-        return "text"
-
-    # auto
-    if _explicit_aux_vision_override(cfg):
-        return "text"
-
-    supports = _lookup_supports_vision(provider, model)
-    if supports is True:
-        return "native"
-    return "text"
-
-
-# Image size handling is REACTIVE rather than proactive: we attempt native
-# attachment at full size regardless of provider, and rely on
-# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
-# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
-# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
-#
-# Why reactive: our knowledge of provider ceilings is partial and evolving
-# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
-# A proactive per-provider table would be stale the moment a provider raises
-# or lowers its limit, and silently degrading quality for users on providers
-# that would have accepted the full image is the worse failure mode.
-# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
-# it fires, which is cheaper than permanent quality loss.
-
-
-def _guess_mime(path: Path) -> str:
-    mime, _ = mimetypes.guess_type(str(path))
-    if mime and mime.startswith("image/"):
-        return mime
-    # mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
-    # the suffix looks imagey.
-    suffix = path.suffix.lower()
-    return {
-        ".jpg": "image/jpeg",
-        ".jpeg": "image/jpeg",
-        ".png": "image/png",
-        ".gif": "image/gif",
-        ".webp": "image/webp",
-        ".bmp": "image/bmp",
-    }.get(suffix, "image/jpeg")
-
-
-def _file_to_data_url(path: Path) -> Optional[str]:
-    """Encode a local image as a base64 data URL at its native size.
-
-    Size limits are NOT enforced here — the agent retry loop
-    (``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
-    provider's first rejection. Keeping this simple means providers that
-    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
-    quality tax just because one other provider is stricter.
-
-    Returns None only if the file can't be read (missing, permission
-    denied, etc.); the caller reports those paths in ``skipped``.
-    """
-    try:
-        raw = path.read_bytes()
-    except Exception as exc:
-        logger.warning("image_routing: failed to read %s — %s", path, exc)
-        return None
-    mime = _guess_mime(path)
-    b64 = base64.b64encode(raw).decode("ascii")
-    return f"data:{mime};base64,{b64}"
-
-
-def build_native_content_parts(
-    user_text: str,
-    image_paths: List[str],
-) -> Tuple[List[Dict[str, Any]], List[str]]:
-    """Build an OpenAI-style ``content`` list for a user turn.
-
-    Shape:
-      [{"type": "text", "text": "..."},
-       {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
-       ...]
-
-    Images are attached at their native size. If a provider rejects the
-    request because an image is too large (e.g. Anthropic's 5 MB per-image
-    ceiling), the agent's retry loop transparently shrinks and retries
-    once — see ``run_agent._try_shrink_image_parts_in_messages``.
-
-    Returns (content_parts, skipped_paths). Skipped paths are files that
-    couldn't be read from disk.
-    """
-    parts: List[Dict[str, Any]] = []
-    skipped: List[str] = []
-
-    text = (user_text or "").strip()
-    if text:
-        parts.append({"type": "text", "text": text})
-
-    for raw_path in image_paths:
-        p = Path(raw_path)
-        if not p.exists() or not p.is_file():
-            skipped.append(str(raw_path))
-            continue
-        data_url = _file_to_data_url(p)
-        if not data_url:
-            skipped.append(str(raw_path))
-            continue
-        parts.append({
-            "type": "image_url",
-            "image_url": {"url": data_url},
-        })
-
-    # If the text was empty, add a neutral prompt so the turn isn't just images.
-    if not text and any(p.get("type") == "image_url" for p in parts):
-        parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
-
-    return parts, skipped
-
-
-__all__ = [
-    "decide_image_input_mode",
-    "build_native_content_parts",
-]
@@ -63,124 +63,15 @@ def sanitize_context(text: str) -> str:
    return text


-class StreamingContextScrubber:
-    """Stateful scrubber for streaming text that may contain split memory-context spans.
-
-    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
-    a ``<memory-context>`` opened in one delta and closed in a later delta
-    leaks its payload to the UI because the non-greedy block regex needs
-    both tags in one string.  This scrubber runs a small state machine
-    across deltas, holding back partial-tag tails and discarding
-    everything inside a span (including the system-note line).
-
-    Usage::
-
-        scrubber = StreamingContextScrubber()
-        for delta in stream:
-            visible = scrubber.feed(delta)
-            if visible:
-                emit(visible)
-        trailing = scrubber.flush()  # at end of stream
-        if trailing:
-            emit(trailing)
-
-    The scrubber is re-entrant per agent instance.  Callers building new
-    top-level responses (new turn) should create a fresh scrubber or call
-    ``reset()``.
-    """
-
-    _OPEN_TAG = "<memory-context>"
-    _CLOSE_TAG = "</memory-context>"
-
-    def __init__(self) -> None:
-        self._in_span: bool = False
-        self._buf: str = ""
-
-    def reset(self) -> None:
-        self._in_span = False
-        self._buf = ""
-
-    def feed(self, text: str) -> str:
-        """Return the visible portion of ``text`` after scrubbing.
-
-        Any trailing fragment that could be the start of an open/close tag
-        is held back in the internal buffer and surfaced on the next
-        ``feed()`` call or discarded/emitted by ``flush()``.
-        """
-        if not text:
-            return ""
-        buf = self._buf + text
-        self._buf = ""
-        out: list[str] = []
-
-        while buf:
-            if self._in_span:
-                idx = buf.lower().find(self._CLOSE_TAG)
-                if idx == -1:
-                    # Hold back a potential partial close tag; drop the rest
-                    held = self._max_partial_suffix(buf, self._CLOSE_TAG)
-                    self._buf = buf[-held:] if held else ""
-                    return "".join(out)
-                # Found close — skip span content + tag, continue
-                buf = buf[idx + len(self._CLOSE_TAG):]
-                self._in_span = False
-            else:
-                idx = buf.lower().find(self._OPEN_TAG)
-                if idx == -1:
-                    # No open tag — hold back a potential partial open tag
-                    held = self._max_partial_suffix(buf, self._OPEN_TAG)
-                    if held:
-                        out.append(buf[:-held])
-                        self._buf = buf[-held:]
-                    else:
-                        out.append(buf)
-                    return "".join(out)
-                # Emit text before the tag, enter span
-                if idx > 0:
-                    out.append(buf[:idx])
-                buf = buf[idx + len(self._OPEN_TAG):]
-                self._in_span = True
-
-        return "".join(out)
-
-    def flush(self) -> str:
-        """Emit any held-back buffer at end-of-stream.
-
-        If we're still inside an unterminated span the remaining content is
-        discarded (safer: leaking partial memory context is worse than a
-        truncated answer).  Otherwise the held-back partial-tag tail is
-        emitted verbatim (it turned out not to be a real tag).
-        """
-        if self._in_span:
-            self._buf = ""
-            self._in_span = False
-            return ""
-        tail = self._buf
-        self._buf = ""
-        return tail
-
-    @staticmethod
-    def _max_partial_suffix(buf: str, tag: str) -> int:
-        """Return the length of the longest buf-suffix that is a tag-prefix.
-
-        Case-insensitive.  Returns 0 if no suffix could start the tag.
-        """
-        tag_lower = tag.lower()
-        buf_lower = buf.lower()
-        max_check = min(len(buf_lower), len(tag_lower) - 1)
-        for i in range(max_check, 0, -1):
-            if tag_lower.startswith(buf_lower[-i:]):
-                return i
-        return 0
-
-
 def build_memory_context_block(raw_context: str) -> str:
-    """Wrap prefetched memory in a fenced block with system note."""
+    """Wrap prefetched memory in a fenced block with system note.
+
+    The fence prevents the model from treating recalled context as user
+    discourse.  Injected at API-call time only — never persisted.
+    """
    if not raw_context or not raw_context.strip():
        return ""
    clean = sanitize_context(raw_context)
-    if clean != raw_context:
-        logger.warning("memory provider returned pre-wrapped context; stripped")
    return (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, "
@@ -51,7 +51,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
-    "gmi",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -61,7 +60,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
-    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
@@ -309,21 +307,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
-    "api.gmi-serving.com": "gmi",
    "ollama.com": "ollama-cloud",
 }

-# Auto-extend with hostnames derived from provider profiles.
-# Any provider with a base_url not already in the map gets added automatically.
-try:
-    from providers import list_providers as _list_providers
-    for _pp in _list_providers():
-        _host = _pp.get_hostname()
-        if _host and _host not in _URL_TO_PROVIDER:
-            _URL_TO_PROVIDER[_host] = _pp.name
-except Exception:
-    pass
-

 def _infer_provider_from_url(base_url: str) -> Optional[str]:
    """Infer the models.dev provider name from a base URL.
@@ -716,29 +702,6 @@ def fetch_endpoint_model_metadata(
    return {}


-def _resolve_endpoint_context_length(
-    model: str,
-    base_url: str,
-    api_key: str = "",
-) -> Optional[int]:
-    """Resolve context length from an endpoint's live ``/models`` metadata."""
-    endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
-    matched = endpoint_metadata.get(model)
-    if not matched:
-        if len(endpoint_metadata) == 1:
-            matched = next(iter(endpoint_metadata.values()))
-        else:
-            for key, entry in endpoint_metadata.items():
-                if model in key or key in model:
-                    matched = entry
-                    break
-    if matched:
-        context_length = matched.get("context_length")
-        if isinstance(context_length, int):
-            return context_length
-    return None
-
-
 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    from hermes_constants import get_hermes_home
@@ -1332,9 +1295,22 @@ def get_model_context_length(
    # returns 128k) instead of the model's full context (400k).  models.dev
    # has the correct per-provider values and is checked at step 5+.
    if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
-        context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
-        if context_length is not None:
-            return context_length
+        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
+        matched = endpoint_metadata.get(model)
+        if not matched:
+            # Single-model servers: if only one model is loaded, use it
+            if len(endpoint_metadata) == 1:
+                matched = next(iter(endpoint_metadata.values()))
+            else:
+                # Fuzzy match: substring in either direction
+                for key, entry in endpoint_metadata.items():
+                    if model in key or key in model:
+                        matched = entry
+                        break
+        if matched:
+            context_length = matched.get("context_length")
+            if isinstance(context_length, int):
+                return context_length
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
@@ -1398,12 +1374,6 @@ def get_model_context_length(
            if base_url:
                save_context_length(model, base_url, codex_ctx)
            return codex_ctx
-    if effective_provider == "gmi" and base_url:
-        # GMI exposes authoritative context_length via /models, but it is not
-        # in models.dev yet. Preserve that higher-fidelity endpoint lookup.
-        ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
-        if ctx is not None:
-            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
@@ -141,12 +141,6 @@ DEFAULT_AGENT_IDENTITY = (
    "Be targeted and efficient in your exploration and investigations."
 )

-HERMES_AGENT_HELP_GUIDANCE = (
-    "If the user asks about configuring, setting up, or using Hermes Agent "
-    "itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
-    "before answering. Docs: https://hermes-agent.nousresearch.com/docs"
-)
-
 MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
@@ -6,18 +6,12 @@ adds latency to the user-facing reply.

 import logging
 import threading
-from typing import Callable, Optional
+from typing import Optional

 from agent.auxiliary_client import call_llm

 logger = logging.getLogger(__name__)

-# Callback signature: (task_name, exception) -> None. Used to surface
-# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
-# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
-# become visible instead of piling up as NULL session titles.
-FailureCallback = Callable[[str, BaseException], None]
-
 _TITLE_PROMPT = (
    "Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
    "following exchange. The title should capture the main topic or intent. "
@@ -25,21 +19,11 @@ _TITLE_PROMPT = (
 )


-def generate_title(
-    user_message: str,
-    assistant_response: str,
-    timeout: float = 30.0,
-    failure_callback: Optional[FailureCallback] = None,
-) -> Optional[str]:
+def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
    """Generate a session title from the first exchange.

    Uses the auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.
-
-    ``failure_callback`` is invoked with ``(task, exception)`` when the
-    auxiliary call raises — the caller typically wires this to
-    ``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
-    of silently accumulating untitled sessions.
    """
    # Truncate long messages to keep the request small
    user_snippet = user_message[:500] if user_message else ""
@@ -68,15 +52,7 @@ def generate_title(
            title = title[:77] + "..."
        return title if title else None
    except Exception as e:
-        # Log at WARNING so this shows up in agent.log without debug mode.
-        # Full detail at debug level for operators who need the stack.
-        logger.warning("Title generation failed: %s", e)
-        logger.debug("Title generation traceback", exc_info=True)
-        if failure_callback is not None:
-            try:
-                failure_callback("title generation", e)
-            except Exception:
-                logger.debug("Title generation failure_callback raised", exc_info=True)
+        logger.debug("Title generation failed: %s", e)
        return None


@@ -85,7 +61,6 @@ def auto_title_session(
    session_id: str,
    user_message: str,
    assistant_response: str,
-    failure_callback: Optional[FailureCallback] = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -106,9 +81,7 @@ def auto_title_session(
    except Exception:
        return

-    title = generate_title(
-        user_message, assistant_response, failure_callback=failure_callback
-    )
+    title = generate_title(user_message, assistant_response)
    if not title:
        return

@@ -125,7 +98,6 @@ def maybe_auto_title(
    user_message: str,
    assistant_response: str,
    conversation_history: list,
-    failure_callback: Optional[FailureCallback] = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -147,7 +119,6 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback},
        daemon=True,
        name="auto-title",
    )
@@ -6,16 +6,9 @@ Usage:
    result = transport.normalize_response(raw_response)
 """

-from agent.transports.types import (
-    NormalizedResponse,
-    ToolCall,
-    Usage,
-    build_tool_call,
-    map_finish_reason,
-)  # noqa: F401
+from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401

 _REGISTRY: dict = {}
-_discovered: bool = False


 def register_transport(api_mode: str, transport_cls: type) -> None:
@@ -30,9 +23,6 @@ def get_transport(api_mode: str):
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.
    """
-    global _discovered
-    if not _discovered:
-        _discover_transports()
    cls = _REGISTRY.get(api_mode)
    if cls is None:
        # The registry can be partially populated when a specific transport
@@ -48,8 +38,6 @@ def get_transport(api_mode: str):

 def _discover_transports() -> None:
    """Import all transport modules to trigger auto-registration."""
-    global _discovered
-    _discovered = True
    try:
        import agent.transports.anthropic  # noqa: F401
    except ImportError:
@@ -10,7 +10,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
 """

 import copy
-from typing import Any
+from typing import Any, Dict, List, Optional

 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
@@ -28,9 +28,7 @@ class ChatCompletionsTransport(ProviderTransport):
    def api_mode(self) -> str:
        return "chat_completions"

-    def convert_messages(
-        self, messages: list[dict[str, Any]], **kwargs
-    ) -> list[dict[str, Any]]:
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
        """Messages are already in OpenAI format — sanitize Codex leaks only.

        Strips Codex Responses API fields (``codex_reasoning_items`` /
@@ -47,9 +45,7 @@ class ChatCompletionsTransport(ProviderTransport):
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
-                    if isinstance(tc, dict) and (
-                        "call_id" in tc or "response_item_id" in tc
-                    ):
+                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
                        needs_sanitize = True
                        break
                if needs_sanitize:
@@ -72,52 +68,76 @@ class ChatCompletionsTransport(ProviderTransport):
                        tc.pop("response_item_id", None)
        return sanitized

-    def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Tools are already in OpenAI format — identity."""
        return tools

    def build_kwargs(
        self,
        model: str,
-        messages: list[dict[str, Any]],
-        tools: list[dict[str, Any]] | None = None,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
-    ) -> dict[str, Any]:
+    ) -> Dict[str, Any]:
        """Build chat.completions.create() kwargs.

-        params (all optional):
+        This is the most complex transport method — it handles ~16 providers
+        via params rather than subclasses.
+
+        params:
            timeout: float — API call timeout
            max_tokens: int | None — user-configured max tokens
-            ephemeral_max_output_tokens: int | None — one-shot override
+            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
            reasoning_config: dict | None
            request_overrides: dict | None
            session_id: str | None
+            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
            model_lower: str — lowercase model name for pattern matching
-            # Provider profile path (all per-provider quirks live in providers/)
-            provider_profile: ProviderProfile | None — when present, delegates to
-                _build_kwargs_from_profile(); all flag params below are bypassed.
-            # Remaining flags — only used by the legacy fallback for unregistered
-            # providers (i.e. get_provider_profile() returned None).  Known
-            # providers all go through provider_profile.
-            qwen_session_metadata: dict | None
+            # Provider detection flags (all optional, default False)
+            is_openrouter: bool
+            is_nous: bool
+            is_qwen_portal: bool
+            is_github_models: bool
+            is_nvidia_nim: bool
+            is_kimi: bool
+            is_custom_provider: bool
+            ollama_num_ctx: int | None
+            # Provider routing
+            provider_preferences: dict | None
+            # Qwen-specific
+            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
+            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
+            # Temperature
+            fixed_temperature: Any — from _fixed_temperature_for_model()
+            omit_temperature: bool
+            # Reasoning
            supports_reasoning: bool
+            github_reasoning_extra: dict | None
+            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
-            extra_body_additions: dict | None
+            # Extra
+            extra_body_additions: dict | None — pre-built extra_body entries
        """
        # Codex sanitization: drop reasoning_items / call_id / response_item_id
        sanitized = self.convert_messages(messages)

-        # ── Provider profile: single-path when present ──────────────────
-        _profile = params.get("provider_profile")
-        if _profile:
-            return self._build_kwargs_from_profile(
-                _profile, model, sanitized, tools, params
-            )
-
-        # ── Legacy fallback (unregistered / unknown provider) ───────────
-        # Reached only when get_provider_profile() returned None.
-        # Known providers always go through the profile path above.
+        # Qwen portal prep AFTER codex sanitization.  If sanitize already
+        # deepcopied, reuse that copy via the in-place variant to avoid a
+        # second deepcopy.
+        is_qwen = params.get("is_qwen_portal", False)
+        if is_qwen:
+            qwen_prep = params.get("qwen_prepare_fn")
+            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
+            if sanitized is messages:
+                if qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+            else:
+                # Already deepcopied — transform in place
+                if qwen_prep_inplace is not None:
+                    qwen_prep_inplace(sanitized)
+                elif qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)

        # Developer role swap for GPT-5/Codex models
        model_lower = params.get("model_lower", (model or "").lower())
@@ -130,7 +150,7 @@ class ChatCompletionsTransport(ProviderTransport):
            sanitized = list(sanitized)
            sanitized[0] = {**sanitized[0], "role": "developer"}

-        api_kwargs: dict[str, Any] = {
+        api_kwargs: Dict[str, Any] = {
            "model": model,
            "messages": sanitized,
        }
@@ -139,6 +159,19 @@ class ChatCompletionsTransport(ProviderTransport):
        if timeout is not None:
            api_kwargs["timeout"] = timeout

+        # Temperature
+        fixed_temp = params.get("fixed_temperature")
+        omit_temp = params.get("omit_temperature", False)
+        if omit_temp:
+            api_kwargs.pop("temperature", None)
+        elif fixed_temp is not None:
+            api_kwargs["temperature"] = fixed_temp
+
+        # Qwen metadata (caller precomputes {sessionId, promptId})
+        qwen_meta = params.get("qwen_session_metadata")
+        if qwen_meta and is_qwen:
+            api_kwargs["metadata"] = qwen_meta
+
        # Tools
        if tools:
            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
@@ -153,24 +186,96 @@ class ChatCompletionsTransport(ProviderTransport):
        ephemeral = params.get("ephemeral_max_output_tokens")
        max_tokens = params.get("max_tokens")
        anthropic_max_out = params.get("anthropic_max_output")
+        is_nvidia_nim = params.get("is_nvidia_nim", False)
+        is_kimi = params.get("is_kimi", False)
+        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
            api_kwargs.update(max_tokens_fn(ephemeral))
        elif max_tokens is not None and max_tokens_fn:
            api_kwargs.update(max_tokens_fn(max_tokens))
+        elif is_nvidia_nim and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(16384))
+        elif is_qwen and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(65536))
+        elif is_kimi and max_tokens_fn:
+            # Kimi/Moonshot: 32000 matches Kimi CLI's default
+            api_kwargs.update(max_tokens_fn(32000))
        elif anthropic_max_out is not None:
            api_kwargs["max_tokens"] = anthropic_max_out

-        # extra_body assembly
-        extra_body: dict[str, Any] = {}
+        # Kimi: top-level reasoning_effort (unless thinking disabled)
+        if is_kimi:
+            _kimi_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _kimi_thinking_off:
+                _kimi_effort = "medium"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _kimi_effort = _e
+                api_kwargs["reasoning_effort"] = _kimi_effort

-        # Generic reasoning passthrough for unknown providers
+        # extra_body assembly
+        extra_body: Dict[str, Any] = {}
+
+        is_openrouter = params.get("is_openrouter", False)
+        is_nous = params.get("is_nous", False)
+        is_github_models = params.get("is_github_models", False)
+
+        provider_prefs = params.get("provider_preferences")
+        if provider_prefs and is_openrouter:
+            extra_body["provider"] = provider_prefs
+
+        # Kimi extra_body.thinking
+        if is_kimi:
+            _kimi_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _kimi_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _kimi_thinking_enabled else "disabled",
+            }
+
+        # Reasoning
        if params.get("supports_reasoning", False):
-            reasoning_config = params.get("reasoning_config")
-            if reasoning_config is not None:
-                extra_body["reasoning"] = dict(reasoning_config)
+            if is_github_models:
+                gh_reasoning = params.get("github_reasoning_extra")
+                if gh_reasoning is not None:
+                    extra_body["reasoning"] = gh_reasoning
            else:
-                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+                if reasoning_config is not None:
+                    rc = dict(reasoning_config)
+                    if is_nous and rc.get("enabled") is False:
+                        pass  # omit for Nous when disabled
+                    else:
+                        extra_body["reasoning"] = rc
+                else:
+                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+
+        if is_nous:
+            extra_body["tags"] = ["product=hermes-agent"]
+
+        # Ollama num_ctx
+        ollama_ctx = params.get("ollama_num_ctx")
+        if ollama_ctx:
+            options = extra_body.get("options", {})
+            options["num_ctx"] = ollama_ctx
+            extra_body["options"] = options
+
+        # Ollama/custom think=false
+        if params.get("is_custom_provider", False):
+            if reasoning_config and isinstance(reasoning_config, dict):
+                _effort = (reasoning_config.get("effort") or "").strip().lower()
+                _enabled = reasoning_config.get("enabled", True)
+                if _effort == "none" or _enabled is False:
+                    extra_body["think"] = False
+
+        if is_qwen:
+            extra_body["vl_high_resolution_images"] = True

        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
@@ -187,117 +292,6 @@ class ChatCompletionsTransport(ProviderTransport):

        return api_kwargs

-    def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
-        """Build API kwargs using a ProviderProfile — single path, no legacy flags.
-
-        This method replaces the entire flag-based kwargs assembly when a
-        provider_profile is passed. Every quirk comes from the profile object.
-        """
-        from providers.base import OMIT_TEMPERATURE
-
-        # Message preprocessing
-        sanitized = profile.prepare_messages(sanitized)
-
-        # Developer role swap — model-name-based, applies to all providers
-        _model_lower = (model or "").lower()
-        if (
-            sanitized
-            and isinstance(sanitized[0], dict)
-            and sanitized[0].get("role") == "system"
-            and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
-        ):
-            sanitized = list(sanitized)
-            sanitized[0] = {**sanitized[0], "role": "developer"}
-
-        api_kwargs: dict[str, Any] = {
-            "model": model,
-            "messages": sanitized,
-        }
-
-        # Temperature
-        if profile.fixed_temperature is OMIT_TEMPERATURE:
-            pass  # Don't include temperature at all
-        elif profile.fixed_temperature is not None:
-            api_kwargs["temperature"] = profile.fixed_temperature
-        else:
-            # Use caller's temperature if provided
-            temp = params.get("temperature")
-            if temp is not None:
-                api_kwargs["temperature"] = temp
-
-        # Timeout
-        timeout = params.get("timeout")
-        if timeout is not None:
-            api_kwargs["timeout"] = timeout
-
-        # Tools — apply Moonshot/Kimi schema sanitization regardless of path
-        if tools:
-            if is_moonshot_model(model):
-                tools = sanitize_moonshot_tools(tools)
-            api_kwargs["tools"] = tools
-
-        # max_tokens resolution — priority: ephemeral > user > profile default
-        max_tokens_fn = params.get("max_tokens_param_fn")
-        ephemeral = params.get("ephemeral_max_output_tokens")
-        user_max = params.get("max_tokens")
-        anthropic_max = params.get("anthropic_max_output")
-
-        if ephemeral is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(ephemeral))
-        elif user_max is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(user_max))
-        elif profile.default_max_tokens and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
-        elif anthropic_max is not None:
-            api_kwargs["max_tokens"] = anthropic_max
-
-        # Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
-        reasoning_config = params.get("reasoning_config")
-        extra_body_from_profile, top_level_from_profile = (
-            profile.build_api_kwargs_extras(
-                reasoning_config=reasoning_config,
-                supports_reasoning=params.get("supports_reasoning", False),
-                qwen_session_metadata=params.get("qwen_session_metadata"),
-                model=model,
-                ollama_num_ctx=params.get("ollama_num_ctx"),
-            )
-        )
-        api_kwargs.update(top_level_from_profile)
-
-        # extra_body assembly
-        extra_body: dict[str, Any] = {}
-
-        # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
-        profile_body = profile.build_extra_body(
-            session_id=params.get("session_id"),
-            provider_preferences=params.get("provider_preferences"),
-        )
-        if profile_body:
-            extra_body.update(profile_body)
-
-        # Profile's reasoning/thinking extra_body entries
-        if extra_body_from_profile:
-            extra_body.update(extra_body_from_profile)
-
-        # Merge any pre-built extra_body additions from the caller
-        additions = params.get("extra_body_additions")
-        if additions:
-            extra_body.update(additions)
-
-        # Request overrides (user config)
-        overrides = params.get("request_overrides")
-        if overrides:
-            for k, v in overrides.items():
-                if k == "extra_body" and isinstance(v, dict):
-                    extra_body.update(v)
-                else:
-                    api_kwargs[k] = v
-
-        if extra_body:
-            api_kwargs["extra_body"] = extra_body
-
-        return api_kwargs
-
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize OpenAI ChatCompletion to NormalizedResponse.

@@ -319,7 +313,7 @@ class ChatCompletionsTransport(ProviderTransport):
                # Gemini 3 thinking models attach extra_content with
                # thought_signature — without replay on the next turn the API
                # rejects the request with 400.
-                tc_provider_data: dict[str, Any] = {}
+                tc_provider_data: Dict[str, Any] = {}
                extra = getattr(tc, "extra_content", None)
                if extra is None and hasattr(tc, "model_extra"):
                    extra = (tc.model_extra or {}).get("extra_content")
@@ -330,14 +324,12 @@ class ChatCompletionsTransport(ProviderTransport):
                        except Exception:
                            pass
                    tc_provider_data["extra_content"] = extra
-                tool_calls.append(
-                    ToolCall(
-                        id=tc.id,
-                        name=tc.function.name,
-                        arguments=tc.function.arguments,
-                        provider_data=tc_provider_data or None,
-                    )
-                )
+                tool_calls.append(ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                    provider_data=tc_provider_data or None,
+                ))

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -355,7 +347,7 @@ class ChatCompletionsTransport(ProviderTransport):
        reasoning = getattr(msg, "reasoning", None)
        reasoning_content = getattr(msg, "reasoning_content", None)

-        provider_data: dict[str, Any] = {}
+        provider_data: Dict[str, Any] = {}
        if reasoning_content:
            provider_data["reasoning_content"] = reasoning_content
        rd = getattr(msg, "reasoning_details", None)
@@ -381,7 +373,7 @@ class ChatCompletionsTransport(ProviderTransport):
            return False
        return True

-    def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
        usage = getattr(response, "usage", None)
        if usage is None:
@@ -12,7 +12,7 @@ from __future__ import annotations

 import json
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, Dict, List, Optional


@dataclass
@@ -32,10 +32,10 @@ class ToolCall:
    * Others: ``None``
    """

-    id: str | None
+    id: Optional[str]
    name: str
    arguments: str  # JSON string
-    provider_data: dict[str, Any] | None = field(default=None, repr=False)
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The agent loop reads tc.function.name / tc.function.arguments
@@ -47,17 +47,17 @@ class ToolCall:
        return "function"

    @property
-    def function(self) -> ToolCall:
+    def function(self) -> "ToolCall":
        """Return self so tc.function.name / tc.function.arguments work."""
        return self

    @property
-    def call_id(self) -> str | None:
+    def call_id(self) -> Optional[str]:
        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
        return (self.provider_data or {}).get("call_id")

    @property
-    def response_item_id(self) -> str | None:
+    def response_item_id(self) -> Optional[str]:
        """Codex response_item_id from provider_data."""
        return (self.provider_data or {}).get("response_item_id")

@@ -101,18 +101,18 @@ class NormalizedResponse:
    * Others: ``None``
    """

-    content: str | None
-    tool_calls: list[ToolCall] | None
+    content: Optional[str]
+    tool_calls: Optional[List[ToolCall]]
    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
-    reasoning: str | None = None
-    usage: Usage | None = None
-    provider_data: dict[str, Any] | None = field(default=None, repr=False)
+    reasoning: Optional[str] = None
+    usage: Optional[Usage] = None
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The shim _nr_to_assistant_message() mapped these from provider_data.
    # These properties let NormalizedResponse pass through directly.
    @property
-    def reasoning_content(self) -> str | None:
+    def reasoning_content(self) -> Optional[str]:
        pd = self.provider_data or {}
        return pd.get("reasoning_content")

@@ -136,9 +136,8 @@ class NormalizedResponse:
 # Factory helpers
 # ---------------------------------------------------------------------------

-
 def build_tool_call(
-    id: str | None,
+    id: Optional[str],
    name: str,
    arguments: Any,
    **provider_fields: Any,
@@ -152,7 +151,7 @@ def build_tool_call(
    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)


-def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
+def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
    """Translate a provider-specific stop reason to the normalised set.

    Falls back to ``"stop"`` for unknown or ``None`` reasons.
@@ -15,7 +15,6 @@ Usage:

 import logging
 import os
-import re
 import shutil
 import sys
 import json
@@ -759,17 +758,9 @@ def _run_cleanup():
        pass
    try:
        if _active_agent_ref and hasattr(_active_agent_ref, 'shutdown_memory_provider'):
-            # Forward the agent's own transcript so memory providers'
-            # ``on_session_end`` hooks see the real conversation instead of
-            # an empty list (#15165). ``_session_messages`` is set on
-            # ``AIAgent.__init__`` and refreshed every turn via
-            # ``_persist_session``. Fall back to no-arg on test stubs /
-            # partially-initialised agents where the attribute is missing.
-            _session_msgs = getattr(_active_agent_ref, '_session_messages', None)
-            if isinstance(_session_msgs, list):
-                _active_agent_ref.shutdown_memory_provider(_session_msgs)
-            else:
-                _active_agent_ref.shutdown_memory_provider()
+            _active_agent_ref.shutdown_memory_provider(
+                getattr(_active_agent_ref, 'conversation_history', None) or []
+            )
    except Exception:
        pass

@@ -1556,60 +1547,6 @@ def _should_auto_attach_clipboard_image_on_paste(pasted_text: str) -> bool:
    return not pasted_text.strip()


-def _strip_leaked_bracketed_paste_wrappers(text: str) -> str:
-    """Strip leaked bracketed-paste wrapper markers from user-visible text.
-
-    Defensive normalization for cases where terminal/prompt_toolkit parsing
-    fails and bracketed-paste markers end up in the buffer as literal text.
-
-    We strip canonical wrappers unconditionally and also handle degraded
-    visible forms like ``[200~`` / ``[201~`` and ``00~`` / ``01~`` when they
-    look like wrapper boundaries, not arbitrary user content.
-    """
-    if not text:
-        return text
-
-    text = (
-        text.replace("\x1b[200~", "")
-        .replace("\x1b[201~", "")
-        .replace("^[[200~", "")
-        .replace("^[[201~", "")
-    )
-    text = re.sub(r"(^|[\s\n>:\]\)])\[200~", r"\1", text)
-    text = re.sub(r"\[201~(?=$|[\s\n<\[\(\):;.,!?])", "", text)
-    text = re.sub(r"(^|[\s\n>:\]\)])00~", r"\1", text)
-    text = re.sub(r"01~(?=$|[\s\n<\[\(\):;.,!?])", "", text)
-    return text
-
-
-# Cursor Position Report (CPR / DSR) response, format ``ESC[<row>;<col>R``.
-# prompt_toolkit's _on_resize() + renderer send ``ESC[6n`` queries to the
-# terminal; under resize storms or tab switches the terminal's reply can
-# race past the input parser and end up in the input buffer as literal
-# text (see issue #14692). Also matches the visible-form ``^[[<row>;<col>R``
-# that appears when the ESC byte was stripped by a prior filter.
-_DSR_CPR_ESC_RE = re.compile(r"\x1b\[\d+;\d+R")
-_DSR_CPR_VISIBLE_RE = re.compile(r"\^\[\[\d+;\d+R")
-
-
-def _strip_leaked_terminal_responses(text: str) -> str:
-    """Strip leaked terminal control-response sequences from user input.
-
-    Covers Cursor Position Report (CPR / DSR) responses — ``ESC[<row>;<col>R``
-    and the visible ``^[[<row>;<col>R`` form. These are replies the terminal
-    sends back to queries prompt_toolkit makes during ``_on_resize`` /
-    ``_request_absolute_cursor_position``. When the input parser drops one
-    (resize storms, multiplexer focus changes, slow PTYs) the response
-    lands in the input buffer as literal text and corrupts what the user
-    typed.
-    """
-    if not text:
-        return text
-    text = _DSR_CPR_ESC_RE.sub("", text)
-    text = _DSR_CPR_VISIBLE_RE.sub("", text)
-    return text
-
-
 def _collect_query_images(query: str | None, image_arg: str | None = None) -> tuple[str, list[Path]]:
    """Collect local image attachments for single-query CLI flows."""
    message = query or ""
@@ -2218,42 +2155,6 @@ class HermesCLI:
            self._last_invalidate = now
            self._app.invalidate()

-    def _force_full_redraw(self) -> None:
-        """Force a clean full-screen repaint of the prompt_toolkit UI.
-
-        Used to recover from terminal buffer drift caused by external
-        redraws we can't detect — e.g. macOS cmux / tmux tab switches,
-        ``clear`` issued from a subshell, or SSH window restores. These
-        wipe or repaint the terminal without firing SIGWINCH, so
-        prompt_toolkit's tracked ``_cursor_pos`` no longer matches reality
-        and the next incremental redraw stacks on top of stale content
-        (ghost status bars, duplicated prompts).
-
-        Bound to Ctrl+L and exposed as the ``/redraw`` slash command,
-        matching the standard terminal-UX convention (bash, zsh, fish,
-        vim, htop).
-        """
-        app = getattr(self, "_app", None)
-        if not app:
-            return
-        try:
-            renderer = app.renderer
-            out = renderer.output
-            out.reset_attributes()
-            out.erase_screen()
-            out.cursor_goto(0, 0)
-            out.flush()
-            # Drop prompt_toolkit's cached screen + cursor state so the
-            # next _redraw() starts from a known (0, 0) origin and
-            # re-renders every cell rather than diffing against stale.
-            renderer.reset(leave_alternate_screen=False)
-        except Exception:
-            pass
-        try:
-            app.invalidate()
-        except Exception:
-            pass
-
    def _status_bar_context_style(self, percent_used: Optional[int]) -> str:
        if percent_used is None:
            return "class:status-bar-dim"
@@ -6000,7 +5901,6 @@ class HermesCLI:
            platform_status = {
                Platform.TELEGRAM: ("Telegram", "TELEGRAM_BOT_TOKEN"),
                Platform.DISCORD: ("Discord", "DISCORD_BOT_TOKEN"),
-                Platform.SLACK: ("Slack", "SLACK_BOT_TOKEN"),
                Platform.WHATSAPP: ("WhatsApp", "WHATSAPP_ENABLED"),
            }
            
@@ -6071,12 +5971,6 @@ class HermesCLI:
            self.show_toolsets()
        elif canonical == "config":
            self.show_config()
-        elif canonical == "redraw":
-            # Manual recovery for terminal buffer drift from multiplexer
-            # tab switches, subshell ``clear``, SSH window restores, etc.
-            # See issue #8688 (cmux). Ctrl+L is bound to the same helper.
-            self._force_full_redraw()
-            _cprint(f"  {_DIM}✓ UI redrawn{_RST}")
        elif canonical == "clear":
            self.new_session(silent=True)
            # Clear terminal screen.  Inside the TUI, Rich's console.clear()
@@ -8442,62 +8336,13 @@ class HermesCLI:
        ):
            return None
        
-        # Route image attachments based on the active model's vision capability.
-        # "native" → pass pixels as OpenAI-style content parts (adapters
-        #            translate for Anthropic/Gemini/Bedrock).
-        # "text"   → pre-analyze each image with vision_analyze and prepend the
-        #            description as text — works with non-vision models.
-        # See agent/image_routing.py for the decision table.
+        # Pre-process images through the vision tool (Gemini Flash) so the
+        # main model receives text descriptions instead of raw base64 image
+        # content — works with any model, not just vision-capable ones.
        if images:
-            try:
-                from agent.image_routing import (
-                    build_native_content_parts,
-                    decide_image_input_mode,
-                )
-                from hermes_cli.config import load_config
-
-                _img_mode = decide_image_input_mode(
-                    (self.provider or "").strip(),
-                    (self.model or "").strip(),
-                    load_config(),
-                )
-            except Exception as _img_exc:
-                logging.debug("image_routing decision failed, defaulting to text: %s", _img_exc)
-                _img_mode = "text"
-
-            if _img_mode == "native":
-                try:
-                    _text_for_parts = message if isinstance(message, str) else ""
-                    _img_str_paths = [str(p) for p in images]
-                    _parts, _skipped = build_native_content_parts(
-                        _text_for_parts,
-                        _img_str_paths,
-                    )
-                    if _skipped:
-                        _cprint(
-                            f"  {_DIM}⚠ skipped {len(_skipped)} unreadable image path(s){_RST}"
-                        )
-                    if any(p.get("type") == "image_url" for p in _parts):
-                        _img_names = ", ".join(Path(p).name for p in _img_str_paths)
-                        _cprint(
-                            f"  {_DIM}📎 attaching {len(images)} image(s) natively "
-                            f"(model supports vision): {_img_names}{_RST}"
-                        )
-                        message = _parts
-                    else:
-                        # All images unreadable — fall back to text enrichment.
-                        message = self._preprocess_images_with_vision(
-                            message if isinstance(message, str) else "", images
-                        )
-                except Exception as _img_exc:
-                    logging.warning("native image attach failed, falling back to text: %s", _img_exc)
-                    message = self._preprocess_images_with_vision(
-                        message if isinstance(message, str) else "", images
-                    )
-            else:
-                message = self._preprocess_images_with_vision(
-                    message if isinstance(message, str) else "", images
-                )
+            message = self._preprocess_images_with_vision(
+                message if isinstance(message, str) else "", images
+            )

        # Expand @ context references (e.g. @file:main.py, @diff, @folder:src/)
        if isinstance(message, str) and "@" in message:
@@ -8800,20 +8645,12 @@ class HermesCLI:
            if response and result and not result.get("failed") and not result.get("partial"):
                try:
                    from agent.title_generator import maybe_auto_title
-                    # Route title-generation failures through the agent's
-                    # user-visible warning channel so a depleted auxiliary
-                    # provider doesn't silently leave sessions untitled
-                    # (issue #15775).
-                    _title_failure_cb = getattr(
-                        self.agent, "_emit_auxiliary_failure", None
-                    ) if self.agent else None
                    maybe_auto_title(
                        self._session_db,
                        self.session_id,
                        message,
                        response,
                        self.conversation_history,
-                        failure_callback=_title_failure_cb,
                    )
                except Exception:
                    pass
@@ -9691,17 +9528,6 @@ class HermesCLI:
            """Down arrow: browse history when on last line, else move cursor down."""
            event.app.current_buffer.auto_down(count=event.arg)

-        @kb.add('c-l')
-        def handle_ctrl_l(event):
-            """Ctrl+L: force a clean full-screen repaint.
-
-            Recovers the UI after external terminal buffer drift — tmux /
-            cmux tab switches, ``clear`` from a subshell, SSH window
-            restores, etc. — that prompt_toolkit can't detect on its own.
-            Matches the universal bash/zsh/fish/vim/htop convention.
-            """
-            self._force_full_redraw()
-
        @kb.add('c-c')
        def handle_ctrl_c(event):
            """Handle Ctrl+C - cancel interactive prompts, interrupt agent, or exit.
@@ -9929,18 +9755,10 @@ class HermesCLI:
            placeholder while preserving any existing user text in the
            buffer.
            """
-            # Diagnostic canary: measure how long the paste handler blocks
-            # the prompt_toolkit event loop. If this exceeds ~500ms we log
-            # it so recurring "CLI freezes on paste" reports (issue #16263,
-            # macOS Tahoe 26 + iTerm2/Ghostty) arrive with data attached.
-            _paste_handler_start = time.perf_counter()
-            _paste_raw_size = len(event.data or "")
            pasted_text = event.data or ""
            # Normalise line endings — Windows \r\n and old Mac \r both become \n
            # so the 5-line collapse threshold and display are consistent.
            pasted_text = pasted_text.replace('\r\n', '\n').replace('\r', '\n')
-            pasted_text = _strip_leaked_bracketed_paste_wrappers(pasted_text)
-            pasted_text = _strip_leaked_terminal_responses(pasted_text)
            if _should_auto_attach_clipboard_image_on_paste(pasted_text) and self._try_attach_clipboard_image():
                event.app.invalidate()
            if pasted_text:
@@ -9963,17 +9781,6 @@ class HermesCLI:
                    buf.insert_text(prefix + placeholder)
                else:
                    buf.insert_text(pasted_text)
-            _paste_handler_elapsed_ms = (time.perf_counter() - _paste_handler_start) * 1000.0
-            if _paste_handler_elapsed_ms > 500.0:
-                logger.warning(
-                    "Slow bracketed-paste handler: %.1fms to process %d bytes "
-                    "(%d lines) on %s. If the input becomes unresponsive after "
-                    "this, attach this log line to the bug report.",
-                    _paste_handler_elapsed_ms,
-                    _paste_raw_size,
-                    pasted_text.count('\n') + 1 if pasted_text else 0,
-                    sys.platform,
-                )

        @kb.add('c-v')
        def handle_ctrl_v(event):
@@ -10093,16 +9900,7 @@ class HermesCLI:
               still batch newlines.  Alt+Enter only adds 1 newline per
               event so it never triggers this.
            """
-            text = _strip_leaked_bracketed_paste_wrappers(buf.text)
-            text = _strip_leaked_terminal_responses(text)
-            if text != buf.text:
-                cursor = min(buf.cursor_position, len(text))
-                _paste_just_collapsed[0] = True
-                buf.text = text
-                buf.cursor_position = cursor
-                _prev_text_len[0] = len(text)
-                _prev_newline_count[0] = text.count('\n')
-                return
+            text = buf.text
            chars_added = len(text) - _prev_text_len[0]
            _prev_text_len[0] = len(text)
            if _paste_just_collapsed[0] or self._skip_paste_collapse:
@@ -10759,30 +10557,36 @@ class HermesCLI:
        # only cursor_up()s by the stored layout height, missing the extra
        # rows created by reflow — leaving ghost duplicates visible.
        #
-        # It's not just column-shrink: widening, row-shrinking, and
-        # multiplexer-driven SIGWINCH-less redraws (cmux / tmux tab switch)
-        # all produce the same class of drift, where the renderer's tracked
-        # _cursor_pos.y no longer matches terminal reality. The only reliable
-        # recovery is a full screen-clear (\x1b[2J\x1b[H) before the next
-        # redraw, so we force one on every resize rather than trying to
-        # compute the exact drift.
+        # Fix: before the standard erase, inflate _cursor_pos.y so the
+        # cursor moves up far enough to cover the reflowed ghost content.
        _original_on_resize = app._on_resize

        def _resize_clear_ghosts():
+            from prompt_toolkit.data_structures import Point as _Pt
            renderer = app.renderer
            try:
-                out = renderer.output
-                # Reset attributes, erase the entire screen, and home the
-                # cursor. This overwrites any reflowed status-bar rows or
-                # stale content the terminal kept from the prior layout.
-                out.reset_attributes()
-                out.erase_screen()
-                out.cursor_goto(0, 0)
-                out.flush()
-                # Tell the renderer its tracked position is fresh so its
-                # own erase() inside _on_resize doesn't cursor_up() past
-                # the top of the screen.
-                renderer.reset(leave_alternate_screen=False)
+                old_size = renderer._last_size
+                new_size = renderer.output.get_size()
+                if (
+                    old_size
+                    and new_size.columns < old_size.columns
+                    and new_size.columns > 0
+                ):
+                    reflow_factor = (
+                        (old_size.columns + new_size.columns - 1)
+                        // new_size.columns
+                    )
+                    last_h = (
+                        renderer._last_screen.height
+                        if renderer._last_screen
+                        else 0
+                    )
+                    extra = last_h * (reflow_factor - 1)
+                    if extra > 0:
+                        renderer._cursor_pos = _Pt(
+                            x=renderer._cursor_pos.x,
+                            y=renderer._cursor_pos.y + extra,
+                        )
            except Exception:
                pass  # never break resize handling
            _original_on_resize()
@@ -10790,6 +10594,7 @@ class HermesCLI:
        app._on_resize = _resize_clear_ghosts

        def spinner_loop():
+            last_idle_refresh = 0.0
            while not self._should_exit:
                if not self._app:
                    time.sleep(0.1)
@@ -10798,11 +10603,10 @@ class HermesCLI:
                    self._invalidate(min_interval=0.1)
                    time.sleep(0.1)
                else:
-                    # Do not repaint the idle prompt every second. In non-full-screen
-                    # prompt_toolkit mode, background redraws can fight tmux/Ghostty/cmux
-                    # viewport restoration after focus changes and visually move the
-                    # command input area. Keep idle stable; input/agent events still
-                    # invalidate explicitly when the UI actually changes.
+                    now = time.monotonic()
+                    if now - last_idle_refresh >= 1.0:
+                        last_idle_refresh = now
+                        self._invalidate(min_interval=1.0)
                    time.sleep(0.2)

        spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
@@ -10844,10 +10648,6 @@ class HermesCLI:
                    submit_images = []
                    if isinstance(user_input, tuple):
                        user_input, submit_images = user_input
-
-                    if isinstance(user_input, str):
-                        user_input = _strip_leaked_bracketed_paste_wrappers(user_input)
-                        user_input = _strip_leaked_terminal_responses(user_input)
                    
                    # Check for commands — but detect dragged/pasted file paths first.
                    # See _detect_file_drop() for details.
@@ -311,12 +311,6 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None

    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
-            logger.warning(
-                "Cannot compute next run for cron schedule %r: 'croniter' "
-                "is not installed. Install the 'cron' extra (pip install "
-                "'hermes-agent[cron]') to re-enable recurring cron jobs.",
-                schedule.get("expr"),
-            )
            return None
        cron = croniter(schedule["expr"], now)
        next_run = cron.get_next(datetime)
@@ -704,32 +698,10 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
                # Compute next run
                job["next_run_at"] = compute_next_run(job["schedule"], now)

-                # If no next run, decide whether this is terminal completion
-                # (one-shot) or a transient failure (recurring schedule couldn't
-                # compute — e.g. 'croniter' missing from the runtime env).
-                # Recurring jobs must NEVER be silently disabled: that turns a
-                # missing runtime dep into "job completed" and the user's
-                # schedule quietly goes off. See issue #16265.
+                # If no next run (one-shot completed), disable
                if job["next_run_at"] is None:
-                    kind = job.get("schedule", {}).get("kind")
-                    if kind in ("cron", "interval"):
-                        job["state"] = "error"
-                        if not job.get("last_error"):
-                            job["last_error"] = (
-                                "Failed to compute next run for recurring "
-                                "schedule (is the 'croniter' package "
-                                "installed in the gateway's Python env?)"
-                            )
-                        logger.error(
-                            "Job '%s' (%s) could not compute next_run_at; "
-                            "leaving enabled and marking state=error so the "
-                            "job is not silently disabled.",
-                            job.get("name", job["id"]),
-                            kind,
-                        )
-                    else:
-                        job["enabled"] = False
-                        job["state"] = "completed"
+                    job["enabled"] = False
+                    job["state"] = "completed"
                elif job.get("state") != "paused":
                    job["state"] = "scheduled"

@@ -822,8 +822,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])

-    agent = None
-
    # Mark this as a cron session so the approval system can apply cron_mode.
    # This env var is process-wide and persists for the lifetime of the
    # scheduler process — every job this process runs is a cron job.
@@ -1172,24 +1170,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                _session_db.close()
            except (Exception, KeyboardInterrupt) as e:
                logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
-        # Release subprocesses, terminal sandboxes, browser daemons, and the
-        # main OpenAI/httpx client held by this ephemeral cron agent. Without
-        # this, a gateway that ticks cron every N minutes leaks fds per job
-        # until it hits EMFILE (#10200 / "too many open files").
-        try:
-            if agent is not None:
-                agent.close()
-        except (Exception, KeyboardInterrupt) as e:
-            logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
-        # Each cron run spins up a short-lived worker thread whose event loop
-        # dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
-        # httpx clients cached under that loop are now unusable — reap them
-        # so their transports don't accumulate in the process-global cache.
-        try:
-            from agent.auxiliary_client import cleanup_stale_async_clients
-            cleanup_stale_async_clients()
-        except Exception as e:
-            logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)


 def tick(verbose: bool = True, adapters=None, loop=None) -> int:
@@ -36,7 +36,6 @@

      imports = [
        ./nix/packages.nix
-        ./nix/overlays.nix
        ./nix/nixosModules.nix
        ./nix/checks.nix
        ./nix/devShell.nix
@@ -566,8 +566,6 @@ def load_gateway_config() -> GatewayConfig:
                        existing = {}
                    # Deep-merge extra dicts so gateway.json defaults survive
                    merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
-                    if plat_name == Platform.SLACK.value and "enabled" in plat_block:
-                        merged_extra["_enabled_explicit"] = True
                    merged = {**existing, **plat_block}
                    if merged_extra:
                        merged["extra"] = merged_extra
@@ -612,21 +610,16 @@ def load_gateway_config() -> GatewayConfig:
                        bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
                    else:
                        bridged["channel_prompts"] = channel_prompts
-                enabled_was_explicit = "enabled" in platform_cfg
-                if not bridged and not enabled_was_explicit:
+                if not bridged:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
                if not isinstance(plat_data, dict):
                    plat_data = {}
                    platforms_data[plat.value] = plat_data
-                if enabled_was_explicit:
-                    plat_data["enabled"] = platform_cfg["enabled"]
                extra = plat_data.setdefault("extra", {})
                if not isinstance(extra, dict):
                    extra = {}
                    plat_data["extra"] = extra
-                if plat == Platform.SLACK and enabled_was_explicit:
-                    extra["_enabled_explicit"] = True
                extra.update(bridged)

            # Slack settings → env vars (env vars take precedence)
@@ -948,14 +941,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            # No yaml config for Slack — env-only setup, enable it
            config.platforms[Platform.SLACK] = PlatformConfig()
            config.platforms[Platform.SLACK].enabled = True
-        else:
-            slack_config = config.platforms[Platform.SLACK]
-            enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
-            if not slack_config.enabled and not enabled_was_explicit:
-                # Top-level Slack settings such as channel prompts should not
-                # turn an env-token setup into a disabled platform. Only an
-                # explicit slack.enabled/platforms.slack.enabled false should.
-                slack_config.enabled = True
        # If yaml config exists, respect its enabled flag (don't override
        # explicit enabled: false). Token is still stored so skills that
        # send Slack messages can use it without activating the gateway adapter.
@@ -1702,41 +1702,13 @@ class BasePlatformAdapter(ABC):
        the agent is waiting for dangerous-command approval).  This is critical
        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
        the compose box — pausing lets the user type ``/approve`` or ``/deny``.
-
-        Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
-        network round-trip can't stall the refresh cadence.  Telegram- and
-        Discord-side typing expire after ~5s; if any individual send_typing
-        takes longer than the refresh interval, the bubble would die and
-        stay dead until that call returns.  Abandoning the slow call lets
-        the next tick fire a fresh send_typing on schedule — as long as
-        one of them succeeds within the 5s platform-side window, the bubble
-        stays visible across provider stalls / upstream API timeouts.
        """
-        # Bound each send_typing round-trip so the refresh cadence isn't
-        # gated on network health.  Must stay below ``interval`` so a slow
-        # call gets abandoned before the next scheduled tick.
-        _send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
        try:
            while True:
                if stop_event is not None and stop_event.is_set():
                    return
                if chat_id not in self._typing_paused:
-                    try:
-                        await asyncio.wait_for(
-                            self.send_typing(chat_id, metadata=metadata),
-                            timeout=_send_typing_timeout,
-                        )
-                    except asyncio.TimeoutError:
-                        # Slow network — abandon this tick, keep the loop
-                        # on schedule so the next send_typing fires fresh.
-                        pass
-                    except asyncio.CancelledError:
-                        raise
-                    except Exception as typing_err:
-                        logger.debug(
-                            "[%s] send_typing error (non-fatal): %s",
-                            self.name, typing_err,
-                        )
+                    await self.send_typing(chat_id, metadata=metadata)
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
@@ -3294,7 +3294,6 @@ class DiscordAdapter(BasePlatformAdapter):
        chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)

        # Build source
-        guild = getattr(message, "guild", None)
        source = self.build_source(
            chat_id=str(effective_channel.id),
            chat_name=chat_name,
@@ -3304,7 +3303,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
-            guild_id=str(guild.id) if guild else None,
+            guild_id=str(message.guild.id) if message.guild else None,
            parent_chat_id=parent_channel_id,
            message_id=str(message.id),
        )
@@ -28,7 +28,6 @@ from email.header import decode_header
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.base import MIMEBase
-from email.utils import formatdate
 from email import encoders
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -505,7 +504,6 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

-        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -588,7 +586,6 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

-        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -1178,83 +1178,13 @@ class MatrixAdapter(BasePlatformAdapter):
    # Event callbacks
    # ------------------------------------------------------------------

-    def _is_self_sender(self, sender: str) -> bool:
-        """Return True if the sender refers to the bot's own account.
-
-        Matrix user IDs are byte-compared after trimming whitespace and
-        lowercasing — some homeservers normalize the localpart case
-        differently at different API surfaces, and the reply-loop tail
-        of the "hall of mirrors" bug (#15763) has been observed with the
-        bot's own account bypassing a case-sensitive equality check.
-
-        When ``self._user_id`` is empty (whoami hasn't resolved yet, or
-        login failed), we cannot prove a sender is NOT us, so we return
-        True defensively — an unidentified bot dropping its own events
-        is always preferable to falling into an echo loop.
-        """
-        own = (self._user_id or "").strip().lower()
-        if not own:
-            return True
-        return sender.strip().lower() == own
-
-    @staticmethod
-    def _is_system_or_bridge_sender(sender: str) -> bool:
-        """Return True if the sender looks like a system / bridge / appservice
-        identity rather than a real user.
-
-        Appservice namespaces on Matrix conventionally prefix bot / puppet
-        user IDs with an underscore (e.g. ``@_telegram_12345:server``,
-        ``@_discord_999:server``, ``@_slack_...:server``).  Server-notices
-        bots and bridge-controller bots on many homeservers use the same
-        pattern.
-
-        We treat these as system identities for pairing purposes: they
-        should never be offered a pairing code, because an operator
-        approving the code would hand the bridge itself permanent
-        authorization — and every outbound message relayed by the bridge
-        would then loop back into the agent as an "authorized user
-        message", which is the root of issue #15763.
-
-        Matches:
-            ``@_something:server``   — appservice namespace convention
-            ``@:server``             — malformed / empty localpart
-            ``:server``              — malformed, no leading ``@``
-        """
-        s = (sender or "").strip()
-        if not s:
-            return True
-        # Localpart is everything between leading '@' and ':'
-        if s.startswith("@"):
-            s = s[1:]
-        if ":" in s:
-            localpart, _, _ = s.partition(":")
-        else:
-            localpart = s
-        if not localpart:
-            return True
-        return localpart.startswith("_")
-
    async def _on_room_message(self, event: Any) -> None:
        """Handle incoming room message events (text, media)."""
        room_id = str(getattr(event, "room_id", ""))
        sender = str(getattr(event, "sender", ""))

-        # Ignore own messages (case-insensitive; also drops when our own
-        # user_id hasn't been resolved yet — see _is_self_sender docstring
-        # and issue #15763).
-        if self._is_self_sender(sender):
-            return
-
-        # Ignore appservice / bridge / system identities so they never
-        # trigger the pairing flow.  Once a bridge user is paired, every
-        # outbound message it relays would loop back as an authorized
-        # user message (the "hall of mirrors" in #15763).
-        if self._is_system_or_bridge_sender(sender):
-            logger.debug(
-                "Matrix: ignoring system/bridge sender %s in %s",
-                sender,
-                room_id,
-            )
+        # Ignore own messages.
+        if sender == self._user_id:
            return

        # Deduplicate by event ID.
@@ -1724,7 +1654,7 @@ class MatrixAdapter(BasePlatformAdapter):
    async def _on_reaction(self, event: Any) -> None:
        """Handle incoming reaction events."""
        sender = str(getattr(event, "sender", ""))
-        if self._is_self_sender(sender):
+        if sender == self._user_id:
            return
        event_id = str(getattr(event, "event_id", ""))
        if self._is_duplicate_event(event_id):
@@ -2353,26 +2353,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    user = getattr(entity, "user", None)
                    if user and getattr(user, "id", None) == bot_id:
                        return True
-                elif entity_type == "bot_command" and expected:
-                    # Telegram's official group-disambiguation form for slash
-                    # commands (``/cmd@botname``) is emitted as a single
-                    # ``bot_command`` entity covering the whole span — there
-                    # is no accompanying ``mention`` entity. Treat it as a
-                    # direct address to this bot when the ``@botname`` suffix
-                    # matches. This is the form Telegram's own command menu
-                    # autocomplete produces in groups, so dropping it at the
-                    # mention gate would break /new, /reset, /help, ... for
-                    # every group that has ``require_mention`` enabled (#15415).
-                    offset = int(getattr(entity, "offset", -1))
-                    length = int(getattr(entity, "length", 0))
-                    if offset < 0 or length <= 0:
-                        continue
-                    command_text = source_text[offset:offset + length]
-                    at_index = command_text.find("@")
-                    if at_index < 0:
-                        continue
-                    if command_text[at_index:].strip().lower() == expected:
-                        return True
        return False

    def _message_matches_mention_patterns(self, message: Message) -> bool:
@@ -1943,21 +1943,7 @@ class GatewayRunner:
            return
        try:
            if hasattr(agent, "shutdown_memory_provider"):
-                # Pass the agent's own conversation transcript so memory
-                # providers' ``on_session_end`` hooks see the real messages
-                # instead of the empty default (#15165). ``_session_messages``
-                # is set on ``AIAgent`` (run_agent.py:1518) and refreshed at
-                # the end of every ``run_conversation`` turn via
-                # ``_persist_session``; on an agent built through
-                # ``object.__new__`` (test stubs) the attribute may be
-                # absent, so ``getattr`` with a ``None`` default keeps the
-                # call signature-compatible with the pre-fix behaviour
-                # (``shutdown_memory_provider(messages=None)``).
-                session_messages = getattr(agent, "_session_messages", None)
-                if isinstance(session_messages, list):
-                    agent.shutdown_memory_provider(session_messages)
-                else:
-                    agent.shutdown_memory_provider()
+                agent.shutdown_memory_provider()
        except Exception:
            pass
        # Close tool resources (terminal sandboxes, browser daemons,
@@ -1968,15 +1954,6 @@ class GatewayRunner:
                agent.close()
        except Exception:
            pass
-        # Auxiliary async clients (session_search/web/vision/etc.) live in a
-        # process-global cache and are created inside worker threads. Clean up
-        # any entries whose event loop is now dead so their httpx transports do
-        # not accumulate across gateway turns.
-        try:
-            from agent.auxiliary_client import cleanup_stale_async_clients
-            cleanup_stale_async_clients()
-        except Exception:
-            pass

    _STUCK_LOOP_THRESHOLD = 3  # restarts while active before auto-suspend
    _STUCK_LOOP_FILE = ".restart_failure_counts"
@@ -2940,19 +2917,6 @@ class GatewayRunner:
            # disconnect (defense in depth; safe to call repeatedly).
            _kill_tool_subprocesses("final-cleanup")

-            # Reap the process-global auxiliary-client cache once at the very
-            # end of teardown.  Per-turn cleanup runs in _cleanup_agent_resources
-            # for each active agent, but clients bound to worker-thread loops
-            # that died with their ThreadPoolExecutor (notably cron ticks) only
-            # get swept here.  Without this, long-running gateways accumulate
-            # async httpx transports until they hit EMFILE on macOS's default
-            # RLIMIT_NOFILE=256.  See #14210.
-            try:
-                from agent.auxiliary_client import shutdown_cached_clients
-                shutdown_cached_clients()
-            except Exception as _e:
-                logger.debug("shutdown_cached_clients error: %s", _e)
-
            # Close SQLite session DBs so the WAL write lock is released.
            # Without this, --replace and similar restart flows leave the
            # old gateway's connection holding the WAL lock until Python
@@ -4235,18 +4199,9 @@ class GatewayRunner:
        Keep the normal inbound path and the queued follow-up path on the same
        preprocessing pipeline so sender attribution, image enrichment, STT,
        document notes, reply context, and @ references all behave the same.
-
-        Side effect: writes ``self._pending_native_image_paths`` to a list of
-        local image paths when the active model supports native vision AND
-        the user has images attached. The caller consumes and clears this
-        attribute at the ``run_conversation`` site to build a multimodal user
-        turn. When the list is empty, the ``_enrich_message_with_vision``
-        text path has already run and images are represented in-text.
        """
        history = history or []
        message_text = event.text or ""
-        # Reset per-call buffer; set only when native routing is chosen.
-        self._pending_native_image_paths = []

        _is_shared_multi_user = is_shared_multi_user_session(
            source,
@@ -4267,25 +4222,10 @@ class GatewayRunner:
                    audio_paths.append(path)

            if image_paths:
-                # Decide routing: native (attach pixels) vs text (vision_analyze
-                # pre-run + prepend description).  See agent/image_routing.py.
-                _img_mode = self._decide_image_input_mode()
-                if _img_mode == "native":
-                    # Defer attachment to the run_conversation call site.
-                    self._pending_native_image_paths = list(image_paths)
-                    logger.info(
-                        "Image routing: native (model supports vision). %d image(s) will be attached inline.",
-                        len(image_paths),
-                    )
-                else:
-                    logger.info(
-                        "Image routing: text (mode=%s). Pre-analyzing %d image(s) via vision_analyze.",
-                        _img_mode, len(image_paths),
-                    )
-                    message_text = await self._enrich_message_with_vision(
-                        message_text,
-                        image_paths,
-                    )
+                message_text = await self._enrich_message_with_vision(
+                    message_text,
+                    image_paths,
+                )

            if audio_paths:
                message_text = await self._enrich_message_with_transcription(
@@ -4800,58 +4740,6 @@ class GatewayRunner:
                                            "compression",
                                            f"{_new_tokens:,}",
                                        )
-
-                                    # If summary generation failed, the
-                                    # compressor inserted a static fallback
-                                    # placeholder and the dropped turns are
-                                    # gone for good.  Surface a visible
-                                    # warning to the gateway user — agent.log
-                                    # alone is invisible on TG/Discord/etc.
-                                    _comp = getattr(_hyg_agent, "context_compressor", None)
-                                    if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False):
-                                        _dropped = getattr(_comp, "_last_summary_dropped_count", 0)
-                                        _err = getattr(_comp, "_last_summary_error", None) or "unknown error"
-                                        _warn_msg = (
-                                            "⚠️ Context compression summary failed "
-                                            f"({_err}). {_dropped} historical message(s) "
-                                            "were removed and replaced with a placeholder. "
-                                            "Earlier context is no longer recoverable. "
-                                            "Consider /reset for a clean session, or check "
-                                            "your auxiliary.compression model configuration."
-                                        )
-                                        try:
-                                            _adapter = self.adapters.get(source.platform)
-                                            if _adapter and source.chat_id:
-                                                await _adapter.send(source.chat_id, _warn_msg, metadata=_hyg_meta)
-                                        except Exception as _werr:
-                                            logger.warning(
-                                                "Failed to deliver compression-failure warning to user: %s",
-                                                _werr,
-                                            )
-                                    # Separately: if the user's CONFIGURED aux
-                                    # model failed and we recovered by falling
-                                    # back to the main model, tell them — a
-                                    # misconfigured auxiliary.compression.model
-                                    # is something only they can fix, and
-                                    # silent recovery would hide it.
-                                    elif _comp is not None and getattr(_comp, "_last_aux_model_failure_model", None):
-                                        _aux_model = getattr(_comp, "_last_aux_model_failure_model", "")
-                                        _aux_err = getattr(_comp, "_last_aux_model_failure_error", None) or "unknown error"
-                                        _aux_msg = (
-                                            f"ℹ️ Configured compression model `{_aux_model}` "
-                                            f"failed ({_aux_err}). Recovered using your main "
-                                            "model — context is intact — but you may want to "
-                                            "check `auxiliary.compression.model` in config.yaml."
-                                        )
-                                        try:
-                                            _adapter = self.adapters.get(source.platform)
-                                            if _adapter and source.chat_id:
-                                                await _adapter.send(source.chat_id, _aux_msg, metadata=_hyg_meta)
-                                        except Exception as _werr:
-                                            logger.warning(
-                                                "Failed to deliver aux-model-fallback notice to user: %s",
-                                                _werr,
-                                            )
                                finally:
                                    self._cleanup_agent_resources(_hyg_agent)

@@ -7395,17 +7283,6 @@ class GatewayRunner:
                    approx_tokens,
                    new_tokens,
                )
-                # Detect summary-generation failure so we can surface a
-                # visible warning to the user even on the manual /compress
-                # path (otherwise the failure is silently logged).
-                _summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False))
-                _dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0)
-                _summary_err = getattr(compressor, "_last_summary_error", None)
-                # Separately: did the user's CONFIGURED aux model fail
-                # and we recovered via main?  Surface that as an info
-                # note so they can fix their config.
-                _aux_fail_model = getattr(compressor, "_last_aux_model_failure_model", None)
-                _aux_fail_err = getattr(compressor, "_last_aux_model_failure_error", None)
            finally:
                self._cleanup_agent_resources(tmp_agent)
            lines = [f"🗜️ {summary['headline']}"]
@@ -7414,20 +7291,6 @@ class GatewayRunner:
            lines.append(summary["token_line"])
            if summary["note"]:
                lines.append(summary["note"])
-            if _summary_failed:
-                lines.append(
-                    f"⚠️ Summary generation failed ({_summary_err or 'unknown error'}). "
-                    f"{_dropped_count} historical message(s) were removed and replaced "
-                    "with a placeholder; earlier context is no longer recoverable. "
-                    "Consider checking your auxiliary.compression model configuration."
-                )
-            elif _aux_fail_model:
-                lines.append(
-                    f"ℹ️ Configured compression model `{_aux_fail_model}` failed "
-                    f"({_aux_fail_err or 'unknown error'}). Recovered using your main "
-                    "model — context is intact — but you may want to check "
-                    "`auxiliary.compression.model` in config.yaml."
-                )
            return "\n".join(lines)
        except Exception as e:
            logger.warning("Manual compress failed: %s", e)
@@ -8515,29 +8378,6 @@ class GatewayRunner:
        ctx = copy_context()
        return await loop.run_in_executor(None, ctx.run, func, *args)

-    def _decide_image_input_mode(self) -> str:
-        """Resolve the image-input routing for the currently active model.
-
-        Returns ``"native"`` (attach pixels on the user turn) or ``"text"``
-        (pre-analyze with vision_analyze and prepend the description). See
-        agent/image_routing.py for the full decision table.
-
-        The active provider/model are read from config.yaml so the decision
-        tracks ``/model`` switches automatically on the next message.
-        """
-        try:
-            from agent.image_routing import decide_image_input_mode
-            from agent.auxiliary_client import _read_main_model, _read_main_provider
-            from hermes_cli.config import load_config
-
-            cfg = load_config()
-            provider = _read_main_provider()
-            model = _read_main_model()
-            return decide_image_input_mode(provider, model, cfg)
-        except Exception as exc:
-            logger.debug("image_routing: decision failed, falling back to text — %s", exc)
-            return "text"
-
    async def _enrich_message_with_vision(
        self,
        user_text: str,
@@ -8560,7 +8400,6 @@ class GatewayRunner:
            The enriched message string with vision descriptions prepended.
        """
        from tools.vision_tools import vision_analyze_tool
-        from agent.memory_manager import sanitize_context

        analysis_prompt = (
            "Describe everything visible in this image in thorough detail. "
@@ -8579,7 +8418,6 @@ class GatewayRunner:
                result = json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
-                    description = sanitize_context(description)
                    enriched_parts.append(
                        f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                        f"[If you need a closer look, use vision_analyze with "
@@ -10556,39 +10394,7 @@ class GatewayRunner:
            _approval_session_token = set_current_session_key(_approval_session_key)
            register_gateway_notify(_approval_session_key, _approval_notify_sync)
            try:
-                # If _prepare_inbound_message_text buffered image paths for native
-                # attachment, wrap the user turn as an OpenAI-style multimodal
-                # content list. Consume-and-clear so subsequent turns on the same
-                # runner instance don't re-attach stale images.
-                _native_imgs = list(getattr(self, "_pending_native_image_paths", []) or [])
-                self._pending_native_image_paths = []
-                if _native_imgs:
-                    try:
-                        from agent.image_routing import build_native_content_parts
-                        _parts, _skipped = build_native_content_parts(
-                            message,
-                            _native_imgs,
-                        )
-                        if _skipped:
-                            logger.warning(
-                                "Native image attachment: skipped %d unreadable path(s): %s",
-                                len(_skipped), _skipped,
-                            )
-                        if any(p.get("type") == "image_url" for p in _parts):
-                            _run_message: Any = _parts
-                        else:
-                            # All images failed to read — fall back to plain text.
-                            _run_message = message
-                    except Exception as _img_exc:
-                        logger.warning(
-                            "Native image attachment failed, falling back to text: %s",
-                            _img_exc,
-                        )
-                        _run_message = message
-                else:
-                    _run_message = message
-
-                result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
+                result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
            finally:
                unregister_gateway_notify(_approval_session_key)
                reset_current_session_key(_approval_session_token)
@@ -10694,20 +10500,12 @@ class GatewayRunner:
                try:
                    from agent.title_generator import maybe_auto_title
                    all_msgs = result_holder[0].get("messages", []) if result_holder[0] else []
-                    # Route title-generation failures through the agent's
-                    # user-visible warning channel so a depleted auxiliary
-                    # provider doesn't silently leave sessions untitled
-                    # (issue #15775).
-                    _title_failure_cb = getattr(
-                        agent, "_emit_auxiliary_failure", None
-                    )
                    maybe_auto_title(
                        self._session_db,
                        effective_session_id,
                        message,
                        final_response,
                        all_msgs,
-                        failure_callback=_title_failure_cb,
                    )
                except Exception:
                    pass
@@ -11347,16 +11145,13 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
    cron delivery path so live adapters can be used for E2EE rooms.

    Also refreshes the channel directory every 5 minutes and prunes the
-    image/audio/document cache + expired ``hermes debug share`` pastes
-    once per hour.
+    image/audio/document cache once per hour.
    """
    from cron.scheduler import tick as cron_tick
    from gateway.platforms.base import cleanup_image_cache, cleanup_document_cache
-    from hermes_cli.debug import _sweep_expired_pastes

    IMAGE_CACHE_EVERY = 60   # ticks — once per hour at default 60s interval
    CHANNEL_DIR_EVERY = 5    # ticks — every 5 minutes
-    PASTE_SWEEP_EVERY = 60   # ticks — once per hour

    logger.info("Cron ticker started (interval=%ds)", interval)
    tick_count = 0
@@ -11397,17 +11192,6 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
            except Exception as e:
                logger.debug("Document cache cleanup error: %s", e)

-        if tick_count % PASTE_SWEEP_EVERY == 0:
-            try:
-                deleted, remaining = _sweep_expired_pastes()
-                if deleted:
-                    logger.info(
-                        "Paste sweep: deleted %d expired paste(s), %d pending",
-                        deleted, remaining,
-                    )
-            except Exception as e:
-                logger.debug("Paste sweep error: %s", e)
-
        stop_event.wait(timeout=interval)
    logger.info("Cron ticker stopped")

@@ -224,14 +224,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("ARCEEAI_API_KEY",),
        base_url_env_var="ARCEE_BASE_URL",
    ),
-    "gmi": ProviderConfig(
-        id="gmi",
-        name="GMI Cloud",
-        auth_type="api_key",
-        inference_base_url="https://api.gmi-serving.com/v1",
-        api_key_env_vars=("GMI_API_KEY",),
-        base_url_env_var="GMI_BASE_URL",
-    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -374,37 +366,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
    ),
 }

-# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in
-# providers/ that is not already declared above.  New providers only need a
-# providers/*.py file — no edits to this file required.
-try:
-    from providers import list_providers as _list_providers_for_registry
-    for _pp in _list_providers_for_registry():
-        if _pp.name in PROVIDER_REGISTRY:
-            continue
-        if _pp.auth_type != "api_key" or not _pp.env_vars:
-            continue
-        # Skip providers that need custom token resolution (copilot, kimi, zai)
-        # — those are already fully declared above.
-        if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai"}:
-            continue
-        _api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_BASE_URL") and not v.endswith("_URL"))
-        _base_url_var = next((v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), None)
-        PROVIDER_REGISTRY[_pp.name] = ProviderConfig(
-            id=_pp.name,
-            name=_pp.display_name or _pp.name,
-            auth_type="api_key",
-            inference_base_url=_pp.base_url,
-            api_key_env_vars=_api_key_vars or _pp.env_vars,
-            base_url_env_var=_base_url_var or "",
-        )
-        # Also register aliases so resolve_provider() resolves them
-        for _alias in _pp.aliases:
-            if _alias not in PROVIDER_REGISTRY:
-                PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name]
-except Exception:
-    pass
-

 # =============================================================================
 # Anthropic Key Helper
@@ -1159,7 +1120,6 @@ def resolve_provider(
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
-        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
@@ -1181,17 +1141,6 @@ def resolve_provider(
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
    }
-    # Extend with aliases declared in providers/*.py that aren't already mapped.
-    # This keeps providers/ as the single source for new aliases while the
-    # hardcoded dict above remains authoritative for existing ones.
-    try:
-        from providers import list_providers as _lp
-        for _pp in _lp():
-            for _alias in _pp.aliases:
-                if _alias not in _PROVIDER_ALIASES:
-                    _PROVIDER_ALIASES[_alias] = _pp.name
-    except Exception:
-        pass
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

    if normalized == "openrouter":
@@ -36,23 +36,12 @@ _EXCLUDED_DIRS = {
    "__pycache__",      # bytecode caches — regenerated on import
    ".git",             # nested git dirs (profiles shouldn't have these, but safety)
    "node_modules",     # js deps if website/ somehow leaks in
-    "backups",          # prior auto-backups — don't nest backups exponentially
-    "checkpoints",      # session-local trajectory caches — regenerated per-session,
-                        # session-hash-keyed so they don't port to another machine anyway
 }

 # File-name suffixes to skip
 _EXCLUDED_SUFFIXES = (
    ".pyc",
    ".pyo",
-    # SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
-    # via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
-    # rollback-journal alongside would pair a fresh snapshot with stale sidecar
-    # state and produce a torn restore on the next open. They're transient and
-    # regenerated on first connection anyway.
-    ".db-wal",
-    ".db-shm",
-    ".db-journal",
 )

 # File names to skip (runtime state that's meaningless on another machine)
@@ -465,12 +454,6 @@ def run_import(args) -> None:
 # Critical state files to include in quick snapshots (relative to HERMES_HOME).
 # Everything else is either regeneratable (logs, cache) or managed separately
 # (skills, repo, sessions/).
-#
-# Entries may be individual files OR directories.  Directories are captured
-# recursively; missing entries are silently skipped.  Pairing data lives in
-# platform-specific JSON blobs outside state.db, so it's listed here explicitly
-# — `hermes update` snapshots this set before pulling so approved-user lists
-# are recoverable if anything goes wrong (issue #15733).
 _QUICK_STATE_FILES = (
    "state.db",
    "config.yaml",
@@ -480,10 +463,6 @@ _QUICK_STATE_FILES = (
    "gateway_state.json",
    "channel_directory.json",
    "processes.json",
-    # Pairing stores (generic + per-platform JSONs outside state.db)
-    "pairing",                          # legacy location (gateway/pairing.py)
-    "platforms/pairing",                # new location (gateway/pairing.py)
-    "feishu_comment_pairing.json",      # Feishu comment subscription pairings
 )

 _QUICK_SNAPSHOTS_DIR = "state-snapshots"
@@ -519,27 +498,7 @@ def create_quick_snapshot(

    for rel in _QUICK_STATE_FILES:
        src = home / rel
-        if not src.exists():
-            continue
-
-        if src.is_dir():
-            # Walk the directory and record each file individually in the
-            # manifest so restore can treat them uniformly.  Empty dirs are
-            # skipped (nothing to snapshot).
-            for sub in src.rglob("*"):
-                if not sub.is_file():
-                    continue
-                sub_rel = sub.relative_to(home).as_posix()
-                dst = snap_dir / sub_rel
-                dst.parent.mkdir(parents=True, exist_ok=True)
-                try:
-                    shutil.copy2(sub, dst)
-                    manifest[sub_rel] = dst.stat().st_size
-                except (OSError, PermissionError) as exc:
-                    logger.warning("Could not snapshot %s: %s", sub_rel, exc)
-            continue
-
-        if not src.is_file():
+        if not src.exists() or not src.is_file():
            continue

        dst = snap_dir / rel
@@ -694,138 +653,3 @@ def run_quick_backup(args) -> None:
        print(f"  Restore with: /snapshot restore {snap_id}")
    else:
        print("No state files found to snapshot.")
-
-
-# ---------------------------------------------------------------------------
-# Pre-update auto-backup
-# ---------------------------------------------------------------------------
-
-_PRE_UPDATE_BACKUPS_DIR = "backups"
-_PRE_UPDATE_PREFIX = "pre-update-"
-_PRE_UPDATE_DEFAULT_KEEP = 5
-
-
-def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
-    home = hermes_home or get_hermes_home()
-    return home / _PRE_UPDATE_BACKUPS_DIR
-
-
-def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
-    """Remove oldest pre-update backups beyond the keep limit.
-
-    Returns the number of files deleted.  Only touches files matching
-    ``pre-update-*.zip`` so hand-made zips dropped in the same directory
-    are never touched.
-    """
-    if keep < 0:
-        keep = 0
-    if not backup_dir.exists():
-        return 0
-
-    backups = sorted(
-        (p for p in backup_dir.iterdir()
-         if p.is_file() and p.name.startswith(_PRE_UPDATE_PREFIX) and p.suffix.lower() == ".zip"),
-        key=lambda p: p.name,
-        reverse=True,
-    )
-
-    deleted = 0
-    for p in backups[keep:]:
-        try:
-            p.unlink()
-            deleted += 1
-        except OSError as exc:
-            logger.warning("Failed to prune backup %s: %s", p.name, exc)
-
-    return deleted
-
-
-def create_pre_update_backup(
-    hermes_home: Optional[Path] = None,
-    keep: int = _PRE_UPDATE_DEFAULT_KEEP,
-) -> Optional[Path]:
-    """Create a full zip backup of HERMES_HOME under ``backups/``.
-
-    Mirrors :func:`run_backup` (same exclusion rules, same SQLite safe-copy)
-    but writes to ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip`` and
-    auto-prunes old pre-update backups.
-
-    Returns the path to the created zip, or ``None`` if no files were
-    found or the backup could not be created.  Never raises — the caller
-    (``hermes update``) should continue even if the backup fails.
-    """
-    hermes_root = hermes_home or get_default_hermes_root()
-    if not hermes_root.is_dir():
-        return None
-
-    backup_dir = _pre_update_backup_dir(hermes_root)
-    try:
-        backup_dir.mkdir(parents=True, exist_ok=True)
-    except OSError as exc:
-        logger.warning("Could not create pre-update backup dir %s: %s", backup_dir, exc)
-        return None
-
-    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
-    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
-
-    # Collect files (same logic as run_backup, minus the chatty progress prints)
-    files_to_add: list[tuple[Path, Path]] = []
-    try:
-        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
-            dp = Path(dirpath)
-            # Prune excluded directories in-place so os.walk doesn't descend
-            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
-
-            for fname in filenames:
-                fpath = dp / fname
-                try:
-                    rel = fpath.relative_to(hermes_root)
-                except ValueError:
-                    continue
-
-                if _should_exclude(rel):
-                    continue
-
-                # Skip the output zip itself if it already exists
-                try:
-                    if fpath.resolve() == out_path.resolve():
-                        continue
-                except (OSError, ValueError):
-                    pass
-
-                files_to_add.append((fpath, rel))
-    except OSError as exc:
-        logger.warning("Pre-update backup: walk failed: %s", exc)
-        return None
-
-    if not files_to_add:
-        return None
-
-    try:
-        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
-            for abs_path, rel_path in files_to_add:
-                try:
-                    if abs_path.suffix == ".db":
-                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-                            tmp_db = Path(tmp.name)
-                        try:
-                            if _safe_copy_db(abs_path, tmp_db):
-                                zf.write(tmp_db, arcname=str(rel_path))
-                        finally:
-                            tmp_db.unlink(missing_ok=True)
-                    else:
-                        zf.write(abs_path, arcname=str(rel_path))
-                except (PermissionError, OSError, ValueError) as exc:
-                    logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
-                    continue
-    except OSError as exc:
-        logger.warning("Pre-update backup: zip write failed: %s", exc)
-        # Best-effort cleanup of partial file
-        try:
-            out_path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        return None
-
-    _prune_pre_update_backups(backup_dir, keep=keep)
-    return out_path
@@ -62,8 +62,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
               aliases=("reset",)),
    CommandDef("clear", "Clear screen and start a new session", "Session",
               cli_only=True),
-    CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
-               cli_only=True),
    CommandDef("history", "Show conversation history", "Session",
               cli_only=True),
    CommandDef("save", "Save the current conversation", "Session",
@@ -389,20 +389,6 @@ DEFAULT_CONFIG = {
        # (60+ tool iterations with tiny output) before users assume the
        # bot is dead and /restart.
        "gateway_notify_interval": 180,
-        # How user-attached images are presented to the main model on each turn.
-        #   "auto"   — attach natively when the active model reports
-        #              supports_vision=True AND the user hasn't explicitly
-        #              configured auxiliary.vision.provider.  Otherwise fall
-        #              back to text (vision_analyze pre-analysis).
-        #   "native" — always attach natively; non-vision models will either
-        #              error at the provider or get a last-chance text fallback
-        #              (see run_agent._prepare_messages_for_api).
-        #   "text"   — always pre-analyze with vision_analyze and prepend the
-        #              description as text; the main model never sees pixels.
-        # Affects gateway platforms, the TUI, and CLI /attach.  vision_analyze
-        # remains available as a tool regardless of this setting — the routing
-        # only controls how inbound user images are presented.
-        "image_input_mode": "auto",
    },
    
    "terminal": {
@@ -1051,20 +1037,6 @@ DEFAULT_CONFIG = {
        "seen": {},
    },

-    # ``hermes update`` behaviour.
-    "updates": {
-        # Run a full ``hermes backup``-style zip of HERMES_HOME before every
-        # ``hermes update``.  Backups land in ``<HERMES_HOME>/backups/`` and
-        # can be restored with ``hermes import <path>``.  Off by default —
-        # on large HERMES_HOME directories the zip can add minutes to every
-        # update.  Set to true to re-enable, or pass ``--backup`` to opt in
-        # for a single update run.
-        "pre_update_backup": False,
-        # How many pre-update backup zips to retain.  Older ones are pruned
-        # automatically after each successful backup.
-        "backup_keep": 5,
-    },
-
    # Config schema version - bump this when adding new required fields
    "_config_version": 22,
 }
@@ -1254,22 +1226,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "GMI_API_KEY": {
-        "description": "GMI Cloud API key",
-        "prompt": "GMI Cloud API key",
-        "url": "https://www.gmicloud.ai/",
-        "password": True,
-        "category": "provider",
-        "advanced": True,
-    },
-    "GMI_BASE_URL": {
-        "description": "GMI Cloud base URL override",
-        "prompt": "GMI Cloud base URL (leave empty for default)",
-        "url": None,
-        "password": False,
-        "category": "provider",
-        "advanced": True,
-    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -4252,45 +4208,3 @@ def config_command(args):
        print("  hermes config path      Show config file path")
        print("  hermes config env-path  Show .env file path")
        sys.exit(1)
-
-
-# ── Profile-driven env var injection ─────────────────────────────────────────
-# Any provider registered in providers/ with auth_type="api_key" automatically
-# gets its env_vars exposed in OPTIONAL_ENV_VARS without editing this file.
-# Runs once at import time.
-
-_profile_env_vars_injected = False
-
-
-def _inject_profile_env_vars() -> None:
-    """Populate OPTIONAL_ENV_VARS from provider profiles not already listed.
-
-    Called once at module load time. Idempotent — repeated calls are no-ops.
-    """
-    global _profile_env_vars_injected
-    if _profile_env_vars_injected:
-        return
-    _profile_env_vars_injected = True
-    try:
-        from providers import list_providers
-        for _pp in list_providers():
-            if _pp.auth_type not in ("api_key",):
-                continue
-            for _var in _pp.env_vars:
-                if _var in OPTIONAL_ENV_VARS:
-                    continue
-                _is_key = not _var.endswith("_BASE_URL") and not _var.endswith("_URL")
-                OPTIONAL_ENV_VARS[_var] = {
-                    "description": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL override'}",
-                    "prompt": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL (leave empty for default)'}",
-                    "url": _pp.signup_url or None,
-                    "password": _is_key,
-                    "category": "provider",
-                    "advanced": True,
-                }
-    except Exception:
-        pass
-
-
-# Eagerly inject so that OPTIONAL_ENV_VARS is fully populated at import time.
-_inject_profile_env_vars()
@@ -45,13 +45,8 @@ def _pending_file() -> Path:
    Each entry: ``{"url": "...", "expire_at": <unix_ts>}``.  Scheduled
    DELETEs used to be handled by spawning a detached Python process per
    paste that slept for 6 hours; those accumulated forever if the user
-    ran ``hermes debug share`` repeatedly.
-
-    Deletion is now driven by the gateway's cron ticker
-    (``gateway/run.py::_start_cron_ticker``) which calls
-    ``_sweep_expired_pastes`` once per hour.  ``hermes debug share`` also
-    runs an opportunistic sweep on entry as a fallback for CLI-only users
-    who never start the gateway.
+    ran ``hermes debug share`` repeatedly.  We now persist the schedule
+    to disk and sweep expired entries on the next debug invocation.
    """
    return get_hermes_home() / "pastes" / "pending.json"

@@ -228,10 +223,9 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
    interpreters that never exited until the sleep completed.

    The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
-    and the gateway's cron ticker sweeps expired entries once per hour.
-    ``hermes debug share`` also runs an opportunistic sweep as a fallback
-    for CLI-only users.  If neither runs again, paste.rs's own retention
-    policy handles cleanup.
+    and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
+    every ``hermes debug`` invocation.  If the user never runs ``hermes debug``
+    again, paste.rs's own retention policy handles cleanup.
    """
    _record_pending(urls, delay_seconds=delay_seconds)

@@ -46,7 +46,6 @@ _PROVIDER_ENV_HINTS = (
    "Z_AI_API_KEY",
    "KIMI_API_KEY",
    "KIMI_CN_API_KEY",
-    "GMI_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
@@ -164,84 +163,6 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
        check_warn("Could not verify systemd linger", f"({linger_detail})")


-_APIKEY_PROVIDERS_CACHE: list | None = None
-
-
-def _build_apikey_providers_list() -> list:
-    """Build the API-key provider health-check list once and cache it.
-
-    Tuple format: (name, env_vars, default_url, base_env, supports_models_endpoint)
-    Base list augmented with any ProviderProfile with auth_type="api_key" not
-    already present — adding providers/*.py is sufficient to get into doctor.
-    """
-    _static = [
-        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
-        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
-        ("StepFun Step Plan", ("STEPFUN_API_KEY",),                          "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
-        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
-        ("Arcee AI",         ("ARCEEAI_API_KEY",),                           "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
-        ("GMI Cloud",        ("GMI_API_KEY",),                               "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
-        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                          "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
-        ("Hugging Face",     ("HF_TOKEN",),                                  "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
-        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                            "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
-        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                        "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
-        # MiniMax: the /anthropic endpoint doesn't support /models; use the /v1 surface.
-        ("MiniMax",          ("MINIMAX_API_KEY",),                           "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
-        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                        "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
-        ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",),                       "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
-        ("Kilo Code",        ("KILOCODE_API_KEY",),                          "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
-        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                      "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
-        # OpenCode Go has no shared /models endpoint; skip the health check.
-        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                       None,                                  "OPENCODE_GO_BASE_URL", False),
-    ]
-    _known_names = {t[0] for t in _static}
-    # Also index by profile canonical name so profiles without display_name
-    # don't create duplicate entries for providers already in the static list.
-    _known_canonical: set[str] = set()
-    _name_to_canonical = {
-        "Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding",
-        "StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn",
-        "Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek",
-        "Hugging Face": "huggingface", "NVIDIA NIM": "nvidia",
-        "Alibaba/DashScope": "alibaba", "MiniMax": "minimax",
-        "MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway",
-        "Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen",
-        "OpenCode Go": "opencode-go",
-    }
-    for _label, _canonical in _name_to_canonical.items():
-        _known_canonical.add(_canonical)
-    try:
-        from providers import list_providers
-        from providers.base import ProviderProfile as _PP
-        for _pp in list_providers():
-            if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars:
-                continue
-            _label = _pp.display_name or _pp.name
-            if _label in _known_names or _pp.name in _known_canonical:
-                continue
-            # Separate API-key vars from base-URL override vars — the health-check
-            # loop sends the first found value as Authorization: Bearer, so a URL
-            # string must never be picked.
-            _key_vars = tuple(
-                v for v in _pp.env_vars
-                if not v.endswith("_BASE_URL") and not v.endswith("_URL")
-            )
-            _base_var = next(
-                (v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")),
-                None,
-            )
-            if not _key_vars:
-                continue
-            _models_url = (
-                (_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
-                if _pp.base_url else None
-            )
-            _static.append((_label, _key_vars, _models_url, _base_var, True))
-    except Exception:
-        pass
-    return _static
-
-
 def run_doctor(args):
    """Run diagnostic checks."""
    should_fix = getattr(args, 'fix', False)
@@ -1009,11 +930,26 @@ def run_doctor(args):

    # -- API-key providers --
    # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
-    # Cached at module level after first build — profiles auto-extend it.
-    global _APIKEY_PROVIDERS_CACHE
-    if _APIKEY_PROVIDERS_CACHE is None:
-        _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
-    _apikey_providers = _APIKEY_PROVIDERS_CACHE
+    # If supports_models_endpoint is False, we skip the health check and just show "configured"
+    _apikey_providers = [
+        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
+        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
+        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
+        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
+        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
+        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
+        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
+        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
+        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
+        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
+        ("Vercel AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
+        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
+        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
+        # OpenCode Go has no shared /models endpoint; skip the health check.
+        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         None,                                  "OPENCODE_GO_BASE_URL", False),
+    ]
    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
        _key = ""
        for _ev in _env_vars:
@@ -44,7 +44,6 @@ Usage:
 """

 import argparse
-import json
 import os
 import shutil
 import subprocess
@@ -596,22 +595,17 @@ def _session_browse_picker(sessions: list) -> Optional[str]:


 def _resolve_last_session(source: str = "cli") -> Optional[str]:
-    """Look up the most recently-used session ID for a source."""
-    db = None
+    """Look up the most recent session ID for a source."""
    try:
        from hermes_state import SessionDB

        db = SessionDB()
        sessions = db.search_sessions(source=source, limit=1)
-        return sessions[0]["id"] if sessions else None
+        db.close()
+        if sessions:
+            return sessions[0]["id"]
    except Exception:
        pass
-    finally:
-        if db is not None:
-            try:
-                db.close()
-            except Exception:
-                pass
    return None


@@ -766,20 +760,9 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
    return None


-def _read_tui_active_session_file(path: Optional[str]) -> Optional[str]:
-    if not path:
-        return None
-    try:
-        data = json.loads(Path(path).read_text(encoding="utf-8"))
-        sid = str(data.get("session_id") or "").strip()
-        return sid or None
-    except Exception:
-        return None
-
-
-def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Optional[str] = None) -> None:
+def _print_tui_exit_summary(session_id: Optional[str]) -> None:
    """Print a shell-visible epilogue after TUI exits."""
-    target = _read_tui_active_session_file(active_session_file) or session_id or _resolve_last_session(source="tui")
+    target = session_id or _resolve_last_session(source="tui")
    if not target:
        return

@@ -829,29 +812,8 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti
    )


-_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"})
-
-
 def _tui_need_npm_install(root: Path) -> bool:
-    """True when @hermes/ink is missing or node_modules is behind package-lock.json.
-
-    Compares ``package-lock.json`` against ``node_modules/.package-lock.json``
-    (npm's hidden lockfile) by **content**, not mtime: git checkouts and npm
-    rewrites can bump the root lockfile's timestamp even when installed deps
-    already match, which used to trigger a spurious "Installing TUI
-    dependencies" on every launch.
-
-    For each entry in the root lock's ``packages`` map:
-      - missing from hidden lock → reinstall (unless the entry is marked
-        ``optional`` or ``peer``, which npm may intentionally skip per platform)
-      - present but with differing fields (excluding npm-written runtime
-        annotations like ``ideallyInert``) → reinstall
-
-    Extra entries that exist only in the hidden lock are ignored — stale
-    transitives left over from a removed dependency don't break runtime and
-    we'd rather not force a reinstall for them. Falls back to mtime
-    comparison if either lockfile is unparseable.
-    """
+    """True when @hermes/ink is missing or node_modules is behind package-lock.json (post-pull)."""
    ink = root / "node_modules" / "@hermes" / "ink" / "package.json"
    if not ink.is_file():
        return True
@@ -861,35 +823,7 @@ def _tui_need_npm_install(root: Path) -> bool:
    marker = root / "node_modules" / ".package-lock.json"
    if not marker.is_file():
        return True
-
-    # Compare lockfile contents, not mtimes: git checkouts and npm rewrites
-    # can bump the root lockfile timestamp even when installed deps already
-    # match. Fall back to mtime when either file is unparseable.
-    try:
-        wanted = json.loads(lock.read_text(encoding="utf-8")).get("packages") or {}
-        installed = json.loads(marker.read_text(encoding="utf-8")).get("packages") or {}
-    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
-        return lock.stat().st_mtime > marker.stat().st_mtime
-
-    def comparable(pkg: dict) -> dict:
-        return {k: v for k, v in pkg.items() if k not in _NPM_LOCK_RUNTIME_KEYS}
-
-    for name, pkg in wanted.items():
-        if not name:
-            continue
-
-        if not isinstance(pkg, dict):
-            continue
-
-        if name not in installed:
-            if pkg.get("optional") or pkg.get("peer"):
-                continue
-            return True
-
-        if isinstance(installed[name], dict) and comparable(pkg) != comparable(installed[name]):
-            return True
-
-    return False
+    return lock.stat().st_mtime > marker.stat().st_mtime


 def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
@@ -1103,14 +1037,7 @@ def _launch_tui(
    """Replace current process with the TUI."""
    tui_dir = PROJECT_ROOT / "ui-tui"

-    import tempfile
-
    env = os.environ.copy()
-    active_session_fd, active_session_file = tempfile.mkstemp(
-        prefix="hermes-tui-active-session-", suffix=".json"
-    )
-    os.close(active_session_fd)
-    env["HERMES_TUI_ACTIVE_SESSION_FILE"] = active_session_file
    env["HERMES_PYTHON_SRC_ROOT"] = os.environ.get(
        "HERMES_PYTHON_SRC_ROOT", str(PROJECT_ROOT)
    )
@@ -1138,20 +1065,13 @@ def _launch_tui(
        env["HERMES_TUI_RESUME"] = resume_session_id

    argv, cwd = _make_tui_argv(tui_dir, tui_dev)
-    code: Optional[int] = None
    try:
-        try:
-            code = subprocess.call(argv, cwd=str(cwd), env=env)
-        except KeyboardInterrupt:
-            code = 130
+        code = subprocess.call(argv, cwd=str(cwd), env=env)
+    except KeyboardInterrupt:
+        code = 130

-        if code in (0, 130):
-            _print_tui_exit_summary(resume_session_id, active_session_file)
-    finally:
-        try:
-            os.unlink(active_session_file)
-        except OSError:
-            pass
+    if code in (0, 130):
+        _print_tui_exit_summary(resume_session_id)

    sys.exit(code)

@@ -1528,21 +1448,6 @@ def cmd_model(args):
    select_provider_and_model(args=args)


-def _is_profile_api_key_provider(provider_id: str) -> bool:
-    """Return True when provider_id maps to a profile with auth_type='api_key'.
-
-    Used as a catch-all in select_provider_and_model() so that new providers
-    declared in providers/*.py automatically dispatch to _model_flow_api_key_provider
-    without requiring an explicit elif branch here.
-    """
-    try:
-        from providers import get_provider_profile
-        _p = get_provider_profile(provider_id)
-        return _p is not None and _p.auth_type == "api_key"
-    except Exception:
-        return False
-
-
 def select_provider_and_model(args=None):
    """Core provider selection + model picking logic.

@@ -1832,10 +1737,9 @@ def select_provider_and_model(args=None):
        "huggingface",
        "xiaomi",
        "arcee",
-        "gmi",
        "nvidia",
        "ollama-cloud",
-    ) or _is_profile_api_key_provider(selected_provider):
+    ):
        _model_flow_api_key_provider(config, selected_provider, current_model)

    # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -3428,26 +3332,7 @@ def _model_flow_named_custom(config, provider_info):
            provider_entry = providers_cfg.get(provider_key)
            if isinstance(provider_entry, dict):
                provider_entry["default_model"] = model_name
-                # Only persist an inline api_key when the user originally had
-                # one (either a literal secret or a ``${VAR}`` template). When
-                # the entry relies on ``key_env``, do not synthesize a
-                # ``${key_env}`` api_key — the runtime already resolves the
-                # key from ``key_env`` directly, and writing the resolved
-                # secret (or even a synthesized template) would silently
-                # downgrade credential hygiene on entries that intentionally
-                # keep plaintext out of ``config.yaml``. See issue #15803.
-                original_api_key_ref = str(
-                    provider_info.get("api_key_ref", "") or ""
-                ).strip()
-                original_api_key = str(
-                    provider_info.get("api_key", "") or ""
-                ).strip()
-                had_inline_api_key = bool(original_api_key_ref or original_api_key)
-                if (
-                    had_inline_api_key
-                    and config_api_key
-                    and not str(provider_entry.get("api_key", "") or "").strip()
-                ):
+                if config_api_key and not str(provider_entry.get("api_key", "") or "").strip():
                    provider_entry["api_key"] = config_api_key
                if key_env and not str(provider_entry.get("key_env", "") or "").strip():
                    provider_entry["key_env"] = key_env
@@ -6238,96 +6123,6 @@ def _ensure_fhs_path_guard() -> None:
        print("    (reload your shell or run 'source ~/.bashrc' to pick it up)")


-def _run_pre_update_backup(args) -> None:
-    """Create a full zip backup of HERMES_HOME before running the update.
-
-    Gated on ``updates.pre_update_backup`` in config (default false).  Off
-    by default because the zip can add minutes to every update on large
-    HERMES_HOME directories.  The ``--backup`` flag on ``hermes update``
-    opts in for a single run; ``--no-backup`` forces it off when config
-    has it enabled.  Never raises — a backup failure should not block the
-    update itself.
-    """
-    # CLI flags win over config.  --no-backup beats --backup if both are set.
-    if getattr(args, "no_backup", False):
-        print("◆ Pre-update backup: skipped (--no-backup)")
-        print()
-        return
-
-    force_backup = bool(getattr(args, "backup", False))
-
-    try:
-        from hermes_cli.config import load_config
-        cfg = load_config()
-    except Exception as exc:
-        logging.getLogger(__name__).debug("Could not load config for pre-update backup: %s", exc)
-        cfg = {}
-
-    updates_cfg = cfg.get("updates", {}) if isinstance(cfg, dict) else {}
-    enabled = updates_cfg.get("pre_update_backup", False)
-    keep = updates_cfg.get("backup_keep", 5)
-
-    if not enabled and not force_backup:
-        # Silent by default — the backup is off, most users don't need to
-        # hear about it on every update.  They can opt in via --backup
-        # or by flipping the config knob.
-        return
-
-    try:
-        from hermes_cli.backup import create_pre_update_backup
-    except Exception as exc:
-        print(f"⚠ Pre-update backup: could not load backup module ({exc}); continuing update.")
-        print()
-        return
-
-    print("◆ Creating pre-update backup...")
-    t0 = _time.monotonic()
-    try:
-        out_path = create_pre_update_backup(keep=int(keep))
-    except Exception as exc:  # defensive — helper already swallows, but just in case
-        print(f"  ⚠ Backup failed: {exc}")
-        print("  Continuing with update.")
-        print()
-        return
-
-    elapsed = _time.monotonic() - t0
-
-    if out_path is None:
-        print("  ⚠ Backup skipped (no files found or write failed); continuing update.")
-        print()
-        return
-
-    try:
-        size_bytes = out_path.stat().st_size
-    except OSError:
-        size_bytes = 0
-
-    # Human-readable size
-    size_str = f"{size_bytes} B"
-    for unit in ("KB", "MB", "GB"):
-        if size_bytes < 1024:
-            break
-        size_bytes /= 1024
-        size_str = f"{size_bytes:.1f} {unit}"
-
-    # Render path using display_hermes_home so the user sees ~/.hermes/...
-    try:
-        from hermes_constants import get_hermes_home, display_hermes_home
-        home = get_hermes_home()
-        try:
-            display_path = f"{display_hermes_home()}/{out_path.relative_to(home)}"
-        except ValueError:
-            display_path = str(out_path)
-    except Exception:
-        display_path = str(out_path)
-
-    print(f"  Saved:    {display_path} ({size_str}, {elapsed:.1f}s)")
-    print(f"  Restore:  hermes import {out_path}")
-    print(f"  Disable:  omit --backup (backups are off by default)")
-    print(f"            set updates.pre_update_backup: false in config.yaml")
-    print()
-
-
 def cmd_update(args):
    """Update Hermes Agent to the latest version.

@@ -6370,10 +6165,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
    print("⚕ Updating Hermes Agent...")
    print()

-    # Pre-update backup — runs before any git/file mutation so users can
-    # always roll back to the exact state they had before this update.
-    _run_pre_update_backup(args)
-
    # Try git-based update first, fall back to ZIP download on Windows
    # when git file I/O is broken (antivirus, NTFS filter drivers, etc.)
    use_zip_update = False
@@ -6523,22 +6314,6 @@ def _cmd_update_impl(args, gateway_mode: bool):

        print(f"→ Found {commit_count} new commit(s)")

-        # Snapshot critical state (state.db, config, pairing JSONs, etc.)
-        # before pulling so a user can recover if something goes wrong.
-        # Issue #15733 reported missing pairing data after an update; even
-        # though `git pull` can't touch $HERMES_HOME, this is cheap
-        # belt-and-suspenders insurance and gives the user something to
-        # restore from via `/snapshot list` / `/snapshot restore <id>`.
-        try:
-            from hermes_cli.backup import create_quick_snapshot
-
-            snap_id = create_quick_snapshot(label="pre-update")
-            if snap_id:
-                print(f"  ✓ Pre-update snapshot: {snap_id}")
-        except Exception as exc:
-            # Never let a snapshot failure block an update.
-            logger.debug("Pre-update snapshot failed: %s", exc)
-
        print("→ Pulling updates...")
        update_succeeded = False
        try:
@@ -7633,22 +7408,6 @@ def cmd_logs(args):
    )


-def _build_provider_choices() -> list[str]:
-    """Build the --provider choices list from CANONICAL_PROVIDERS + 'auto'."""
-    try:
-        from hermes_cli.models import CANONICAL_PROVIDERS as _cp
-        return ["auto"] + [p.slug for p in _cp]
-    except Exception:
-        # Fallback: static list guarantees the CLI always works
-        return [
-            "auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
-            "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
-            "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
-            "stepfun", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee",
-            "nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen", "opencode-go",
-        ]
-
-
 def main():
    """Main entry point for hermes CLI."""
    parser = argparse.ArgumentParser(
@@ -7842,7 +7601,29 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=_build_provider_choices(),
+        choices=[
+            "auto",
+            "openrouter",
+            "nous",
+            "openai-codex",
+            "copilot-acp",
+            "copilot",
+            "anthropic",
+            "gemini",
+            "xai",
+            "ollama-cloud",
+            "huggingface",
+            "zai",
+            "kimi-coding",
+            "kimi-coding-cn",
+            "stepfun",
+            "minimax",
+            "minimax-cn",
+            "kilocode",
+            "xiaomi",
+            "arcee",
+            "nvidia",
+        ],
        default=None,
        help="Inference provider (default: auto)",
    )
@@ -9761,18 +9542,6 @@ Examples:
        default=False,
        help="Check whether an update is available without installing anything",
    )
-    update_parser.add_argument(
-        "--no-backup",
-        action="store_true",
-        default=False,
-        help="Skip the pre-update backup for this run (overrides updates.pre_update_backup)",
-    )
-    update_parser.add_argument(
-        "--backup",
-        action="store_true",
-        default=False,
-        help="Force a pre-update backup for this run (off by default; overrides updates.pre_update_backup)",
-    )
    update_parser.set_defaults(func=cmd_update)

    # =========================================================================
@@ -278,14 +278,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "trinity-large-preview",
        "trinity-mini",
    ],
-    "gmi": [
-        "zai-org/GLM-5.1-FP8",
-        "deepseek-ai/DeepSeek-V3.2",
-        "moonshotai/Kimi-K2.5",
-        "google/gemini-3.1-flash-lite-preview",
-        "anthropic/claude-sonnet-4.6",
-        "openai/gpt-5.4",
-    ],
    "opencode-zen": [
        "kimi-k2.5",
        "gpt-5.4-pro",
@@ -717,6 +709,7 @@ class ProviderEntry(NamedTuple):
    label: str
    tui_desc: str   # detailed description for `hermes model` TUI

+
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
@@ -742,7 +735,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
    ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
-    ProviderEntry("gmi",            "GMI Cloud",                "GMI Cloud (multi-model direct API)"),
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
@@ -750,25 +742,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
 ]

-# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
-# that is not already in the list above.  Adding providers/*.py is sufficient
-# to expose a new provider in the model picker, /model, and all downstream
-# consumers — no edits to this file needed.
-_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
-try:
-    from providers import list_providers as _list_providers_for_canonical
-    for _pp in _list_providers_for_canonical():
-        if _pp.name in _canonical_slugs:
-            continue
-        if _pp.auth_type in ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"):
-            continue  # non-api-key flows need bespoke picker UX; skip auto-inject
-        _label = _pp.display_name or _pp.name
-        _desc = _pp.description or f"{_label} (direct API)"
-        CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
-        _canonical_slugs.add(_pp.name)
-except Exception:
-    pass
-
 # Derived dicts — used throughout the codebase
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider
@@ -796,8 +769,6 @@ _PROVIDER_ALIASES = {
    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -1878,19 +1849,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
            except Exception:
                pass
-    if normalized == "gmi":
-        try:
-            from hermes_cli.auth import resolve_api_key_provider_credentials
-
-            creds = resolve_api_key_provider_credentials("gmi")
-            api_key = str(creds.get("api_key") or "").strip()
-            base_url = str(creds.get("base_url") or "").strip()
-            if api_key and base_url:
-                live = fetch_api_models(api_key, base_url)
-                if live:
-                    return live
-        except Exception:
-            pass
    if normalized == "custom":
        base_url = _get_custom_base_url()
        if base_url:
@@ -1903,34 +1861,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
-
-    # ── Profile-based generic live fetch (all simple api-key providers) ──
-    # Handles any provider registered in providers/ with auth_type="api_key".
-    # Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
-    try:
-        from providers import get_provider_profile
-        from hermes_cli.auth import resolve_api_key_provider_credentials
-
-        _p = get_provider_profile(normalized)
-        if _p and _p.auth_type == "api_key" and _p.base_url:
-            try:
-                creds = resolve_api_key_provider_credentials(normalized)
-                api_key = str(creds.get("api_key") or "").strip()
-                base_url = str(creds.get("base_url") or "").strip()
-            except Exception:
-                api_key, base_url = "", _p.base_url
-            if not base_url:
-                base_url = _p.base_url
-            if api_key:
-                live = _p.fetch_models(api_key=api_key)
-                if live:
-                    return live
-            # Use profile's fallback_models if defined
-            if _p.fallback_models:
-                return list(_p.fallback_models)
-    except Exception:
-        pass
-
    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
    if normalized in _MODELS_DEV_PREFERRED:
        return _merge_with_models_dev(normalized, curated_static)
@@ -2296,52 +2226,6 @@ def copilot_model_api_mode(
    return "chat_completions"


-# Azure Foundry model families that require the Responses API.  Azure
-# rejects /chat/completions against these deployments with
-# ``400 "The requested operation is unsupported."`` — the same payload Bob
-# Dobolina hit in April 2026 on ``gpt-5.3-codex`` while ``gpt-4o-pure`` on
-# the same endpoint worked fine.  Keep the patterns broad enough to cover
-# vendor-renamed deployments (e.g. ``gpt-5.3-codex``, ``gpt-5-codex``,
-# ``gpt-5.4``, ``o1-preview``) but tight enough to leave GPT-4 / 3.5 / Llama /
-# Mistral / Grok deployments on chat completions.
-_AZURE_FOUNDRY_RESPONSES_PREFIXES = (
-    "codex",       # codex-*, codex-mini
-    "gpt-5",       # gpt-5, gpt-5.x, gpt-5-codex, gpt-5.x-codex
-    "o1",          # o1, o1-preview, o1-mini
-    "o3",          # o3, o3-mini
-    "o4",          # o4, o4-mini
-)
-
-
-def azure_foundry_model_api_mode(model_name: Optional[str]) -> Optional[str]:
-    """Infer Azure Foundry api_mode from a deployment/model name.
-
-    Returns ``"codex_responses"`` when the model name matches a family that
-    only accepts the Responses API on Azure Foundry (GPT-5.x, codex, o1/o3/o4
-    reasoning models).  Returns ``None`` otherwise — the caller should fall
-    back to the configured/default api_mode (typically ``chat_completions``)
-    so GPT-4o, GPT-4 Turbo, Llama, Mistral, etc. keep working.
-
-    Intentionally does NOT return ``anthropic_messages``; Anthropic-style
-    Azure endpoints are disambiguated by URL (``/anthropic`` suffix) in
-    ``runtime_provider._detect_api_mode_for_url`` and by the user setting
-    ``model.api_mode: anthropic_messages`` explicitly.
-    """
-    raw = str(model_name or "").strip().lower()
-    if not raw:
-        return None
-    # Strip any vendor/ prefix a user may have copied from OpenRouter / Copilot.
-    if "/" in raw:
-        raw = raw.rsplit("/", 1)[-1]
-    # gpt-5-mini speaks chat completions on Copilot but Azure Foundry deploys
-    # the full gpt-5 family uniformly on Responses API — don't carve an
-    # exception here.
-    for prefix in _AZURE_FOUNDRY_RESPONSES_PREFIXES:
-        if raw.startswith(prefix):
-            return "codex_responses"
-    return None
-
-
 def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
    """Normalize OpenCode config IDs to the bare model slug used in API requests."""
    provider = normalize_provider(provider_id)
@@ -79,20 +79,6 @@ VALID_HOOKS: Set[str] = {
    #   {"action": "allow"}  /  None             -> normal dispatch
    # Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
    "pre_gateway_dispatch",
-    # Approval lifecycle hooks. Fired by tools/approval.py when a dangerous
-    # command needs user approval -- fires BOTH for CLI-interactive prompts
-    # and for gateway/ACP approvals (Telegram, Discord, Slack, TUI, etc.).
-    # Observers only: return values are ignored. Plugins cannot veto or
-    # pre-answer an approval from these hooks (use pre_tool_call to block
-    # a tool before it reaches approval).
-    #
-    # Kwargs for pre_approval_request:
-    #   command: str, description: str, pattern_key: str, pattern_keys: list[str],
-    #   session_key: str, surface: "cli" | "gateway"
-    # Kwargs for post_approval_response: same as above plus
-    #   choice: "once" | "session" | "always" | "deny" | "timeout"
-    "pre_approval_request",
-    "post_approval_response",
 }

 ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -163,12 +163,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_override="https://api.arcee.ai/api/v1",
        base_url_env_var="ARCEE_BASE_URL",
    ),
-    "gmi": HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("GMI_API_KEY",),
-        base_url_override="https://api.gmi-serving.com/v1",
-        base_url_env_var="GMI_BASE_URL",
-    ),
    "ollama-cloud": HermesOverlay(
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
@@ -303,10 +297,6 @@ ALIASES: Dict[str, str] = {
    "arcee-ai": "arcee",
    "arceeai": "arcee",

-    # gmi
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
-
    # Local server aliases → virtual "local" concept (resolved via user config)
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
@@ -329,7 +319,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
-    "gmi": "GMI Cloud",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -214,6 +214,10 @@ def _resolve_runtime_from_pool_entry(
        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
    elif provider == "openrouter":
        base_url = base_url or OPENROUTER_BASE_URL
+    elif provider == "xai":
+        api_mode = "codex_responses"
+    elif provider == "nous":
+        api_mode = "chat_completions"
    elif provider == "copilot":
        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
        base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
@@ -227,32 +231,11 @@ def _resolve_runtime_from_pool_entry(
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if configured_mode:
                api_mode = configured_mode
-        # Model-family inference for GPT-5.x / codex / o1-o4: Azure rejects
-        # /chat/completions on these with 400 "operation unsupported" — see
-        # azure_foundry_model_api_mode() for rationale.  Skip when the user
-        # explicitly picked anthropic_messages (Anthropic-style endpoint).
-        if effective_model and api_mode != "anthropic_messages":
-            try:
-                from hermes_cli.models import azure_foundry_model_api_mode
-
-                inferred = azure_foundry_model_api_mode(effective_model)
-            except Exception:
-                inferred = None
-            if inferred:
-                api_mode = inferred
        # For Anthropic-style endpoints, strip /v1 suffix
        if api_mode == "anthropic_messages":
            base_url = re.sub(r"/v1/?$", "", base_url)
    else:
        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
-        # Use profile api_mode for all other known providers
-        try:
-            from providers import get_provider_profile
-            _p = get_provider_profile(provider)
-            if _p and _p.api_mode:
-                api_mode = _p.api_mode
-        except Exception:
-            pass
        # Honour model.base_url from config.yaml when the configured provider
        # matches this provider — same pattern as the Anthropic branch above.
        # Only override when the pool entry has no explicit base_url (i.e. it
@@ -270,21 +253,12 @@ def _resolve_runtime_from_pool_entry(
            from hermes_cli.models import opencode_model_api_mode
            api_mode = opencode_model_api_mode(provider, effective_model)
        else:
-            # Try profile api_mode first, then auto-detect from URL
-            try:
-                from providers import get_provider_profile
-                _p = get_provider_profile(provider)
-                if _p and _p.api_mode:
-                    api_mode = _p.api_mode
-            except Exception:
-                pass
-            if api_mode == "chat_completions":
-                # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
-                # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
-                # codex_responses).
-                detected = _detect_api_mode_for_url(base_url)
-                if detected:
-                    api_mode = detected
+            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
+            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
+            # codex_responses).
+            detected = _detect_api_mode_for_url(base_url)
+            if detected:
+                api_mode = detected

    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
    # Anthropic SDK prepends its own /v1/messages to the base_url.  Strip the
@@ -634,7 +608,6 @@ def _resolve_azure_foundry_runtime(
    model_cfg: Dict[str, Any],
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
-    target_model: Optional[str] = None,
 ) -> Dict[str, Any]:
    """Resolve an Azure Foundry runtime entry.

@@ -655,22 +628,6 @@ def _resolve_azure_foundry_runtime(
        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"

-    # Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
-    # reasoning models as Responses-API-only.  Calling /chat/completions
-    # against them returns 400 "The requested operation is unsupported."
-    # Upgrade api_mode when the model name matches, unless the user has
-    # explicitly chosen anthropic_messages (Anthropic-style endpoint).
-    effective_model = str(target_model or model_cfg.get("default") or "").strip()
-    if effective_model and cfg_api_mode != "anthropic_messages":
-        try:
-            from hermes_cli.models import azure_foundry_model_api_mode
-
-            inferred = azure_foundry_model_api_mode(effective_model)
-        except Exception:
-            inferred = None
-        if inferred:
-            cfg_api_mode = inferred
-
    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
    base_url = explicit_base_url_clean or cfg_base_url or env_base_url
    if not base_url:
@@ -907,7 +864,6 @@ def resolve_runtime_provider(
            model_cfg=_get_model_config(),
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
-            target_model=target_model,
        )
        return azure_runtime

@@ -22,8 +22,6 @@ import sqlite3
 import threading
 import time
 from pathlib import Path
-
-from agent.memory_manager import sanitize_context
 from hermes_constants import get_hermes_home
 from typing import Any, Callable, Dict, List, Optional, TypeVar

@@ -33,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 10
+SCHEMA_VERSION = 9

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -121,32 +119,6 @@ CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
 END;
 """

-# Trigram FTS5 table for CJK substring search.  The default unicode61
-# tokenizer splits CJK characters into individual tokens, breaking phrase
-# matching.  The trigram tokenizer creates overlapping 3-byte sequences so
-# substring queries work natively for any script (CJK, Thai, etc.).
-FTS_TRIGRAM_SQL = """
-CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
-    content,
-    content=messages,
-    content_rowid=id,
-    tokenize='trigram'
-);
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
-END;
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
-    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
-END;
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
-    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
-END;
-"""
-

 class SessionDB:
    """
@@ -394,18 +366,6 @@ class SessionDB:
                except sqlite3.OperationalError:
                    pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 9")
-            if current_version < 10:
-                # v10: trigram FTS5 table for CJK/substring search.
-                # Created via FTS_TRIGRAM_SQL below; backfill existing messages.
-                try:
-                    cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-                except sqlite3.OperationalError:
-                    cursor.executescript(FTS_TRIGRAM_SQL)
-                    cursor.execute(
-                        "INSERT INTO messages_fts_trigram(rowid, content) "
-                        "SELECT id, content FROM messages WHERE content IS NOT NULL"
-                    )
-                cursor.execute("UPDATE schema_version SET version = 10")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -423,12 +383,6 @@ class SessionDB:
        except sqlite3.OperationalError:
            cursor.executescript(FTS_SQL)

-        # Trigram FTS5 for CJK/substring search
-        try:
-            cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-        except sqlite3.OperationalError:
-            cursor.executescript(FTS_TRIGRAM_SQL)
-
        self._conn.commit()

    # =========================================================================
@@ -1201,10 +1155,7 @@ class SessionDB:

        messages = []
        for row in rows:
-            content = row["content"]
-            if row["role"] in {"user", "assistant"} and isinstance(content, str):
-                content = sanitize_context(content).strip()
-            msg = {"role": row["role"], "content": content}
+            msg = {"role": row["role"], "content": row["content"]}
            if row["tool_call_id"]:
                msg["tool_call_id"] = row["tool_call_id"]
            if row["tool_name"]:
@@ -1340,16 +1291,6 @@ class SessionDB:
        return sanitized.strip()


-    @staticmethod
-    def _is_cjk_codepoint(cp: int) -> bool:
-        return (0x4E00 <= cp <= 0x9FFF or    # CJK Unified Ideographs
-                0x3400 <= cp <= 0x4DBF or    # CJK Extension A
-                0x20000 <= cp <= 0x2A6DF or  # CJK Extension B
-                0x3000 <= cp <= 0x303F or    # CJK Symbols
-                0x3040 <= cp <= 0x309F or    # Hiragana
-                0x30A0 <= cp <= 0x30FF or    # Katakana
-                0xAC00 <= cp <= 0xD7AF)      # Hangul Syllables
-
    @staticmethod
    def _contains_cjk(text: str) -> bool:
        """Check if text contains CJK (Chinese, Japanese, Korean) characters."""
@@ -1365,11 +1306,6 @@ class SessionDB:
                return True
        return False

-    @classmethod
-    def _count_cjk(cls, text: str) -> int:
-        """Count CJK characters in text."""
-        return sum(1 for ch in text if cls._is_cjk_codepoint(ord(ch)))
-
    def search_messages(
        self,
        query: str,
@@ -1440,113 +1376,52 @@ class SessionDB:
            LIMIT ? OFFSET ?
        """

-        # CJK queries bypass the unicode61 FTS5 table.  The default tokenizer
-        # splits CJK characters into individual tokens, so "大别山项目" becomes
-        # "大 AND 别 AND 山 AND 项 AND 目" — producing false positives and
-        # missing exact phrase matches.
-        #
-        # For queries with 3+ CJK characters, we use the trigram FTS5 table
-        # (indexed substring matching with ranking and snippets).  For shorter
-        # CJK queries (1-2 chars), trigram can't match (it needs ≥9 UTF-8
-        # bytes = 3 CJK chars), so we fall back to LIKE.
-        is_cjk = self._contains_cjk(query)
-        if is_cjk:
-            raw_query = query.strip('"').strip()
-            cjk_count = self._count_cjk(raw_query)
-
-            if cjk_count >= 3:
-                # Trigram FTS5 path — quote each non-operator token to handle
-                # FTS5 special chars (%, *, etc.) while preserving boolean
-                # operators (AND, OR, NOT) for multi-term queries.
-                tokens = raw_query.split()
-                parts = []
-                for tok in tokens:
-                    if tok.upper() in ("AND", "OR", "NOT"):
-                        parts.append(tok)
-                    else:
-                        parts.append('"' + tok.replace('"', '""') + '"')
-                trigram_query = " ".join(parts)
-                tri_where = ["messages_fts_trigram MATCH ?"]
-                tri_params: list = [trigram_query]
-                if source_filter is not None:
-                    tri_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
-                    tri_params.extend(source_filter)
-                if exclude_sources is not None:
-                    tri_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
-                    tri_params.extend(exclude_sources)
-                if role_filter:
-                    tri_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
-                    tri_params.extend(role_filter)
-                tri_sql = f"""
-                    SELECT
-                        m.id,
-                        m.session_id,
-                        m.role,
-                        snippet(messages_fts_trigram, 0, '>>>', '<<<', '...', 40) AS snippet,
-                        m.content,
-                        m.timestamp,
-                        m.tool_name,
-                        s.source,
-                        s.model,
-                        s.started_at AS session_started
-                    FROM messages_fts_trigram
-                    JOIN messages m ON m.id = messages_fts_trigram.rowid
-                    JOIN sessions s ON s.id = m.session_id
-                    WHERE {' AND '.join(tri_where)}
-                    ORDER BY rank
-                    LIMIT ? OFFSET ?
-                """
-                tri_params.extend([limit, offset])
-                with self._lock:
-                    try:
-                        tri_cursor = self._conn.execute(tri_sql, tri_params)
-                    except sqlite3.OperationalError:
-                        matches = []
-                    else:
-                        matches = [dict(row) for row in tri_cursor.fetchall()]
-            else:
-                # Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
-                # Fall back to LIKE substring search.
-                escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
-                like_where = ["m.content LIKE ? ESCAPE '\\'"]
-                like_params: list = [f"%{escaped}%"]
-                if source_filter is not None:
-                    like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
-                    like_params.extend(source_filter)
-                if exclude_sources is not None:
-                    like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
-                    like_params.extend(exclude_sources)
-                if role_filter:
-                    like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
-                    like_params.extend(role_filter)
-                like_sql = f"""
-                    SELECT m.id, m.session_id, m.role,
-                           substr(m.content,
-                                  max(1, instr(m.content, ?) - 40),
-                                  120) AS snippet,
-                           m.content, m.timestamp, m.tool_name,
-                           s.source, s.model, s.started_at AS session_started
-                    FROM messages m
-                    JOIN sessions s ON s.id = m.session_id
-                    WHERE {' AND '.join(like_where)}
-                    ORDER BY m.timestamp DESC
-                    LIMIT ? OFFSET ?
-                """
-                like_params.extend([limit, offset])
-                # instr() parameter goes first in the bound list
-                like_params = [raw_query] + like_params
-                with self._lock:
-                    like_cursor = self._conn.execute(like_sql, like_params)
-                    matches = [dict(row) for row in like_cursor.fetchall()]
-        else:
-            with self._lock:
-                try:
-                    cursor = self._conn.execute(sql, params)
-                except sqlite3.OperationalError:
-                    # FTS5 query syntax error despite sanitization — return empty
+        with self._lock:
+            try:
+                cursor = self._conn.execute(sql, params)
+            except sqlite3.OperationalError:
+                # FTS5 query syntax error despite sanitization — return empty
+                # unless query contains CJK (fall back to LIKE below)
+                if not self._contains_cjk(query):
                    return []
-                else:
-                    matches = [dict(row) for row in cursor.fetchall()]
+                matches = []
+            else:
+                matches = [dict(row) for row in cursor.fetchall()]
+
+        # LIKE fallback for CJK queries: the default FTS5 tokenizer splits CJK text
+        # per character, so multi-char queries can miss. LIKE wildcards (% _ \) are escaped.
+        if not matches and self._contains_cjk(query):
+            raw_query = query.strip('"').strip()
+            like_where = ["m.content LIKE ? ESCAPE '\\'"]
+            like_params: list = ["%" + raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + "%"]
+            if source_filter is not None:
+                like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+                like_params.extend(source_filter)
+            if exclude_sources is not None:
+                like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+                like_params.extend(exclude_sources)
+            if role_filter:
+                like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+                like_params.extend(role_filter)
+            like_sql = f"""
+                SELECT m.id, m.session_id, m.role,
+                       substr(m.content,
+                              max(1, instr(m.content, ?) - 40),
+                              120) AS snippet,
+                       m.content, m.timestamp, m.tool_name,
+                       s.source, s.model, s.started_at AS session_started
+                FROM messages m
+                JOIN sessions s ON s.id = m.session_id
+                WHERE {' AND '.join(like_where)}
+                ORDER BY m.timestamp DESC
+                LIMIT ? OFFSET ?
+            """
+            like_params.extend([limit, offset])
+            # instr() parameter goes first in the bound list (unescaped: instr() has no wildcards)
+            like_params = [raw_query] + like_params
+            with self._lock:
+                like_cursor = self._conn.execute(like_sql, like_params)
+                matches = [dict(row) for row in like_cursor.fetchall()]

        # Add surrounding context (1 message before + after each match).
        # Done outside the lock so we don't hold it across N sequential queries.
@@ -1606,32 +1481,16 @@ class SessionDB:
        limit: int = 20,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
-        """List sessions, optionally filtered by source.
-
-        Returns rows enriched with a computed ``last_active`` column (latest
-        message timestamp for the session, falling back to ``started_at``),
-        ordered by most-recently-used first.
-        """
-        select_with_last_active = (
-            "SELECT s.*, COALESCE(m.last_active, s.started_at) AS last_active "
-            "FROM sessions s "
-            "LEFT JOIN ("
-            "SELECT session_id, MAX(timestamp) AS last_active "
-            "FROM messages GROUP BY session_id"
-            ") m ON m.session_id = s.id "
-        )
+        """List sessions, optionally filtered by source."""
        with self._lock:
            if source:
                cursor = self._conn.execute(
-                    f"{select_with_last_active}"
-                    "WHERE s.source = ? "
-                    "ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
+                    "SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
                    (source, limit, offset),
                )
            else:
                cursor = self._conn.execute(
-                    f"{select_with_last_active}"
-                    "ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
+                    "SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
                    (limit, offset),
                )
            return [dict(row) for row in cursor.fetchall()]
@@ -7,7 +7,9 @@
  perSystem = { pkgs, system, lib, ... }:
    let
      hermes-agent = inputs.self.packages.${system}.default;
-      hermesVenv = hermes-agent.hermesVenv;
+      hermesVenv = pkgs.callPackage ./python.nix {
+        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };

      configMergeScript = pkgs.callPackage ./configMergeScript.nix { };

@@ -191,35 +193,6 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
          echo "ok" > $out/result
        '';

-        # Verify extraPythonPackages PYTHONPATH injection
-        extra-python-packages = let
-          testPkg = pkgs.python312Packages.pyfiglet;
-          hermesWithExtra = hermes-agent.override {
-            extraPythonPackages = [ testPkg ];
-          };
-        in pkgs.runCommand "hermes-extra-python-packages" { } ''
-          set -e
-          echo "=== Checking extraPythonPackages PYTHONPATH injection ==="
-
-          grep -q "PYTHONPATH" ${hermesWithExtra}/bin/hermes || \
-            (echo "FAIL: PYTHONPATH not in wrapper"; exit 1)
-          echo "PASS: PYTHONPATH present in wrapper"
-
-          grep -q "${testPkg}" ${hermesWithExtra}/bin/hermes || \
-            (echo "FAIL: test package path not in PYTHONPATH"; exit 1)
-          echo "PASS: test package path found in wrapper"
-
-          echo "=== Checking base package has no PYTHONPATH ==="
-          if grep -q "PYTHONPATH" ${hermes-agent}/bin/hermes; then
-            echo "FAIL: base package should not have PYTHONPATH"; exit 1
-          fi
-          echo "PASS: base package clean"
-
-          echo "=== All extraPythonPackages checks passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
        # ── Config merge + round-trip test ────────────────────────────────
        # Tests the merge script (Nix activation behavior) across 7
        # scenarios, then verifies Python's load_config() reads correctly.
@@ -1,186 +0,0 @@
-# nix/hermes-agent.nix — Overridable Hermes Agent package
-#
-# callPackage auto-wires nixpkgs args; flake inputs are passed explicitly.
-# Users override via: pkgs.hermes-agent.override { extraPythonPackages = [...]; }
-{
-  lib,
-  stdenv,
-  makeWrapper,
-  callPackage,
-  python312,
-  nodejs_22,
-  ripgrep,
-  git,
-  openssh,
-  ffmpeg,
-  tirith,
-  # Flake inputs — passed explicitly by packages.nix and overlays.nix
-  uv2nix,
-  pyproject-nix,
-  pyproject-build-systems,
-  npm-lockfile-fix,
-  # Overridable parameters
-  extraPythonPackages ? [ ],
-}:
-let
-  hermesVenv = callPackage ./python.nix {
-    inherit uv2nix pyproject-nix pyproject-build-systems;
-  };
-
-  hermesNpmLib = callPackage ./lib.nix {
-    inherit npm-lockfile-fix;
-  };
-
-  hermesTui = callPackage ./tui.nix {
-    inherit hermesNpmLib;
-  };
-
-  hermesWeb = callPackage ./web.nix {
-    inherit hermesNpmLib;
-  };
-
-  bundledSkills = lib.cleanSourceWith {
-    src = ../skills;
-    filter = path: _type: !(lib.hasInfix "/index-cache/" path);
-  };
-
-  runtimeDeps = [
-    nodejs_22
-    ripgrep
-    git
-    openssh
-    ffmpeg
-    tirith
-  ];
-
-  runtimePath = lib.makeBinPath runtimeDeps;
-
-  sitePackagesPath = python312.sitePackages;
-
-  # Walk propagatedBuildInputs to include transitive Python deps in PYTHONPATH.
-  # Without this, a plugin listing e.g. requests as a dep would fail at runtime
-  # if requests isn't already in the sealed uv2nix venv.
-  allExtraPythonPackages = python312.pkgs.requiredPythonModules extraPythonPackages;
-
-  pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
-
-  pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
-  uvLockHash =
-    if builtins.pathExists ../uv.lock then
-      builtins.hashString "sha256" (builtins.readFile ../uv.lock)
-    else
-      "none";
-in
-stdenv.mkDerivation {
-  pname = "hermes-agent";
-  version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;
-
-  dontUnpack = true;
-  dontBuild = true;
-  nativeBuildInputs = [ makeWrapper ];
-
-  installPhase = ''
-    runHook preInstall
-
-    mkdir -p $out/share/hermes-agent $out/bin
-    cp -r ${bundledSkills} $out/share/hermes-agent/skills
-    cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
-
-    mkdir -p $out/ui-tui
-    cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
-
-    ${lib.concatMapStringsSep "\n"
-      (name: ''
-        makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
-          --suffix PATH : "${runtimePath}" \
-          --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
-          --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
-          --set HERMES_TUI_DIR $out/ui-tui \
-          --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
-          --set HERMES_NODE ${nodejs_22}/bin/node \
-          ${lib.optionalString (extraPythonPackages != [ ]) ''--suffix PYTHONPATH : "${pythonPath}"''}
-      '')
-      [
-        "hermes"
-        "hermes-agent"
-        "hermes-acp"
-      ]
-    }
-
-    ${lib.optionalString (extraPythonPackages != [ ]) ''
-      echo "=== Checking for plugin/core package collisions ==="
-      ${hermesVenv}/bin/python3 -c "
-import pathlib, sys, re
-
-def canonical(name):
-    return re.sub(r'[-_.]+', '-', name).lower()
-
-# Collect core venv package names
-core = set()
-venv_sp = pathlib.Path('${hermesVenv}/${sitePackagesPath}')
-for di in venv_sp.glob('*.dist-info'):
-    meta = di / 'METADATA'
-    if meta.exists():
-        for line in meta.read_text().splitlines():
-            if line.startswith('Name:'):
-                core.add(canonical(line.split(':', 1)[1].strip()))
-                break
-
-# Check each extra package for collisions
-extras_dirs = [${lib.concatMapStringsSep ", " (p: "'${toString p}'") allExtraPythonPackages}]
-for edir in extras_dirs:
-    sp = pathlib.Path(edir) / '${sitePackagesPath}'
-    if not sp.exists():
-        continue
-    for di in sp.glob('*.dist-info'):
-        meta = di / 'METADATA'
-        if not meta.exists():
-            continue
-        for line in meta.read_text().splitlines():
-            if line.startswith('Name:'):
-                pkg = canonical(line.split(':', 1)[1].strip())
-                if pkg in core:
-                    print(f'ERROR: plugin package \"{pkg}\" collides with a package in hermes sealed venv', file=sys.stderr)
-                    print(f'  from: {di}', file=sys.stderr)
-                    print(f'  Remove this dependency from extraPythonPackages.', file=sys.stderr)
-                    sys.exit(1)
-                break
-
-print('No collisions found.')
-      "
-      echo "=== No collisions ==="
-    ''}
-
-    runHook postInstall
-  '';
-
-  passthru = {
-    inherit hermesTui hermesWeb hermesNpmLib hermesVenv;
-
-    devShellHook = ''
-      STAMP=".nix-stamps/hermes-agent"
-      STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
-      if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-        echo "hermes-agent: installing Python dependencies..."
-        uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
-        source .venv/bin/activate
-        uv pip install -e ".[all]"
-        [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
-        [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
-        mkdir -p .nix-stamps
-        echo "$STAMP_VALUE" > "$STAMP"
-      else
-        source .venv/bin/activate
-        export HERMES_PYTHON=${hermesVenv}/bin/python3
-      fi
-    '';
-  };
-
-  meta = with lib; {
-    description = "AI agent with advanced tool-calling capabilities";
-    homepage = "https://github.com/NousResearch/hermes-agent";
-    mainProgram = "hermes";
-    license = licenses.mit;
-    platforms = platforms.unix;
-  };
-}
@@ -28,8 +28,6 @@

  let
    cfg = config.services.hermes-agent;
-    effectivePackage = if cfg.extraPythonPackages == [ ] then cfg.package
-      else cfg.package.override { inherit (cfg) extraPythonPackages; };
    hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;

    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
@@ -458,52 +456,6 @@
        description = "Extra packages available on PATH.";
      };

-      extraPlugins = mkOption {
-        type = types.listOf types.package;
-        default = [ ];
-        description = ''
-          Directory-based plugin packages to symlink into the hermes plugins
-          directory. Each package should contain a plugin.yaml and __init__.py
-          at its root. Hermes discovers these automatically on startup.
-        '';
-        example = literalExpression ''
-          [
-            (pkgs.fetchFromGitHub {
-              owner = "stephenschoettler";
-              repo = "hermes-lcm";
-              name = "hermes-lcm";
-              rev = "v0.7.0";
-              hash = "sha256-...";
-            })
-          ]
-        '';
-      };
-
-      extraPythonPackages = mkOption {
-        type = types.listOf types.package;
-        default = [ ];
-        description = ''
-          Python packages to add to PYTHONPATH for entry-point plugin discovery.
-          These are pip-packaged plugins that register via the
-          hermes_agent.plugins entry-point group. Each package must be built
-          with the same Python interpreter as hermes (python312).
-        '';
-        example = literalExpression ''
-          [
-            (pkgs.python312Packages.buildPythonPackage {
-              pname = "rtk-hermes";
-              version = "1.0.0";
-              src = pkgs.fetchFromGitHub {
-                owner = "ogallotti";
-                repo = "rtk-hermes";
-                rev = "main";
-                hash = "sha256-...";
-              };
-            })
-          ]
-        '';
-      };
-
      restart = mkOption {
        type = types.str;
        default = "always";
@@ -618,7 +570,7 @@
      # so interactive shells share state (sessions, skills, cron) with the
      # gateway service instead of creating a separate ~/.hermes/.
      (lib.mkIf cfg.addToSystemPackages {
-        environment.systemPackages = [ effectivePackage ];
+        environment.systemPackages = [ cfg.package ];
        environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
      })

@@ -629,16 +581,6 @@
        });
      })

-      # ── Assertions ─────────────────────────────────────────────────────
-      {
-        assertions = let
-          names = map lib.getName cfg.extraPlugins;
-        in [{
-          assertion = (lib.length names) == (lib.length (lib.unique names));
-          message = "services.hermes-agent.extraPlugins: duplicate plugin names detected: ${toString names}. If using fetchFromGitHub, set name = \"plugin-name\" to disambiguate.";
-        }];
-      }
-
      # ── Warnings ──────────────────────────────────────────────────────
      (lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
        warnings = [
@@ -660,7 +602,6 @@
          "d ${cfg.stateDir}/.hermes/sessions 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/logs   2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/memories 2770 ${cfg.user} ${cfg.group} - -"
-          "d ${cfg.stateDir}/.hermes/plugins 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/home           0750 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.workingDirectory}         2770 ${cfg.user} ${cfg.group} - -"
        ];
@@ -682,7 +623,7 @@
          find ${cfg.stateDir}/.hermes -maxdepth 1 \
            \( -name "*.db" -o -name "*.db-wal" -o -name "*.db-shm" -o -name "SOUL.md" \) \
            -exec chmod g+rw {} + 2>/dev/null || true
-          for _subdir in cron sessions logs memories plugins; do
+          for _subdir in cron sessions logs memories; do
            mkdir -p "${cfg.stateDir}/.hermes/$_subdir"
            chown ${cfg.user}:${cfg.group} "${cfg.stateDir}/.hermes/$_subdir"
            chmod 2770 "${cfg.stateDir}/.hermes/$_subdir"
@@ -791,22 +732,6 @@ HERMES_NIX_ENV_EOF
          ${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
            install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
          '') cfg.documents)}
-
-        # ── Declarative plugins ─────────────────────────────────────────
-        # Remove stale managed symlinks (plugins removed from config)
-        find ${cfg.stateDir}/.hermes/plugins -maxdepth 1 -type l -name 'nix-managed-*' -delete 2>/dev/null || true
-
-        ${lib.concatStringsSep "\n" (map (plugin:
-          let
-            name = lib.getName plugin;
-          in ''
-            if [ ! -f "${plugin}/plugin.yaml" ]; then
-              echo "ERROR: extraPlugins entry '${plugin}' has no plugin.yaml" >&2
-              exit 1
-            fi
-            ln -sfn ${plugin} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
-            chown -h ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
-          '') cfg.extraPlugins)}
        '';
      }

@@ -837,7 +762,7 @@ HERMES_NIX_ENV_EOF
            # reads them at Python startup — no systemd EnvironmentFile needed.

            ExecStart = lib.concatStringsSep " " ([
-              "${effectivePackage}/bin/hermes"
+              "${cfg.package}/bin/hermes"
              "gateway"
            ] ++ cfg.extraArgs);

@@ -860,7 +785,7 @@ HERMES_NIX_ENV_EOF
          };

          path = [
-            effectivePackage
+            cfg.package
            pkgs.bash
            pkgs.coreutils
            pkgs.git
@@ -885,11 +810,11 @@ HERMES_NIX_ENV_EOF

          preStart = ''
            # Stable symlinks — container references these, not store paths directly
-            ln -sfn ${effectivePackage} ${cfg.stateDir}/current-package
+            ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
            ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint

            # GC roots so nix-collect-garbage doesn't remove store paths in use
-            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${effectivePackage} 2>/dev/null || true
+            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true

            # Check if container needs (re)creation
@@ -1,10 +0,0 @@
-# nix/overlays.nix — Expose pkgs.hermes-agent for external NixOS configs
-{ inputs, ... }:
-{
-  flake.overlays.default = final: _: {
-    hermes-agent = final.callPackage ./hermes-agent.nix {
-      inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
-      npm-lockfile-fix = inputs.npm-lockfile-fix.packages.${final.stdenv.hostPlatform.system}.default;
-    };
-  };
-}
@@ -4,19 +4,120 @@
  perSystem =
    { pkgs, inputs', ... }:
    let
-      hermesAgent = pkgs.callPackage ./hermes-agent.nix {
+      hermesVenv = pkgs.callPackage ./python.nix {
        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };
+
+      hermesNpmLib = pkgs.callPackage ./lib.nix {
        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
      };
+
+      hermesTui = pkgs.callPackage ./tui.nix {
+        inherit hermesNpmLib;
+      };
+
+      # Import bundled skills, excluding runtime caches
+      bundledSkills = pkgs.lib.cleanSourceWith {
+        src = ../skills;
+        filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
+      };
+
+      hermesWeb = pkgs.callPackage ./web.nix {
+        inherit hermesNpmLib;
+      };
+
+      runtimeDeps = with pkgs; [
+        nodejs_22
+        ripgrep
+        git
+        openssh
+        ffmpeg
+        tirith
+      ];
+
+      runtimePath = pkgs.lib.makeBinPath runtimeDeps;
+
+      # Lockfile hashes for dev shell stamps
+      pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
+      uvLockHash =
+        if builtins.pathExists ../uv.lock then
+          builtins.hashString "sha256" (builtins.readFile ../uv.lock)
+        else
+          "none";
    in
    {
      packages = {
-        default = hermesAgent;
-        tui = hermesAgent.hermesTui;
-        web = hermesAgent.hermesWeb;
+        default = pkgs.stdenv.mkDerivation {
+          pname = "hermes-agent";
+          version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version;

-        fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles {
-          packages = [ hermesAgent.hermesTui hermesAgent.hermesWeb ];
+          dontUnpack = true;
+          dontBuild = true;
+          nativeBuildInputs = [ pkgs.makeWrapper ];
+
+          installPhase = ''
+            runHook preInstall
+
+            mkdir -p $out/share/hermes-agent $out/bin
+            cp -r ${bundledSkills} $out/share/hermes-agent/skills
+            cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
+
+            # copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
+            mkdir -p $out/ui-tui
+            cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
+
+            ${pkgs.lib.concatMapStringsSep "\n"
+              (name: ''
+                makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
+                  --suffix PATH : "${runtimePath}" \
+                  --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
+                  --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
+                  --set HERMES_TUI_DIR $out/ui-tui \
+                  --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
+                  --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
+              '')
+              [
+                "hermes"
+                "hermes-agent"
+                "hermes-acp"
+              ]
+            }
+
+            runHook postInstall
+          '';
+
+          passthru.devShellHook = ''
+            STAMP=".nix-stamps/hermes-agent"
+            STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
+            if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+              echo "hermes-agent: installing Python dependencies..."
+              uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
+              source .venv/bin/activate
+              uv pip install -e ".[all]"
+              [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
+              [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
+              mkdir -p .nix-stamps
+              echo "$STAMP_VALUE" > "$STAMP"
+            else
+              source .venv/bin/activate
+              export HERMES_PYTHON=${hermesVenv}/bin/python3
+            fi
+          '';
+
+          meta = with pkgs.lib; {
+            description = "AI agent with advanced tool-calling capabilities";
+            homepage = "https://github.com/NousResearch/hermes-agent";
+            mainProgram = "hermes";
+            license = licenses.mit;
+            platforms = platforms.unix;
+          };
+        };
+
+        tui = hermesTui;
+        web = hermesWeb;
+
+        fix-lockfiles = hermesNpmLib.mkFixLockfiles {
+          packages = [ hermesTui hermesWeb ];
        };
      };
    };
@@ -7,7 +7,6 @@
  pyproject-nix,
  pyproject-build-systems,
  stdenv,
-  dependency-groups ? [ "all" ],
 }:
 let
  workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
@@ -97,5 +96,5 @@ let
      ]);
 in
 pythonSet.mkVirtualEnv "hermes-agent-env" {
-  hermes-agent = dependency-groups;
+  hermes-agent = [ "all" ];
 }
@@ -17,7 +17,6 @@ pkgs.buildNpmPackage (npm // {
  inherit src npmDeps version;

  doCheck = false;
-  npmFlags = [ "--legacy-peer-deps" ];

  installPhase = ''
    runHook preInstall
@@ -1,7 +1,7 @@
 ---
 name: touchdesigner-mcp
 description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
-version: 1.1.0
+version: 1.0.0
 author: kshitijk4poor
 license: MIT
 metadata:
@@ -204,9 +204,8 @@ win.par.winopen.pulse()
 | `td_input_clear` | Stop input automation |
 | `td_op_screen_rect` | Get screen coords of a node |
 | `td_click_screen_point` | Click a point in a screenshot |
-| `td_screen_point_to_global` | Convert screenshot pixel to absolute screen coords |

-The table above covers the 32 tools used in typical creative workflows. The remaining 4 tools (`td_project_quit`, `td_test_session`, `td_dev_log`, `td_clear_dev_log`) are admin/dev-mode utilities — see `references/mcp-tools.md` for the full 36-tool reference with complete parameter schemas.
+See `references/mcp-tools.md` for full parameter schemas.

 ## Key Implementation Rules

@@ -333,21 +332,6 @@ See `references/network-patterns.md` for complete build scripts + shader code.
 | `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
 | `references/python-api.md` | TD Python: op(), scripting, extensions |
 | `references/troubleshooting.md` | Connection diagnostics, debugging |
-| `references/glsl.md` | GLSL uniforms, built-in functions, shader templates |
-| `references/postfx.md` | Post-FX: bloom, CRT, chromatic aberration, feedback glow |
-| `references/layout-compositor.md` | HUD layout patterns, panel grids, BSP-style layouts |
-| `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
-| `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
-| `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
-| `references/animation.md` | LFOs, timers, keyframes, easing, expression-driven motion |
-| `references/midi-osc.md` | MIDI/OSC controllers, TouchOSC, multi-machine sync |
-| `references/particles.md` | POPs and legacy particleSOP — emission, forces, collisions |
-| `references/projection-mapping.md` | Multi-window output, corner pin, mesh warp, edge blending |
-| `references/external-data.md` | HTTP, WebSocket, MQTT, Serial, TCP, webserverDAT |
-| `references/panel-ui.md` | Custom params, panel COMPs, button/slider/field, panelExecuteDAT |
-| `references/replicator.md` | replicatorCOMP — data-driven cloning, layouts, callbacks |
-| `references/dat-scripting.md` | Execute DAT family — chop/dat/parameter/panel/op/executeDAT |
-| `references/3d-scene.md` | Lighting rigs, shadows, IBL/cubemaps, multi-camera, PBR |
 | `scripts/setup.sh` | Automated setup script |

 ---
@@ -143,20 +143,20 @@ Creating nodes with the same names you just destroyed in the SAME script causes
 ```python
 # td_execute_python:
 for c in list(root.children):
-    if c.valid and c.name.startswith('my_'):
+    if c.valid and c.name.startswith('promo_'):
        c.destroy()
-# ... then create my_audio, my_shader etc. in same script → CRASHES
+# ... then create promo_audio, promo_shader etc. in same script → CRASHES
 ```

 **CORRECT (two separate calls):**
 ```python
 # Call 1: td_execute_python — clean only
 for c in list(root.children):
-    if c.valid and c.name.startswith('my_'):
+    if c.valid and c.name.startswith('promo_'):
        c.destroy()

 # Call 2: td_execute_python — build (separate MCP call)
-audio = root.create(audiofileinCHOP, 'my_audio')
+audio = root.create(audiofileinCHOP, 'promo_audio')
 # ... rest of build
 ```

@@ -361,13 +361,21 @@ win.par.winopen.pulse()

 `out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.

-### 32. Audio-reactive GLSL: TD-side pipeline
+### 32. Audio-reactive GLSL: dual-layer sync pipeline

-For audio-synced visuals: AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+For audio-synced visuals, use BOTH layers for maximum effect:
+
+**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+
+**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
+
+Because both layers are locked to the same audio file, the visuals genuinely sync to the beat at two independent stages.

 **Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.

-### 33. twozero MCP: prefer native tools
+### 33. twozero MCP: benchmark and prefer native tools
+
+Benchmarked April 2026: twozero MCP exposes 36 native tools, whereas the old curl/REST method (port 9981) exposed none.

 **Always prefer native MCP tools over td_execute_python:**
 - `td_create_operator` over `root.create()` scripts (handles viewport positioning)
@@ -417,16 +425,13 @@ TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still

 **a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.

-**b) Audio device CHOP blocking the main thread (MOST COMMON).** An `audiodeviceoutCHOP` with `active=True` can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. **`volume=0` is NOT sufficient** — the audio driver still blocks. Fix: `par.active = False`. This completely stops the CHOP from interacting with the audio driver. If you need audio monitoring, enable it only during short playback checks, then disable before recording.
-
-Verified April 2026: disabling `audiodeviceoutCHOP` (`active=False`) restored FPS from 0 to 60 instantly, recovering from 2348% budget usage to 0.1%.
+**b) Audio device CHOP blocking the main thread.** An `audiodeviceoutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.

 Diagnostic sequence when FPS=0:
-1. `td_get_perf` — check if any op has extreme CPU/s (audiodeviceoutCHOP is the usual suspect)
-2. If audiodeviceoutCHOP shows >100ms/s: set `par.active = False` immediately
-3. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
-4. Check for other blocking CHOPs (audiodevin, etc.)
-5. Toggle play state (spacebar, or check if absTime.seconds is advancing)
+1. `td_get_perf` — check if any op has extreme CPU/s
+2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
+3. Check for blocking CHOPs (`audiodeviceoutCHOP`, `audiodeviceinCHOP`, etc.)
+4. Toggle play state (spacebar, or check if absTime.seconds is advancing)

 ### 39. Recording while FPS=0 produces empty or near-empty files

@@ -479,20 +484,9 @@ If `td_write_dat` fails, fall back to `td_execute_python`:
 op("/project1/shader_code").text = shader_string
 ```

-### 42. td_execute_python DOES return print() output — use it for debugging
+### 42. td_execute_python does NOT return stdout or print() output

-`print()` statements in `td_execute_python` scripts appear in the MCP response text. This is the correct way to read values back from scripts. The response format is: printed output first, then `[fps X.X/X] [N err/N warn]` on a separate line.
-
-However, the `result` variable (if you set one) does NOT appear verbatim — use `print()` for anything you need to read back:
-```python
-# CORRECT — appears in response:
-print('value:', some_value)
-
-# WRONG — not reliably in response:
-result = some_value
-```
-
-For structured data, use dedicated inspection tools (`td_get_operator_info`, `td_read_chop`) which return clean JSON.
+Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.

 ### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()

@@ -502,203 +496,13 @@ clean = response_text.rsplit('[fps', 1)[0]
 data = json.loads(clean)
 ```

-### 44. td_get_screenshot is unreliable — returns `{"status": "pending"}` and may never deliver
+### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`

-Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file may appear later — or may NEVER appear at all. In testing (April 2026), screenshots stayed "pending" indefinitely with no file written to disk, even though the shader was cooking at 8-30fps.
+Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.

-**Do NOT rely on `td_get_screenshot` for frame capture.** For reliable frame capture, use MovieFileOut recording + ffmpeg frame extraction:
-```bash
-# Record in TD first, then extract frames:
-ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
-```
-
-If you need a quick visual check, `td_get_screenshot` is worth trying (it sometimes works), but always have the recording fallback. There is no callback or completion notification — if the file doesn't appear after 5-10 seconds, it's not coming.
-
-### 45. Heavy shaders cook below record FPS — many duplicate frames in output
-
-A raymarched GLSL shader may only cook at 8-15fps even though MovieFileOut records at 60fps. The recording still works (TD writes the last-cooked frame each time), but the resulting file has many duplicate frames. When extracting frames for post-processing, use a lower fps filter to avoid redundant frames:
-```bash
-# Extract at 24fps from a 60fps recording of an 8fps shader:
-ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
-```
-Check actual cook FPS with `td_get_perf` before committing to a long recording. If FPS < 15, the output will be a slideshow regardless of the recording codec.
-
-### 46. Recording duration is manual — no auto-stop at audio end
+### 45. Recording duration is manual — no auto-stop at audio end

 MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
 ```bash
 ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
-```
-
-### 47. AudioFileIn par.index stays at 0 in sequential mode — not a reliable progress indicator
-
-When `audiofileinCHOP` is in `playmode=2` (sequential), `par.index.eval()` returns 0.0 even while audio IS actively playing and the spectrum IS receiving data. Do NOT use `par.index` to check playback progress in sequential mode.
-
-**How to verify audio is actually playing:**
- Read the spectrum CHOP values via `td_read_chop` — if values are non-zero and CHANGE between reads 1-2s apart, audio is flowing
- Read the audio CHOP itself: non-zero waveform samples confirm the file is loaded and playing
- `par.play.eval()` returning True is necessary but NOT sufficient — it can be True with no audio flowing if cue is stuck
-
-### 48. GLSL shader whiteout — clamp audio spectrum values in the shader
-
-Raw spectrum values multiplied by Math CHOP gain can produce very large numbers (5-20+) that blow out the shader's lighting, producing flat white/grey. The shader MUST clamp audio inputs:
-
-```glsl
-float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
-bass = clamp(bass, 0.0, 3.0);   // prevent whiteout
-mids = clamp(mids, 0.0, 3.0);
-hi = clamp(hi, 0.0, 3.0);
-```
-
-Discovered when gain=10 produced ~0.13 (too dark) during quiet passages but gain=50 produced ~9.4 (total whiteout). Fix: keep gain=10, use `highfreqboost=3.0` on AudioSpectrum, clamp in shader.
-
-### 49. Non-Commercial TD records at 1280x1280 (square) — always crop in post
-
-Even with `resolutionw=1280, resolutionh=720` on the GLSL TOP, Non-Commercial TD may output 1280x1280 to MovieFileOut. Always check dimensions with ffprobe and crop during extraction:
-
-```bash
-# Center-crop from 1280x1280 to 1280x720:
-ffmpeg -y -i /tmp/td_output.mov -t 25 -r 24 -vf "crop=1280:720:0:280" /tmp/frames/frame_%06d.png
-```
-
-Large ProRes files (1-2GB) at 1280x1280 decode at ~3fps, so 25s of footage takes ~3 minutes to extract.
-
-## Advanced Patterns (pitfalls 51+)
-
-### 51. Connection syntax: use `outputConnectors`/`inputConnectors`, NOT `outputs`/`inputs`
-
-```python
-# CORRECT
-src.outputConnectors[0].connect(dst.inputConnectors[0])
-# WRONG — raises IndexError or AttributeError
-src.outputs[0].connect(dst.inputs[0])
-```
-
-For feedback TOP, BOTH are required:
-```python
-fb.par.top = target.path
-target.outputConnectors[0].connect(fb.inputConnectors[0])
-```
-
-### 52. moviefileoutTOP `par.input` doesn't resolve via Python in TD 2025.32460
-
-Setting `moviefileoutTOP.par.input` programmatically does NOT work. All forms fail silently with "Not enough sources specified."
-
-**Workaround — frame capture + ffmpeg:**
-```python
-out = op('/project1/out')
-for i in range(300):
-    delay = i * 5
-    run(f"op('/project1/out').save('/tmp/frames/f_{i:04d}.png')", delayFrames=delay)
-# Then: ffmpeg -y -framerate 30 -i /tmp/frames/f_%04d.png -c:v prores -pix_fmt yuv420p /tmp/output.mov
-```
-
-### 53. Batch frame capture — use `me.fetch`/`me.store` for state across calls
-
-```python
-start = me.fetch('cap_frame', 0)
-for i in range(60):
-    frame = start + i
-    op('/project1/out').save(f'/tmp/frames/frame_{str(frame).zfill(4)}.png')
-me.store('cap_frame', start + 60)
-```
-Call 5 times for 300 frames. Each picks up where the last left off.
-
-### 54. GLSL TOP pixel shader requirements in TD 2025
-
-```glsl
-// REQUIRED — declare output
-layout(location = 0) out vec4 fragColor;
-
-void main() {
-    vec3 col = vec3(1.0, 0.0, 0.0);
-    fragColor = TDOutputSwizzle(vec4(col, 1.0));
-}
-```
-**Built-in uniforms available:** `uTDOutputInfo.res` (vec4), `uTDTimeInfo.seconds`, `sTD2DInputs[N]`.
-**Auto-created DATs:** `name_pixel`, `name_vertex`, `name_compute` textDATs with example code.
-
-### 55. TOP.save() doesn't advance time — identical frames in tight loops
-
-`.save()` captures the current cooked frame without advancing TD's timeline:
-```python
-# WRONG — all frames identical
-for i in range(300):
-    op('/project1/out').save(f'frames/f_{i:04d}.png')
-
-# CORRECT — use run() with delayFrames
-for i in range(300):
-    delay = i * 5
-    run(f"op('/project1/out').save('frames/f_{i:04d}.png')", delayFrames=delay)
-```
-**NEVER use `time.sleep()` in TD** — it blocks the main thread and freezes the UI.
-
-### 56. Feedback loop masks input changes — force switch during capture
-
-With feedback TOP opacity 0.7+, the buffer dominates output. Switching input produces nearly identical frames.
-
-**Fix — force switch index per capture:**
-```python
-for i in range(300):
-    idx = (i // 8) % num_inputs
-    delay = i * 5
-    run(f"op('/project1/vswitch').par.index={idx}; op('/project1/out').save('f_{i:04d}.png')", delayFrames=delay)
-```
-
-### 57. Large td_execute_python scripts fail — split into incremental calls
-
-10+ operator creations in one script cause timing issues. Split into 2-4 calls of 2-4 operators each. Within one call, `create()` handles work immediately. Across calls, `op('name')` may return `None` if the previous call hasn't committed.
-
-### 58. MCP instance reconnection after project.load()
-
-`project.load(path)` changes the PID. After loading, call `td_list_instances()` and use the new `target_instance`. For TOX files: import as child comp instead (doesn't disconnect).
-
-### 59. TOX reverse-engineering workflow
-
-```python
-comp = root.loadTox(r'/path/to/file.tox')
-comp.name = '_study_comp'
-for child in comp.children:
-    print(f'{child.name} ({child.OPType})')
-# Use td_get_operators_info, td_read_dat, check custom params
-```
-
-### 60. sliderCOMP naming — TD appends suffix
-
-TD auto-renames: `slider_brightness` → `slider_brightness1`. Always check names after creation.
-
-### 61. create() requires full operator type suffix
-
-```python
-# CORRECT
-proj.create('audiofileinCHOP', 'audio_in')
-proj.create('glslTOP', 'render')
-
-# WRONG — raises "Unknown operator type"
-proj.create('audiofilein', 'audio_in')
-proj.create('glsl', 'render')
-```
-
-### 62. Reparenting COMPs — use copyOPs, not connect()
-
-Moving COMPs with `inputCOMPConnectors[0].connect()` fails. Use copy + destroy:
-```python
-copied = target.copyOPs([source])  # preserves internal wiring
-source.destroy()
-# Re-wire external connections manually after the move
-```
-
-### 63. Slider wiring — expressionCHOP with op() expressions crashes TD
-
-```python
-# CRASHES TD — don't do this
-echop = root.create(expressionCHOP, 'slider_ctrl')
-echop.par.chan0expr = 'op("/project1/controls/slider_brightness1").par.value0'
-
-# WORKING — parameterCHOP as bridge
-pchop = root.create(parameterCHOP, 'slider_vals')
-pchop.par.ops = '/project1/controls'
-pchop.par.parameters = 'value0'
-pchop.par.custom = True
-pchop.par.builtin = False
 ```
@@ -1,131 +0,0 @@
-# google_meet plugin
-
-Let the hermes agent join a Google Meet call, transcribe it, optionally speak
-in it, and do the followup work afterwards.
-
-## What ships
-
-| Version | What | Status |
-|---|---|---|
-| v1 | Transcribe-only: Playwright joins Meet, scrapes captions to transcript file | ✓ ships by default |
-| v2 | Realtime duplex audio: bot speaks in-call via OpenAI Realtime + BlackHole/PulseAudio null-sink | ✓ opt in with `mode='realtime'` |
-| v3 | Remote node host: run the bot on a different machine than the gateway | ✓ opt in with `node='<name>'` |
-
-## Architecture
-
-```
-┌─ gateway (Linux box, where hermes runs) ────────────────────────────┐
-│                                                                      │
-│   agent → meet_join(url, mode='realtime', node='my-mac')             │
-│         │                                                            │
-│         └─ NodeClient ─── ws ────┐                                   │
-│                                  │                                   │
-└──────────────────────────────────┼───────────────────────────────────┘
-                                   │ wss (token auth)
-                                   ▼
-┌─ node host (user's Mac, signed-in Chrome lives here) ───────────────┐
-│                                                                      │
-│   NodeServer (from `hermes meet node run`)                           │
-│     │                                                                │
-│     ├─ start_bot → process_manager.start() → spawns meet_bot         │
-│     │                                                                │
-│     └─ meet_bot (Playwright)                                         │
-│        ├─ Chromium → meet.google.com                                 │
-│        ├─ caption scraper → transcript.txt                           │
-│        └─ (realtime mode only) RealtimeSpeaker thread                │
-│             ↓                                                        │
-│           OpenAI Realtime WS → speaker.pcm                           │
-│             ↓                                                        │
-│           paplay → null-sink ← Chrome fake mic                       │
-│                                                                      │
-└──────────────────────────────────────────────────────────────────────┘
-```
-
-Without v3: the whole right column runs on the gateway machine.
-Without v2: the "realtime" path is skipped; transcribe runs alone.
-
-## Files
-
-| Path | Purpose |
-|---|---|
-| `plugin.yaml` | manifest |
-| `__init__.py` | `register(ctx)` — registers 5 tools + `on_session_end` hook + `hermes meet` CLI |
-| `meet_bot.py` | Playwright bot subprocess (standalone, `python -m plugins.google_meet.meet_bot`) |
-| `process_manager.py` | local bot lifecycle + `enqueue_say` |
-| `tools.py` | agent-facing tools + node-routing helper |
-| `cli.py` | `hermes meet setup / auth / join / status / transcript / say / stop / node ...` |
-| `audio_bridge.py` | v2: PulseAudio null-sink (Linux) + BlackHole probe (macOS) |
-| `realtime/openai_client.py` | v2: `RealtimeSession` + `RealtimeSpeaker` (file-queue → OpenAI Realtime WS → PCM) |
-| `node/protocol.py` | v3: message envelope + validation |
-| `node/registry.py` | v3: `$HERMES_HOME/workspace/meetings/nodes.json` |
-| `node/server.py` | v3: `NodeServer` (runs on host machine) |
-| `node/client.py` | v3: `NodeClient` (used by tool handlers + CLI on gateway) |
-| `node/cli.py` | v3: `hermes meet node {run,list,approve,remove,status,ping}` |
-| `SKILL.md` | agent usage guide |
-
-## Local quick start
-
-```bash
-hermes plugins enable google_meet
-hermes meet install                                      # pip + Chromium
-hermes meet setup                                        # preflight
-hermes meet auth                                         # optional
-hermes meet join https://meet.google.com/abc-defg-hij    # transcribe
-```
-
-## Realtime mode
-
-Linux (preferred, most automated):
-```bash
-hermes meet install --realtime                     # installs pulseaudio-utils
-echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
-hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
-# then from the agent or CLI:
-hermes meet say "Good morning everyone, I'm the note-taker bot."
-```
-
-macOS:
-```bash
-hermes meet install --realtime     # runs: brew install blackhole-2ch ffmpeg
-# then — manually! — open System Settings → Sound → Input → BlackHole 2ch
-echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
-hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
-```
-
-On macOS, hermes will **not** switch your system audio input automatically — the
-user has to do it. This is deliberate: switching default input on a whim would
-be a surprising side effect.
-
-## Remote node host
-
-On the node machine (e.g. user's Mac with a signed-in Chrome):
-```bash
-pip install playwright websockets
-python -m playwright install chromium
-hermes plugins enable google_meet
-hermes meet node run --display-name my-mac --host 0.0.0.0 --port 18789
-# prints the bearer token on first run; copy it
-```
-
-On the gateway:
-```bash
-hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
-hermes meet node ping my-mac
-# now any meet_* tool call accepts node='my-mac' (or 'auto')
-```
-
-## Safety
-
- URL gate: only `https://meet.google.com/abc-defg-hij`, `/new`, `/lookup/<id>`.
- No calendar scanning, no auto-dial, no auto-consent announcement.
- Node server uses bearer-token auth; no key exchange, no TLS termination
-  built in — run it on a LAN or behind a reverse proxy you trust.
- One active meeting per (gateway, node) pair. A second `meet_join` leaves the first.
- `meet_say` refuses unless the active meeting was started with `mode='realtime'`.
-
-## Out of scope
-
- **Calendar scanning** — deliberately not implemented. Join URLs must be explicit.
- **Multi-tenant node sharing** — a node serves one gateway at a time.
- **Windows** — audio bridging isn't tested; `register()` no-ops on Windows.
- **System audio input switching on macOS** — user responsibility, not the bot's.
@@ -1,148 +0,0 @@
---
-name: google_meet
-description: Join a Google Meet call, transcribe live captions, optionally speak in realtime, and do the followup work afterwards. Use when the user asks the agent to sit in on a meeting, take notes, summarize, respond in-call, or action items from it.
-version: 0.2.0
-platforms:
-  - linux
-  - macos
-metadata:
-  hermes:
-    tags: [meetings, google-meet, transcription, realtime-voice]
---
-
-# google_meet
-
-## When to use
-
-The user says any of:
-
- "join my Meet at <url>"
- "take notes on this meeting"
- "summarize the meeting and send followups"
- "sit in on my standup"
- "be a bot in this call and speak up when X"
-
-## Two modes
-
-| Mode | What the bot does |
-|---|---|
-| `transcribe` (default) | Joins, enables captions, scrapes a transcript. Listen-only. |
-| `realtime` | Same as transcribe PLUS speaks into the meeting via OpenAI Realtime. The agent calls `meet_say(text)` and the bot's voice comes out of the call. |
-
-Pick `realtime` only when the user actually wants the agent to speak. It costs real money (OpenAI Realtime is pay-per-audio-minute) and requires a virtual audio device set up on the machine running the bot.
-
-## Two locations
-
-| Location | When |
-|---|---|
-| Local (default) | Gateway machine runs the Playwright bot directly. |
-| Remote node (`node="<name>"`) | Bot runs on a different machine that has a signed-in Chrome and (for realtime) a configured audio bridge. Useful when the gateway runs on a headless Linux box but the user's real signed-in Chrome lives on their Mac. |
-
-## Prerequisites the user must handle once
-
-Easiest path — run the built-in installer:
-
-```bash
-hermes plugins enable google_meet
-hermes meet install                 # pip deps + Chromium (transcribe only)
-hermes meet install --realtime      # + pulseaudio-utils / brew blackhole+ffmpeg
-hermes meet auth                    # optional; skips guest-lobby wait
-hermes meet setup                   # preflight checks
-```
-
-`hermes meet install --realtime` prompts before running `sudo apt-get` (Linux)
-or `brew install` (macOS). Pass `--yes` to skip the prompt. It will NOT touch
-your macOS default-input setting — you have to select BlackHole 2ch in
-System Settings yourself before starting a realtime meeting.
-
-Or do it manually:
-```bash
-pip install playwright websockets && python -m playwright install chromium
-
-# For realtime mode, additionally:
-#   Linux:  sudo apt install pulseaudio-utils
-#   macOS:  brew install blackhole-2ch ffmpeg
-#           → System Settings → Sound → Input → BlackHole 2ch
-#   Then set OPENAI_API_KEY or HERMES_MEET_REALTIME_KEY in ~/.hermes/.env
-```
-
-For a remote node:
-```bash
-# on the user's Mac (where Chrome is signed in):
-pip install playwright websockets && python -m playwright install chromium
-hermes plugins enable google_meet
-hermes meet node run --display-name my-mac    # persistent server
-# copy the printed token
-
-# on the gateway:
-hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
-hermes meet node ping my-mac                   # confirm reachable
-```
-
-Run `hermes meet setup` to preflight local prereqs.
-
-## Flow
-
-1. **Join** — call `meet_join(url=..., mode=..., node=...)`. Returns immediately.
-2. **Announce yourself** — no auto-consent. Say (in whatever channel the user is watching): "A Hermes agent bot is in this call taking notes."
-3. **Poll** — `meet_status()` for liveness, `meet_transcript(last=20)` for recent captions. Don't re-read the whole transcript every turn.
-4. **Speak (realtime only)** — `meet_say(text="...")` queues text for TTS. The speech lags by ~2s. Don't spam it.
-5. **Leave** — `meet_leave()` when done, or set `duration="30m"` on `meet_join` for auto-leave.
-6. **Follow up** — read `meet_transcript()` in full, summarize, and use regular tools to send the recap, file issues, schedule followups.
-
-## Tool reference
-
-| Tool | Parameters | Use |
-|---|---|---|
-| `meet_join` | `url`, `mode?`, `guest_name?`, `duration?`, `headed?`, `node?` | Start bot |
-| `meet_status` | `node?` | Liveness + progress |
-| `meet_transcript` | `last?`, `node?` | Read captions |
-| `meet_leave` | `node?` | Close bot |
-| `meet_say` | `text`, `node?` | Speak in realtime meeting |
-
-`node?` on all tools: pass a registered node name (or `"auto"` for the sole node) to operate a remote bot instead of a local one. Omit for local.
-
-## Important limits
-
- Captions are only as good as Google Meet's live captions. English-biased, lossy on overlapping speakers.
- Guest mode sits in the lobby until a host admits. Warn the user; `hermes meet auth` avoids this.
- **Lobby timeout**: if the host doesn't admit the bot within 5 minutes (configurable via `HERMES_MEET_LOBBY_TIMEOUT` env), the bot leaves and `meet_status` reports `leaveReason: "lobby_timeout"`.
- **One active meeting per install per location.** A second `meet_join` leaves the first.
- **Windows not supported.**
- Realtime mode needs a virtual audio device. If the audio bridge setup fails, the bot falls back to transcribe mode and flags it in `meet_status().error`.
- `meet_say` requires `mode='realtime'` on the originating `meet_join`. Calling it against a transcribe-mode meeting returns a clear error.
- **Barge-in is best-effort.** When a caption arrives attributed to a real participant while the bot is generating audio, the bot sends `response.cancel` to OpenAI Realtime. Captions take ~500ms to show up, so the bot will talk over the first second or so of a human interruption.
-
-## Status dict reference
-
-`meet_status()` returns (subset shown, there are more):
-
-| Key | Meaning |
-|---|---|
-| `inCall` | Past the lobby. False while waiting for admission. |
-| `lobbyWaiting` | Clicked "Ask to join", waiting on host. |
-| `joinAttemptedAt` / `joinedAt` | Timestamps for lobby-click and actual admission. |
-| `captioning` | Caption observer is installed. |
-| `transcriptLines` / `lastCaptionAt` | Transcript progress. |
-| `realtime` / `realtimeReady` | Realtime mode provisioned / WS connected. |
-| `realtimeDevice` | Audio device name the bot is feeding (e.g. `hermes_meet_src`). |
-| `audioBytesOut` / `lastAudioOutAt` | How much PCM the OpenAI session has produced. |
-| `lastBargeInAt` | Timestamp of the most recent `response.cancel` sent. |
-| `leaveReason` | `duration_expired`, `lobby_timeout`, `denied`, `page_closed`, or null. |
-| `error` | Last error (soft — bot may still be running). |
-
-## Transcript location
-
-Local:
-```
-$HERMES_HOME/workspace/meetings/<meeting-id>/transcript.txt
-```
-
-Remote node: transcript lives on the node host's disk. Use `meet_transcript(node=...)` to read it over RPC.
-
-## Safety
-
- URL regex: only `https://meet.google.com/...` URLs pass.
- No calendar scanning. No auto-dial.
- Remote nodes use bearer-token auth; tokens are generated on the node (32 hex chars, persisted in `$HERMES_HOME/workspace/meetings/node_token.json`) and must be copied to the gateway via `hermes meet node approve`.
- `meet_say` text is rate-limited by the OpenAI Realtime session; spam-protection is the bot's problem, not yours, but still — don't queue hundreds of lines.
@@ -1,103 +0,0 @@
-"""google_meet plugin — let the agent join a Meet call, transcribe it, follow up.
-
-v1: transcribe-only. Spawns a headless Chromium via Playwright, joins the Meet
-URL, enables live captions, scrapes them into a transcript file. The agent then
-has the transcript in its workspace and can do whatever followup work it needs
-using its regular tools.
-
-v2 (not in this PR): realtime duplex audio so the agent can speak in the
-meeting, via OpenAI Realtime / Gemini Live + BlackHole / PulseAudio null-sink.
-``meet_say`` exists as a stub today so the tool surface is stable.
-
-Explicit-by-design: only joins ``https://meet.google.com/`` URLs explicitly
-passed in. No calendar scanning, no auto-dial, no consent announcement.
-"""
-
-from __future__ import annotations
-
-import logging
-import platform
-
-from plugins.google_meet import process_manager as pm
-from plugins.google_meet.cli import register_cli as _register_meet_cli
-from plugins.google_meet.cli import meet_command as _meet_command
-from plugins.google_meet.tools import (
-    MEET_JOIN_SCHEMA,
-    MEET_LEAVE_SCHEMA,
-    MEET_SAY_SCHEMA,
-    MEET_STATUS_SCHEMA,
-    MEET_TRANSCRIPT_SCHEMA,
-    check_meet_requirements,
-    handle_meet_join,
-    handle_meet_leave,
-    handle_meet_say,
-    handle_meet_status,
-    handle_meet_transcript,
-)
-
-logger = logging.getLogger(__name__)
-
-
-_TOOLS = (
-    ("meet_join",       MEET_JOIN_SCHEMA,       handle_meet_join,       "📞"),
-    ("meet_status",     MEET_STATUS_SCHEMA,     handle_meet_status,     "🟢"),
-    ("meet_transcript", MEET_TRANSCRIPT_SCHEMA, handle_meet_transcript, "📝"),
-    ("meet_leave",      MEET_LEAVE_SCHEMA,      handle_meet_leave,      "👋"),
-    ("meet_say",        MEET_SAY_SCHEMA,        handle_meet_say,        "🗣️"),
-)
-
-
-def _on_session_end(**kwargs) -> None:
-    """Best-effort cleanup — if a meet bot is still running when the session
-    ends, leave the call so we don't orphan a headless Chromium.
-
-    No-ops when nothing is active. Swallows all exceptions — session end must
-    not fail because the bot cleanup hit an edge case.
-    """
-    try:
-        status = pm.status()
-        if status.get("ok") and status.get("alive"):
-            pm.stop(reason="session ended")
-    except Exception as e:  # pragma: no cover — defensive
-        logger.debug("google_meet on_session_end cleanup failed: %s", e)
-
-
-def register(ctx) -> None:
-    """Register tools, CLI, and lifecycle hooks.
-
-    Called once by the plugin loader when the plugin is enabled via
-    ``plugins.enabled`` in config.yaml.
-    """
-    # Windows is not supported in v1 — audio routing for v2 doesn't have a
-    # tested path there and guest-join Chromium is flakier. Refuse to register
-    # rather than half-working.
-    system = platform.system().lower()
-    if system not in ("linux", "darwin"):
-        logger.info(
-            "google_meet plugin: platform=%s not supported (linux/macos only)",
-            system,
-        )
-        return
-
-    for name, schema, handler, emoji in _TOOLS:
-        ctx.register_tool(
-            name=name,
-            toolset="google_meet",
-            schema=schema,
-            handler=handler,
-            check_fn=check_meet_requirements,
-            emoji=emoji,
-        )
-
-    ctx.register_cli_command(
-        name="meet",
-        help="Google Meet bot (join, transcribe, follow up)",
-        setup_fn=_register_meet_cli,
-        handler_fn=_meet_command,
-        description=(
-            "Let the hermes agent join a Google Meet call and scrape live "
-            "captions into a transcript. See: hermes meet setup"
-        ),
-    )
-
-    ctx.register_hook("on_session_end", _on_session_end)
@@ -1,244 +0,0 @@
-"""Virtual audio bridge for feeding generated speech into Chrome's mic.
-
-v2 module. Provisions a platform-specific virtual audio device so the
-Meet bot's Chromium instance can be pointed at an input source we
-control. The OpenAI Realtime client writes PCM bytes into this device;
-Chrome reads them as if they were coming from a microphone.
-
-Linux (primary): uses pactl (PulseAudio) to create a null-sink plus a
-virtual source whose master is the null-sink's monitor. Callers set
-PULSE_SOURCE=<source_name> in Chrome's env and pass the fake-mic flag.
-
-macOS: requires BlackHole 2ch to be installed. This module only
-verifies its presence and returns the device name; routing OS default
-input is left to the user (or a future switchaudio-osx integration) to
-avoid surprising the user's system audio state.
-
-Windows: not supported in v2.
-"""
-
-from __future__ import annotations
-
-import platform
-import subprocess
-from typing import Optional
-
-
-_BLACKHOLE_DEVICE = "BlackHole 2ch"
-
-
-class AudioBridge:
-    """Manages a virtual audio device for Chrome fake-mic input.
-
-    Call ``setup()`` once before launching the Meet bot and
-    ``teardown()`` when the session ends. ``teardown()`` is idempotent.
-    """
-
-    def __init__(self, name_prefix: str = "hermes_meet") -> None:
-        self._name_prefix = name_prefix
-        self._platform: Optional[str] = None
-        self._device_name: Optional[str] = None
-        self._write_target: Optional[str] = None
-        self._module_ids: list[int] = []
-        self._torn_down = False
-
-    # ── public properties ─────────────────────────────────────────────────
-
-    @property
-    def device_name(self) -> str:
-        if not self._device_name:
-            raise RuntimeError("AudioBridge not set up yet")
-        return self._device_name
-
-    @property
-    def write_target(self) -> str:
-        if not self._write_target:
-            raise RuntimeError("AudioBridge not set up yet")
-        return self._write_target
-
-    # ── lifecycle ─────────────────────────────────────────────────────────
-
-    def setup(self) -> dict:
-        """Provision the virtual audio device.
-
-        Returns a dict describing the device. Raises RuntimeError on
-        unsupported platforms or when required system tools are missing.
-        """
-        system = platform.system()
-        if system == "Linux":
-            return self._setup_linux()
-        if system == "Darwin":
-            return self._setup_darwin()
-        if system == "Windows":
-            raise RuntimeError("windows not supported in v2")
-        raise RuntimeError(f"unsupported platform: {system}")
-
-    def teardown(self) -> None:
-        """Release the virtual audio device. Idempotent."""
-        if self._torn_down:
-            return
-        # Only Linux needs explicit unloading.
-        if self._platform == "linux" and self._module_ids:
-            # Unload in reverse order (virtual-source before null-sink).
-            for mod_id in reversed(self._module_ids):
-                try:
-                    subprocess.run(
-                        ["pactl", "unload-module", str(mod_id)],
-                        check=False,
-                        capture_output=True,
-                    )
-                except Exception:
-                    # Best-effort teardown — never raise from here.
-                    pass
-            self._module_ids = []
-        self._torn_down = True
-
-    # ── platform impls ────────────────────────────────────────────────────
-
-    def _setup_linux(self) -> dict:
-        sink_name = f"{self._name_prefix}_sink"
-        src_name = f"{self._name_prefix}_src"
-
-        try:
-            sink_out = subprocess.run(
-                [
-                    "pactl",
-                    "load-module",
-                    "module-null-sink",
-                    f"sink_name={sink_name}",
-                    f"sink_properties=device.description=HermesMeetSink",
-                ],
-                check=True,
-                capture_output=True,
-                text=True,
-            )
-        except FileNotFoundError as exc:
-            raise RuntimeError(
-                "pactl not found — install PulseAudio/pipewire-pulse"
-            ) from exc
-        except subprocess.CalledProcessError as exc:
-            raise RuntimeError(
-                f"pactl load-module null-sink failed: {exc.stderr or exc}"
-            ) from exc
-
-        sink_mod_id = self._parse_module_id(sink_out.stdout)
-
-        try:
-            src_out = subprocess.run(
-                [
-                    "pactl",
-                    "load-module",
-                    "module-virtual-source",
-                    f"source_name={src_name}",
-                    f"master={sink_name}.monitor",
-                ],
-                check=True,
-                capture_output=True,
-                text=True,
-            )
-        except subprocess.CalledProcessError as exc:
-            # Roll back the null-sink we just created so we don't leak it.
-            subprocess.run(
-                ["pactl", "unload-module", str(sink_mod_id)],
-                check=False,
-                capture_output=True,
-            )
-            raise RuntimeError(
-                f"pactl load-module virtual-source failed: {exc.stderr or exc}"
-            ) from exc
-
-        src_mod_id = self._parse_module_id(src_out.stdout)
-
-        self._platform = "linux"
-        self._device_name = src_name
-        self._write_target = sink_name
-        self._module_ids = [sink_mod_id, src_mod_id]
-        self._torn_down = False
-
-        return {
-            "platform": "linux",
-            "device_name": src_name,
-            "sample_rate": 48000,
-            "channels": 2,
-            "module_ids": list(self._module_ids),
-            "write_target": sink_name,
-        }
-
-    def _setup_darwin(self) -> dict:
-        try:
-            out = subprocess.check_output(
-                ["system_profiler", "SPAudioDataType"],
-                text=True,
-                stderr=subprocess.STDOUT,
-            )
-        except FileNotFoundError as exc:
-            raise RuntimeError(
-                "system_profiler not found (macOS-only command)"
-            ) from exc
-        except subprocess.CalledProcessError as exc:
-            raise RuntimeError(
-                f"system_profiler failed: {exc.output}"
-            ) from exc
-
-        if "BlackHole" not in out:
-            raise RuntimeError(
-                "BlackHole virtual audio device not installed. "
-                "Install via: brew install blackhole-2ch"
-            )
-
-        self._platform = "darwin"
-        self._device_name = _BLACKHOLE_DEVICE
-        self._write_target = _BLACKHOLE_DEVICE
-        self._module_ids = []
-        self._torn_down = False
-
-        return {
-            "platform": "darwin",
-            "device_name": _BLACKHOLE_DEVICE,
-            "sample_rate": 48000,
-            "channels": 2,
-            "module_ids": [],
-            "write_target": _BLACKHOLE_DEVICE,
-        }
-
-    # ── helpers ──────────────────────────────────────────────────────────
-
-    @staticmethod
-    def _parse_module_id(stdout: str) -> int:
-        """pactl load-module prints the new module ID to stdout."""
-        text = (stdout or "").strip()
-        if not text:
-            raise RuntimeError("pactl load-module returned empty stdout")
-        # Take the last whitespace-separated token on the first non-empty line.
-        first = text.splitlines()[0].strip()
-        token = first.split()[-1]
-        try:
-            return int(token)
-        except ValueError as exc:
-            raise RuntimeError(
-                f"could not parse pactl module id from: {stdout!r}"
-            ) from exc
-
-
-def chrome_fake_audio_flags(bridge_info: dict) -> list[str]:
-    """Return Chrome flags for using the fake audio input.
-
-    The PulseAudio source is selected via the ``PULSE_SOURCE`` env var,
-    which callers must set in Chrome's environment before launch:
-
-        env["PULSE_SOURCE"] = bridge_info["device_name"]
-
-    On macOS the caller must ensure the system default audio input is
-    set to the returned BlackHole device (we do not flip that switch).
-    """
-    system = platform.system()
-    if system == "Linux":
-        # Chromium on Linux picks up the PulseAudio source selected via
-        # PULSE_SOURCE env var; the fake-ui flag skips the permission
-        # prompt so the bot can pick "use my mic" without user input.
-        return ["--use-fake-ui-for-media-stream"]
-    if system == "Darwin":
-        return ["--use-fake-ui-for-media-stream"]
-    if system == "Windows":
-        raise RuntimeError("windows not supported in v2")
-    raise RuntimeError(f"unsupported platform: {system}")
@@ -1,478 +0,0 @@
-"""CLI commands for the google_meet plugin.
-
-Wires ``hermes meet <subcommand>``:
-  setup       — preflight playwright, chromium, auth file, print fixes
-  auth        — open a browser to sign into Google, save storage state
-  join <url>  — join a Meet URL synchronously (also callable from the agent)
-  status      — print current bot state
-  transcript  — print the transcript
-  stop        — leave the current meeting
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import os
-import sys
-from pathlib import Path
-from typing import Optional
-
-from hermes_constants import get_hermes_home
-
-from plugins.google_meet import process_manager as pm
-from plugins.google_meet.meet_bot import _is_safe_meet_url
-
-
-def _auth_state_path() -> Path:
-    return Path(get_hermes_home()) / "workspace" / "meetings" / "auth.json"
-
-
-# ---------------------------------------------------------------------------
-# argparse wiring
-# ---------------------------------------------------------------------------
-
-def register_cli(subparser: argparse.ArgumentParser) -> None:
-    """Build the ``hermes meet`` argparse tree.
-
-    Called by :func:`_register_cli_commands` at plugin load time.
-    """
-    subs = subparser.add_subparsers(dest="meet_command")
-
-    subs.add_parser("setup", help="Preflight: playwright, chromium, auth")
-
-    inst_p = subs.add_parser(
-        "install",
-        help="Install prerequisites (pip deps, Chromium, platform audio tools)",
-    )
-    inst_p.add_argument(
-        "--realtime", action="store_true",
-        help="Also install realtime audio tools (pulseaudio-utils on Linux, BlackHole+ffmpeg on macOS). Uses sudo/brew, prompts before invoking either.",
-    )
-    inst_p.add_argument(
-        "--yes", "-y", action="store_true",
-        help="Answer yes to all prompts (use with care; will run sudo apt-get or brew without asking).",
-    )
-
-    subs.add_parser("auth", help="Sign in to Google and save session state")
-
-    join_p = subs.add_parser("join", help="Join a Meet URL")
-    join_p.add_argument("url", help="https://meet.google.com/...")
-    join_p.add_argument("--guest-name", default="Hermes Agent")
-    join_p.add_argument("--duration", default=None, help="e.g. 30m, 2h, 90s")
-    join_p.add_argument("--headed", action="store_true", help="show browser")
-    join_p.add_argument(
-        "--mode", choices=("transcribe", "realtime"), default="transcribe",
-        help="transcribe (default, listen-only) or realtime (speak via OpenAI Realtime)"
-    )
-    join_p.add_argument(
-        "--node", default=None,
-        help="remote node name, or 'auto' to use the sole registered node"
-    )
-
-    subs.add_parser("status", help="Print current Meet bot state")
-
-    tr_p = subs.add_parser("transcript", help="Print the scraped transcript")
-    tr_p.add_argument("--last", type=int, default=None)
-
-    say_p = subs.add_parser("say", help="Speak text in an active realtime meeting")
-    say_p.add_argument("text", help="what to say")
-    say_p.add_argument("--node", default=None)
-
-    subs.add_parser("stop", help="Leave the current meeting")
-
-    # v3: remote node host management.
-    node_p = subs.add_parser(
-        "node",
-        help="Manage remote meet node hosts (run/list/approve/remove/status/ping)",
-    )
-    try:
-        from plugins.google_meet.node.cli import register_cli as _register_node_cli
-        _register_node_cli(node_p)
-    except Exception as e:  # pragma: no cover — defensive
-        # If the node module fails to import for any reason (optional dep
-        # missing at import time etc.), leave the subparser present but
-        # flag it. The argparse dispatch will surface a clear error.
-        def _node_unavailable(args):
-            print(f"hermes meet node: module unavailable ({e})")
-            return 1
-        node_p.set_defaults(func=_node_unavailable)
-
-    subparser.set_defaults(func=meet_command)
-
-
-# ---------------------------------------------------------------------------
-# Dispatch
-# ---------------------------------------------------------------------------
-
-def meet_command(args: argparse.Namespace) -> int:
-    sub = getattr(args, "meet_command", None)
-    if not sub:
-        print("usage: hermes meet {setup,auth,join,status,transcript,say,stop,node}")
-        return 2
-    if sub == "setup":
-        return _cmd_setup()
-    if sub == "install":
-        return _cmd_install(
-            realtime=bool(getattr(args, "realtime", False)),
-            assume_yes=bool(getattr(args, "yes", False)),
-        )
-    if sub == "auth":
-        return _cmd_auth()
-    if sub == "join":
-        return _cmd_join(
-            url=args.url,
-            guest_name=args.guest_name,
-            duration=args.duration,
-            headed=args.headed,
-            mode=getattr(args, "mode", "transcribe"),
-            node=getattr(args, "node", None),
-        )
-    if sub == "status":
-        return _cmd_status()
-    if sub == "transcript":
-        return _cmd_transcript(last=args.last)
-    if sub == "say":
-        return _cmd_say(text=args.text, node=getattr(args, "node", None))
-    if sub == "stop":
-        return _cmd_stop()
-    if sub == "node":
-        # Dispatch was set by the node cli's register_cli; fall through to
-        # whatever its subparsers wired.
-        fn = getattr(args, "func", None)
-        if fn is None or fn is meet_command:
-            print("usage: hermes meet node {run,list,approve,remove,status,ping}")
-            return 2
-        return fn(args)
-    print(f"unknown subcommand: {sub}")
-    return 2
-
-
-# ---------------------------------------------------------------------------
-# Subcommand handlers
-# ---------------------------------------------------------------------------
-
-def _cmd_setup() -> int:
-    import platform as _p
-
-    print("google_meet preflight")
-    print("---------------------")
-
-    system = _p.system()
-    system_ok = system in ("Linux", "Darwin")
-    print(f"  platform       : {system}  [{'ok' if system_ok else 'unsupported'}]")
-
-    try:
-        import playwright  # noqa: F401
-        pw_ok = True
-        pw_msg = "installed"
-    except ImportError:
-        pw_ok = False
-        pw_msg = "NOT installed — run: pip install playwright"
-    print(f"  playwright     : {pw_msg}")
-
-    chromium_ok = False
-    chromium_msg = "unknown"
-    if pw_ok:
-        try:
-            from playwright.sync_api import sync_playwright
-            with sync_playwright() as p:
-                try:
-                    exe = p.chromium.executable_path
-                    if exe and Path(exe).exists():
-                        chromium_ok = True
-                        chromium_msg = f"ok ({exe})"
-                    else:
-                        chromium_msg = (
-                            "not installed — run: "
-                            "python -m playwright install chromium"
-                        )
-                except Exception as e:
-                    chromium_msg = f"probe failed: {e}"
-        except Exception as e:
-            chromium_msg = f"probe failed: {e}"
-    print(f"  chromium       : {chromium_msg}")
-
-    auth_path = _auth_state_path()
-    auth_ok = auth_path.is_file()
-    print(
-        "  google auth    : "
-        + (f"ok ({auth_path})" if auth_ok else "not saved — run: hermes meet auth")
-    )
-
-    print()
-    all_ok = system_ok and pw_ok and chromium_ok
-    if all_ok:
-        print(
-            "ready. Join a meeting:  "
-            "hermes meet join https://meet.google.com/abc-defg-hij"
-        )
-    else:
-        print("not ready yet — fix the items above.")
-    return 0 if all_ok else 1
-
-
-def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
-    """Install the plugin's prerequisites.
-
-    Always: pip install playwright + websockets, then
-    ``python -m playwright install chromium``.
-
-    With ``--realtime``: also install the platform audio bridge deps.
-      Linux : ``sudo apt-get install -y pulseaudio-utils``
-      macOS : ``brew install blackhole-2ch ffmpeg``  (+ remind the user
-              to select BlackHole as the default input device manually)
-
-    Prompts before every package-manager invocation unless ``--yes``.
-    Refuses to run on Windows.
-    """
-    import platform as _p
-    import shutil as _shutil
-    import subprocess as _sp
-
-    system = _p.system()
-    if system not in ("Linux", "Darwin"):
-        print(f"google_meet install: {system} is not supported (linux/macos only)")
-        return 1
-
-    def _confirm(prompt: str) -> bool:
-        if assume_yes:
-            return True
-        try:
-            ans = input(f"{prompt} [y/N] ").strip().lower()
-        except EOFError:
-            return False
-        return ans in ("y", "yes")
-
-    print("google_meet install")
-    print("-------------------")
-
-    # 1) pip deps — always safe, venv-scoped.
-    pip_pkgs = ["playwright", "websockets"]
-    print(f"\n[1/3] pip install: {' '.join(pip_pkgs)}")
-    try:
-        res = _sp.run(
-            [sys.executable, "-m", "pip", "install", "--upgrade", *pip_pkgs],
-            check=False,
-        )
-        if res.returncode != 0:
-            print("  pip install failed")
-            return 1
-    except Exception as e:
-        print(f"  pip install failed: {e}")
-        return 1
-
-    # 2) Playwright browsers — pulls chromium (~300MB first run).
-    print("\n[2/3] python -m playwright install chromium")
-    try:
-        res = _sp.run(
-            [sys.executable, "-m", "playwright", "install", "chromium"],
-            check=False,
-        )
-        if res.returncode != 0:
-            print("  playwright install failed (may already be installed)")
-    except Exception as e:
-        print(f"  playwright install failed: {e}")
-        return 1
-
-    # 3) Platform audio deps for realtime mode.
-    if realtime:
-        print("\n[3/3] realtime audio deps")
-        if system == "Linux":
-            if _shutil.which("paplay") and _shutil.which("pactl"):
-                print("  pulseaudio-utils already installed.")
-            else:
-                if not _confirm(
-                    "  install pulseaudio-utils? this runs `sudo apt-get install -y pulseaudio-utils`"
-                ):
-                    print("  skipped (you can run it manually later)")
-                else:
-                    cmd = ["sudo", "apt-get", "install", "-y", "pulseaudio-utils"]
-                    print(f"  $ {' '.join(cmd)}")
-                    res = _sp.run(cmd, check=False)
-                    if res.returncode != 0:
-                        print("  apt install failed — install pulseaudio-utils manually")
-        elif system == "Darwin":
-            have_bh = False
-            try:
-                out = _sp.check_output(["system_profiler", "SPAudioDataType"], text=True)
-                have_bh = "BlackHole" in out
-            except Exception:
-                pass
-            have_ffmpeg = bool(_shutil.which("ffmpeg"))
-            needs = []
-            if not have_bh:
-                needs.append("blackhole-2ch")
-            if not have_ffmpeg:
-                needs.append("ffmpeg")
-            if not needs:
-                print("  BlackHole and ffmpeg already installed.")
-            elif not _shutil.which("brew"):
-                print(
-                    "  missing: " + ", ".join(needs) + "\n"
-                    "  install Homebrew first (https://brew.sh) or install the packages manually."
-                )
-            else:
-                if not _confirm(f"  install via brew: {' '.join(needs)}?"):
-                    print("  skipped (you can run it manually later)")
-                else:
-                    cmd = ["brew", "install", *needs]
-                    print(f"  $ {' '.join(cmd)}")
-                    res = _sp.run(cmd, check=False)
-                    if res.returncode != 0:
-                        print("  brew install failed — install them manually")
-            print(
-                "\n  NOTE: macOS does not auto-route audio. Open\n"
-                "    System Settings → Sound → Input\n"
-                "  and select 'BlackHole 2ch' before starting a realtime meeting.\n"
-                "  hermes will not switch your default input for you."
-            )
-    else:
-        print("\n[3/3] skipped (pass --realtime to install audio tooling too)")
-
-    print("\ndone. verify with: hermes meet setup")
-    return 0
-
-
-def _cmd_auth() -> int:
-    """Open a headed Chromium, let the user sign in, save storage_state."""
-    try:
-        from playwright.sync_api import sync_playwright
-    except ImportError:
-        print(
-            "playwright is not installed. run:\n"
-            "  pip install playwright && python -m playwright install chromium"
-        )
-        return 1
-
-    path = _auth_state_path()
-    path.parent.mkdir(parents=True, exist_ok=True)
-
-    print(f"opening Chromium — sign in to Google, then return here and press Enter.")
-    print(f"saving storage state to: {path}")
-    try:
-        with sync_playwright() as pw:
-            browser = pw.chromium.launch(headless=False)
-            context = browser.new_context()
-            page = context.new_page()
-            page.goto("https://accounts.google.com/", wait_until="domcontentloaded")
-            try:
-                input("press Enter after you've signed in ... ")
-            except EOFError:
-                pass
-            context.storage_state(path=str(path))
-            browser.close()
-    except Exception as e:
-        print(f"auth failed: {e}")
-        return 1
-    print("saved. you can now run: hermes meet join <url>")
-    return 0
-
-
-def _cmd_join(
-    url: str,
-    *,
-    guest_name: str,
-    duration: Optional[str],
-    headed: bool,
-    mode: str = "transcribe",
-    node: Optional[str] = None,
-) -> int:
-    if not _is_safe_meet_url(url):
-        print(f"refusing: not a meet.google.com URL: {url}")
-        return 2
-    if node:
-        # Remote: go through NodeClient.
-        try:
-            from plugins.google_meet.node.registry import NodeRegistry
-            from plugins.google_meet.node.client import NodeClient
-        except ImportError as e:
-            print(f"node module unavailable: {e}")
-            return 1
-        reg = NodeRegistry()
-        entry = reg.resolve(node if node != "auto" else None)
-        if entry is None:
-            print(f"no registered node matches {node!r}")
-            return 1
-        client = NodeClient(url=entry["url"], token=entry["token"])
-        try:
-            res = client.start_bot(
-                url=url, guest_name=guest_name, duration=duration,
-                headed=headed, mode=mode,
-            )
-        except Exception as e:
-            print(f"remote start_bot failed: {e}")
-            return 1
-        print(json.dumps({"node": entry.get("name"), **res}, indent=2))
-        return 0 if res.get("ok") else 1
-
-    auth = _auth_state_path()
-    res = pm.start(
-        url=url,
-        headed=headed,
-        guest_name=guest_name,
-        duration=duration,
-        auth_state=str(auth) if auth.is_file() else None,
-        mode=mode,
-    )
-    print(json.dumps(res, indent=2))
-    return 0 if res.get("ok") else 1
-
-
-def _cmd_say(text: str, node: Optional[str] = None) -> int:
-    if not (text or "").strip():
-        print("refusing: empty text")
-        return 2
-    if node:
-        try:
-            from plugins.google_meet.node.registry import NodeRegistry
-            from plugins.google_meet.node.client import NodeClient
-        except ImportError as e:
-            print(f"node module unavailable: {e}")
-            return 1
-        reg = NodeRegistry()
-        entry = reg.resolve(node if node != "auto" else None)
-        if entry is None:
-            print(f"no registered node matches {node!r}")
-            return 1
-        client = NodeClient(url=entry["url"], token=entry["token"])
-        try:
-            res = client.say(text)
-        except Exception as e:
-            print(f"remote say failed: {e}")
-            return 1
-        print(json.dumps({"node": entry.get("name"), **res}, indent=2))
-        return 0 if res.get("ok") else 1
-
-    res = pm.enqueue_say(text)
-    print(json.dumps(res, indent=2))
-    return 0 if res.get("ok") else 1
-
-
-def _cmd_status() -> int:
-    res = pm.status()
-    print(json.dumps(res, indent=2))
-    return 0 if res.get("ok") else 1
-
-
-def _cmd_transcript(last: Optional[int]) -> int:
-    res = pm.transcript(last=last)
-    if not res.get("ok"):
-        print(json.dumps(res, indent=2))
-        return 1
-    for ln in res.get("lines", []):
-        print(ln)
-    return 0
-
-
-def _cmd_stop() -> int:
-    res = pm.stop(reason="hermes meet stop")
-    print(json.dumps(res, indent=2))
-    return 0 if res.get("ok") else 1
-
-
-if __name__ == "__main__":  # pragma: no cover
-    parser = argparse.ArgumentParser(prog="hermes meet")
-    register_cli(parser)
-    ns = parser.parse_args()
-    sys.exit(meet_command(ns))
@@ -1,852 +0,0 @@
-"""Headless Google Meet bot — Playwright + live-caption scraping.
-
-Runs as a standalone subprocess spawned by ``process_manager.py``. Reads config
-from env vars, writes status + transcript to files under
-``$HERMES_HOME/workspace/meetings/<meeting-id>/``. The main hermes process
-reads those files via the ``meet_*`` tools — no IPC beyond filesystem.
-
-The scraping strategy mirrors OpenUtter (sumansid/openutter): we don't parse
-WebRTC audio, we enable Google Meet's built-in live captions and observe the
-captions container in the DOM via a MutationObserver. This is lossy and
-English-biased but it is:
-
-* deterministic (no API keys, no STT billing),
-* works behind Meet's normal login / admission,
-* survives Meet UI rewrites fairly well because the caption container has a
-  stable ARIA role.
-
-Run standalone for debugging::
-
-    HERMES_MEET_URL=https://meet.google.com/abc-defg-hij \\
-    HERMES_MEET_OUT_DIR=/tmp/meet-debug \\
-    HERMES_MEET_HEADED=1 \\
-    python -m plugins.google_meet.meet_bot
-
-No meet.google.com URL → exits non-zero. Any URL that doesn't start with
-``https://meet.google.com/`` is rejected (explicit-by-design).
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import re
-import signal
-import sys
-import threading
-import time
-from pathlib import Path
-from typing import Optional
-
-# Match ``https://meet.google.com/abc-defg-hij`` or ``.../lookup/...`` — the
-# short three-segment code or a lookup URL. Anything else is rejected.
-MEET_URL_RE = re.compile(
-    r"^https://meet\.google\.com/("
-    r"[a-z0-9]{3,}-[a-z0-9]{3,}-[a-z0-9]{3,}"
-    r"|lookup/[^/?#]+"
-    r"|new"
-    r")(?:[/?#].*)?$"
-)
-
-
-# Filenames the bot reads/writes in ``HERMES_MEET_OUT_DIR``.
-SAY_QUEUE_FILENAME = "say_queue.jsonl"
-SAY_PCM_FILENAME = "speaker.pcm"
-
-
-def _is_safe_meet_url(url: str) -> bool:
-    """Return True if *url* is a Google Meet URL we're willing to navigate to."""
-    if not isinstance(url, str):
-        return False
-    return bool(MEET_URL_RE.match(url.strip()))
-
-
-def _meeting_id_from_url(url: str) -> str:
-    """Extract the 3-segment meeting code from a Meet URL.
-
-    For ``https://meet.google.com/abc-defg-hij`` → ``abc-defg-hij``.
-    For ``.../lookup/<id>`` or ``/new`` we fall back to a timestamped id — the
-    bot won't know the real code until after redirect, and callers pass this
-    through to filename anyway.
-    """
-    m = re.search(
-        r"meet\.google\.com/([a-z0-9]{3,}-[a-z0-9]{3,}-[a-z0-9]{3,})",
-        url or "",
-    )
-    if m:
-        return m.group(1)
-    return f"meet-{int(time.time())}"
-
-
-# ---------------------------------------------------------------------------
-# Status + transcript file writers
-# ---------------------------------------------------------------------------
-
-class _BotState:
-    """Single-process mutable state, flushed to ``status.json`` on each change."""
-
-    def __init__(self, out_dir: Path, meeting_id: str, url: str):
-        self.out_dir = out_dir
-        self.meeting_id = meeting_id
-        self.url = url
-        self.in_call = False
-        self.captioning = False
-        self.captions_enabled_attempted = False
-        self.lobby_waiting = False
-        self.join_attempted_at: Optional[float] = None
-        self.joined_at: Optional[float] = None
-        self.last_caption_at: Optional[float] = None
-        self.transcript_lines = 0
-        self.error: Optional[str] = None
-        self.exited = False
-        # v2 realtime fields.
-        self.realtime = False
-        self.realtime_ready = False
-        self.realtime_device: Optional[str] = None
-        self.audio_bytes_out: int = 0
-        self.last_audio_out_at: Optional[float] = None
-        self.last_barge_in_at: Optional[float] = None
-        self.leave_reason: Optional[str] = None
-        # Scraped captions, in order, deduped. Each entry is a dict of
-        # {"ts": <epoch>, "speaker": str, "text": str}.
-        self._seen: set = set()
-        out_dir.mkdir(parents=True, exist_ok=True)
-        self.transcript_path = out_dir / "transcript.txt"
-        self.status_path = out_dir / "status.json"
-        self._flush()
-
-    # -------- transcript ------------------------------------------------
-
-    def record_caption(self, speaker: str, text: str) -> None:
-        """Append a caption line if we haven't seen this exact (speaker, text)."""
-        speaker = (speaker or "").strip() or "Unknown"
-        text = (text or "").strip()
-        if not text:
-            return
-        key = f"{speaker}|{text}"
-        if key in self._seen:
-            return
-        self._seen.add(key)
-        self.transcript_lines += 1
-        self.last_caption_at = time.time()
-        ts = time.strftime("%H:%M:%S", time.localtime(self.last_caption_at))
-        line = f"[{ts}] {speaker}: {text}\n"
-        # Atomic-ish append — good enough for a single-writer.
-        with self.transcript_path.open("a", encoding="utf-8") as f:
-            f.write(line)
-        self._flush()
-
-    # -------- status file ----------------------------------------------
-
-    def _flush(self) -> None:
-        data = {
-            "meetingId": self.meeting_id,
-            "url": self.url,
-            "inCall": self.in_call,
-            "captioning": self.captioning,
-            "captionsEnabledAttempted": self.captions_enabled_attempted,
-            "lobbyWaiting": self.lobby_waiting,
-            "joinAttemptedAt": self.join_attempted_at,
-            "joinedAt": self.joined_at,
-            "lastCaptionAt": self.last_caption_at,
-            "transcriptLines": self.transcript_lines,
-            "transcriptPath": str(self.transcript_path),
-            "error": self.error,
-            "exited": self.exited,
-            "pid": os.getpid(),
-            # v2 realtime telemetry.
-            "realtime": self.realtime,
-            "realtimeReady": self.realtime_ready,
-            "realtimeDevice": self.realtime_device,
-            "audioBytesOut": self.audio_bytes_out,
-            "lastAudioOutAt": self.last_audio_out_at,
-            "lastBargeInAt": self.last_barge_in_at,
-            "leaveReason": self.leave_reason,
-        }
-        tmp = self.status_path.with_suffix(".json.tmp")
-        tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
-        tmp.replace(self.status_path)
-
-    def set(self, **kwargs) -> None:
-        for k, v in kwargs.items():
-            setattr(self, k, v)
-        self._flush()
-
-
-# ---------------------------------------------------------------------------
-# Playwright bot entry point
-# ---------------------------------------------------------------------------
-
-# JavaScript injected into the Meet tab to observe captions. Captures
-# {speaker, text} tuples via a MutationObserver on the caption container,
-# and exposes ``window.__hermesMeetDrain()`` to pull new entries. This
-# mirrors the OpenUtter caption scraping approach.
-_CAPTION_OBSERVER_JS = r"""
-(() => {
-  if (window.__hermesMeetInstalled) return;
-  window.__hermesMeetInstalled = true;
-  window.__hermesMeetQueue = [];
-
-  const captionSelector = '[role="region"][aria-label*="aption" i], ' +
-                          'div[jsname="YSxPC"], ' +  // legacy
-                          'div[jsname="tgaKEf"]';    // current (Apr 2026)
-
-  function pushEntry(speaker, text) {
-    if (!text || !text.trim()) return;
-    window.__hermesMeetQueue.push({
-      ts: Date.now(),
-      speaker: (speaker || '').trim(),
-      text: text.trim(),
-    });
-  }
-
-  function scan(root) {
-    // Meet captions render as a list of rows; each row contains a speaker
-    // label and a text block. Selectors vary across Meet rewrites; we try
-    // a few shapes and fall back to raw text.
-    const rows = root.querySelectorAll('div[jsname="dsyhDe"], div.CNusmb, div.TBMuR');
-    if (rows.length) {
-      rows.forEach((row) => {
-        const spkEl = row.querySelector('div.KcIKyf, div.zs7s8d, span[jsname="YSxPC"]');
-        const txtEl = row.querySelector('div.bh44bd, span[jsname="tgaKEf"], div.iTTPOb');
-        const speaker = spkEl ? spkEl.innerText : '';
-        const text = txtEl ? txtEl.innerText : row.innerText;
-        pushEntry(speaker, text);
-      });
-      return;
-    }
-    // Fallback: treat the whole region's innerText as one anonymous line.
-    const text = (root.innerText || '').split('\n').filter(Boolean).pop();
-    pushEntry('', text);
-  }
-
-  function attach() {
-    const el = document.querySelector(captionSelector);
-    if (!el) return false;
-    const obs = new MutationObserver(() => scan(el));
-    obs.observe(el, { childList: true, subtree: true, characterData: true });
-    scan(el);
-    return true;
-  }
-
-  // Try now and retry on interval — the caption region only appears after
-  // captions are enabled and someone speaks.
-  if (!attach()) {
-    const iv = setInterval(() => { if (attach()) clearInterval(iv); }, 1500);
-  }
-
-  window.__hermesMeetDrain = () => {
-    const out = window.__hermesMeetQueue.slice();
-    window.__hermesMeetQueue = [];
-    return out;
-  };
-})();
-"""
-
-
-def _enable_captions_js() -> str:
-    """Return a small JS snippet that tries to click the 'Turn on captions' button.
-
-    Best-effort — Meet's caption toggle is keyboard-accessible via ``c``. We
-    dispatch that keystroke as a cheap fallback. Real click targeting is too
-    brittle to rely on.
-    """
-    return r"""
-    (() => {
-      const ev = new KeyboardEvent('keydown', {
-        key: 'c', code: 'KeyC', keyCode: 67, which: 67, bubbles: true,
-      });
-      document.body.dispatchEvent(ev);
-      return true;
-    })();
-    """
-
-
-def _start_realtime_speaker(
-    *,
-    rt: dict,
-    out_dir: Path,
-    bridge_info: dict,
-    api_key: str,
-    model: str,
-    voice: str,
-    instructions: str,
-    stop_flag: dict,
-    state: "_BotState",
-) -> None:
-    """Wire up the OpenAI Realtime session + speaker thread + PCM pump.
-
-    The speaker thread reads text lines from ``say_queue.jsonl``, sends each
-    to OpenAI Realtime, and writes PCM audio into ``speaker.pcm``. A
-    separate *pump* thread forwards that PCM into the OS audio sink so
-    Chrome's fake mic picks it up. On Linux we pipe to ``paplay`` against
-    the null-sink; on macOS the caller is expected to have the BlackHole
-    device selected as default input.
-    """
-    try:
-        from plugins.google_meet.realtime.openai_client import (
-            RealtimeSession,
-            RealtimeSpeaker,
-        )
-    except Exception as e:
-        state.set(error=f"realtime import failed: {e}")
-        return
-
-    pcm_path = out_dir / SAY_PCM_FILENAME
-    queue_path = out_dir / SAY_QUEUE_FILENAME
-    processed_path = out_dir / "say_processed.jsonl"
-    # Reset the sink file so we start clean each session.
-    pcm_path.write_bytes(b"")
-    # Make sure the queue exists so the speaker poller doesn't error on
-    # first iteration.
-    queue_path.touch()
-
-    try:
-        session = RealtimeSession(
-            api_key=api_key,
-            model=model,
-            voice=voice,
-            instructions=instructions,
-            audio_sink_path=pcm_path,
-            sample_rate=24000,
-        )
-        session.connect()
-    except Exception as e:
-        state.set(error=f"realtime connect failed: {e}")
-        return
-
-    rt["session"] = session
-
-    def _stop_fn():
-        return stop_flag.get("stop", False)
-
-    rt["speaker_stop"] = lambda: stop_flag.__setitem__("stop", stop_flag.get("stop", False))
-
-    speaker = RealtimeSpeaker(
-        session=session,
-        queue_path=queue_path,
-        processed_path=processed_path,
-    )
-
-    def _speaker_loop():
-        try:
-            speaker.run_until_stopped(_stop_fn)
-        except Exception as e:
-            state.set(error=f"realtime speaker crashed: {e}")
-
-    t_speaker = threading.Thread(target=_speaker_loop, name="meet-speaker", daemon=True)
-    t_speaker.start()
-    rt["speaker_thread"] = t_speaker
-
-    # PCM pump: feeds speaker.pcm (24kHz s16le mono) into the OS audio
-    # device that Chrome's fake mic reads from. Different tools per
-    # platform, but the contract is the same — block-read the growing
-    # PCM file and stream it to the device in near-real-time.
-    platform_tag = (bridge_info or {}).get("platform")
-    if platform_tag == "linux":
-        import subprocess as _sp
-
-        sink = (bridge_info or {}).get("write_target") or "hermes_meet_sink"
-        try:
-            proc = _sp.Popen(
-                [
-                    "paplay",
-                    "--raw",
-                    "--rate=24000",
-                    "--format=s16le",
-                    "--channels=1",
-                    f"--device={sink}",
-                    str(pcm_path),
-                ],
-                stdin=_sp.DEVNULL,
-                stdout=_sp.DEVNULL,
-                stderr=_sp.DEVNULL,
-            )
-            rt["pcm_pump"] = proc
-        except FileNotFoundError:
-            state.set(error="paplay not found — install pulseaudio-utils for realtime on Linux")
-    elif platform_tag == "darwin":
-        # macOS: use ffmpeg to tail-read speaker.pcm and write it to the
-        # BlackHole output device. The user must have BlackHole selected
-        # as the default input in System Settings → Sound for Chrome to
-        # pick it up. We prefer ffmpeg because it's scriptable and can
-        # target AVFoundation devices by name; fall back to afplay-ing
-        # the file in a tight loop if ffmpeg is absent.
-        import shutil as _shutil
-        import subprocess as _sp
-
-        device_name = (bridge_info or {}).get("write_target") or "BlackHole 2ch"
-        if _shutil.which("ffmpeg"):
-            try:
-                # -re: read input at native frame rate.
-                # -f avfoundation -i: speaker path as raw PCM.
-                # -f s16le -ar 24000 -ac 1 -i <pcm>: interpret the file.
-                # -f audiotoolbox -audio_device_index: write to BlackHole.
-                # Simpler: output as raw via coreaudio using "-f audiotoolbox".
-                # ffmpeg's audiotoolbox output picks the current default
-                # output device, which isn't what we want. Instead we use
-                # -f avfoundation with the named device as OUTPUT via
-                # -vn and the device name.
-                proc = _sp.Popen(
-                    [
-                        "ffmpeg",
-                        "-nostdin", "-hide_banner", "-loglevel", "error",
-                        "-re",
-                        "-f", "s16le", "-ar", "24000", "-ac", "1",
-                        "-i", str(pcm_path),
-                        "-f", "audiotoolbox",
-                        "-audio_device_index", _mac_audio_device_index(device_name),
-                        "-",
-                    ],
-                    stdin=_sp.DEVNULL,
-                    stdout=_sp.DEVNULL,
-                    stderr=_sp.DEVNULL,
-                )
-                rt["pcm_pump"] = proc
-            except FileNotFoundError:
-                state.set(error="ffmpeg not found — install via `brew install ffmpeg` for realtime on macOS")
-            except Exception as e:
-                state.set(error=f"macOS pcm pump failed to start: {e}")
-        else:
-            state.set(error="ffmpeg not found — install via `brew install ffmpeg` for realtime on macOS")
-
-
-def _mac_audio_device_index(device_name: str) -> str:
-    """Return the ffmpeg ``-audio_device_index`` for *device_name*, as a string.
-
-    Probes ``ffmpeg -f avfoundation -list_devices true -i ''`` (which prints
-    the device table on stderr) and matches *device_name* case-insensitively.
-    Defaults to ``"0"`` if the device can't be found — caller will get a
-    misrouted stream but not a crash, and the error will be obvious.
-    """
-    import subprocess as _sp
-
-    try:
-        out = _sp.run(
-            ["ffmpeg", "-f", "avfoundation", "-list_devices", "true", "-i", ""],
-            capture_output=True,
-            text=True,
-            timeout=10,
-        )
-    except Exception:
-        return "0"
-    # ffmpeg prints the table on stderr. Lines look like:
-    #   [AVFoundation indev @ 0x...] [0] BlackHole 2ch
-    import re as _re
-
-    needle = device_name.strip().lower()
-    for line in (out.stderr or "").splitlines():
-        m = _re.search(r"\[(\d+)\]\s+(.+)$", line)
-        if not m:
-            continue
-        if m.group(2).strip().lower() == needle:
-            return m.group(1)
-    return "0"
-
-
-def run_bot() -> int:  # noqa: C901 — orchestration, explicit branches
-    url = os.environ.get("HERMES_MEET_URL", "").strip()
-    out_dir_env = os.environ.get("HERMES_MEET_OUT_DIR", "").strip()
-    headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in ("1", "true", "yes")
-    auth_state = os.environ.get("HERMES_MEET_AUTH_STATE", "").strip()
-    guest_name = os.environ.get("HERMES_MEET_GUEST_NAME", "Hermes Agent")
-    duration_s = _parse_duration(os.environ.get("HERMES_MEET_DURATION", ""))
-    # v2: optional realtime mode. Enabled when HERMES_MEET_MODE=realtime.
-    mode = os.environ.get("HERMES_MEET_MODE", "transcribe").strip().lower()
-    realtime_model = os.environ.get("HERMES_MEET_REALTIME_MODEL", "gpt-realtime")
-    realtime_voice = os.environ.get("HERMES_MEET_REALTIME_VOICE", "alloy")
-    realtime_instructions = os.environ.get("HERMES_MEET_REALTIME_INSTRUCTIONS", "")
-    realtime_api_key = os.environ.get("HERMES_MEET_REALTIME_KEY") or os.environ.get("OPENAI_API_KEY", "")
-
-    if not url or not _is_safe_meet_url(url):
-        sys.stderr.write(
-            "google_meet bot: refusing to launch — HERMES_MEET_URL must be a "
-            "meet.google.com URL. got: %r\n" % url
-        )
-        return 2
-    if not out_dir_env:
-        sys.stderr.write("google_meet bot: HERMES_MEET_OUT_DIR is required\n")
-        return 2
-
-    out_dir = Path(out_dir_env)
-    meeting_id = _meeting_id_from_url(url)
-    state = _BotState(out_dir=out_dir, meeting_id=meeting_id, url=url)
-
-    # SIGTERM → exit cleanly so the parent ``meet_leave`` gets a finalized
-    # transcript. We set a flag instead of raising so the Playwright context
-    # teardown runs in the finally block below.
-    stop_flag = {"stop": False}
-
-    def _on_signal(_sig, _frame):
-        stop_flag["stop"] = True
-
-    signal.signal(signal.SIGTERM, _on_signal)
-    signal.signal(signal.SIGINT, _on_signal)
-
-    # v2 realtime: provision virtual audio device + start speaker thread.
-    # We track these in a dict so the finally block can tear them down
-    # regardless of how we exit. If anything in the realtime setup fails we
-    # fall back to transcribe mode with a status flag.
-    rt = {
-        "enabled": mode == "realtime",
-        "bridge": None,            # AudioBridge | None
-        "bridge_info": None,       # dict | None
-        "session": None,           # RealtimeSession | None
-        "speaker_thread": None,    # threading.Thread | None
-        "speaker_stop": None,      # callable | None
-    }
-    if rt["enabled"]:
-        if not realtime_api_key:
-            state.set(error="realtime mode requested but no API key in HERMES_MEET_REALTIME_KEY/OPENAI_API_KEY — falling back to transcribe")
-            rt["enabled"] = False
-        else:
-            try:
-                from plugins.google_meet.audio_bridge import AudioBridge
-                bridge = AudioBridge()
-                rt["bridge_info"] = bridge.setup()
-                rt["bridge"] = bridge
-                state.set(realtime=True, realtime_device=rt["bridge_info"].get("device_name"))
-            except Exception as e:
-                state.set(error=f"audio bridge setup failed: {e} — falling back to transcribe")
-                rt["enabled"] = False
-
-    try:
-        from playwright.sync_api import sync_playwright
-    except ImportError as e:
-        state.set(error=f"playwright not installed: {e}", exited=True)
-        sys.stderr.write(
-            "google_meet bot: playwright is not installed. Run "
-            "`pip install playwright && python -m playwright install chromium`\n"
-        )
-        if rt["bridge"]:
-            rt["bridge"].teardown()
-        return 3
-
-    # Chrome env: if realtime is live on Linux, point PULSE_SOURCE at the
-    # virtual source so Chrome's fake mic reads the audio we generate.
-    chrome_env = os.environ.copy()
-    chrome_args = [
-        "--use-fake-ui-for-media-stream",
-        "--disable-blink-features=AutomationControlled",
-    ]
-    if not rt["enabled"]:
-        # v1-style fake device (silence) — we don't care about mic content
-        # when we're not speaking.
-        chrome_args.insert(1, "--use-fake-device-for-media-stream")
-    elif rt["bridge_info"] and rt["bridge_info"].get("platform") == "linux":
-        chrome_env["PULSE_SOURCE"] = rt["bridge_info"].get("device_name", "")
-
-    try:
-        with sync_playwright() as pw:
-            # Playwright's launch() doesn't take env; we set PULSE_SOURCE
-            # via the process env before launch so the child Chrome inherits it.
-            for k, v in chrome_env.items():
-                os.environ[k] = v
-            browser = pw.chromium.launch(
-                headless=not headed,
-                args=chrome_args,
-            )
-            context_args = {
-                "viewport": {"width": 1280, "height": 800},
-                "user_agent": (
-                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
-                    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
-                ),
-                "permissions": ["microphone", "camera"],
-            }
-            if auth_state and Path(auth_state).is_file():
-                context_args["storage_state"] = auth_state
-            context = browser.new_context(**context_args)
-            page = context.new_page()
-
-            try:
-                page.goto(url, wait_until="domcontentloaded", timeout=30_000)
-            except Exception as e:
-                state.set(error=f"navigate failed: {e}", exited=True)
-                return 4
-
-            # Guest-mode: Meet shows a name field before "Ask to join". When
-            # we're authed, we instead see "Join now".
-            _try_guest_name(page, guest_name)
-            _click_join(page, state)
-
-            # Install caption observer and attempt to enable captions.
-            try:
-                page.evaluate(_enable_captions_js())
-                state.set(captions_enabled_attempted=True)
-            except Exception:
-                pass
-            try:
-                page.evaluate(_CAPTION_OBSERVER_JS)
-            except Exception as e:
-                state.set(error=f"caption observer install failed: {e}")
-
-            # Note: in_call=False until admission is confirmed (we detect
-            # either the Leave button or the caption region, signalling we
-            # made it past the lobby).
-            state.set(captioning=True, join_attempted_at=time.time())
-
-            # v2 realtime: start the speaker thread reading from the
-            # plugin-side say queue. The thread reads JSONL lines written by
-            # meet_say, calls OpenAI Realtime, and streams the audio PCM to
-            # the virtual sink that Chrome's fake-mic is pointed at.
-            if rt["enabled"]:
-                _start_realtime_speaker(
-                    rt=rt,
-                    out_dir=out_dir,
-                    bridge_info=rt["bridge_info"],
-                    api_key=realtime_api_key,
-                    model=realtime_model,
-                    voice=realtime_voice,
-                    instructions=realtime_instructions,
-                    stop_flag=stop_flag,
-                    state=state,
-                )
-                if rt["session"] is not None:
-                    state.set(realtime_ready=True)
-
-            # Admission + drain loop. Runs until SIGTERM, duration expiry,
-            # or the page detects "You were removed / you left the
-            # meeting". Responsible for:
-            #   * detecting admission (Leave button visible → in_call=True)
-            #   * timing out stuck-in-lobby (default 5 minutes)
-            #   * draining scraped captions into the transcript
-            #   * triggering realtime barge-in when a human speaks while
-            #     the bot is generating audio
-            #   * periodically flushing realtime counters into status.json
-            deadline = (time.time() + duration_s) if duration_s else None
-            lobby_deadline = time.time() + float(
-                os.environ.get("HERMES_MEET_LOBBY_TIMEOUT", "300")
-            )
-            last_admission_check = 0.0
-            while not stop_flag["stop"]:
-                now = time.time()
-                if deadline and now > deadline:
-                    state.set(leave_reason="duration_expired")
-                    break
-
-                # Admission detection every ~3s until admitted.
-                if not state.in_call and (now - last_admission_check) > 3.0:
-                    last_admission_check = now
-                    admitted = _detect_admission(page)
-                    if admitted:
-                        state.set(
-                            in_call=True,
-                            lobby_waiting=False,
-                            joined_at=now,
-                        )
-                    elif now > lobby_deadline:
-                        state.set(
-                            error=(
-                                "lobby timeout — host never admitted the bot "
-                                f"within {int(lobby_deadline - state.join_attempted_at) if state.join_attempted_at else 0}s"
-                            ),
-                            leave_reason="lobby_timeout",
-                        )
-                        break
-                    elif _detect_denied(page):
-                        state.set(
-                            error="host denied admission",
-                            leave_reason="denied",
-                        )
-                        break
-
-                try:
-                    queued = page.evaluate("window.__hermesMeetDrain && window.__hermesMeetDrain()")
-                    if isinstance(queued, list):
-                        for entry in queued:
-                            if not isinstance(entry, dict):
-                                continue
-                            speaker = str(entry.get("speaker", ""))
-                            text = str(entry.get("text", ""))
-                            state.record_caption(speaker=speaker, text=text)
-                            # Barge-in: if the bot is currently generating
-                            # audio AND a real human just spoke, cancel the
-                            # in-flight response so we don't talk over them.
-                            if rt["enabled"] and rt["session"] is not None:
-                                if _looks_like_human_speaker(speaker, guest_name):
-                                    try:
-                                        cancelled = rt["session"].cancel_response()
-                                        if cancelled:
-                                            state.set(last_barge_in_at=now)
-                                    except Exception:
-                                        pass
-                except Exception:
-                    # Meet reloaded or we got booted — try to detect and
-                    # exit gracefully rather than spinning.
-                    if page.is_closed():
-                        state.set(leave_reason="page_closed")
-                        break
-
-                # Fold the realtime session's byte/timestamp counters into
-                # the status file so meet_status can surface them.
-                if rt["session"] is not None:
-                    state.set(
-                        audio_bytes_out=getattr(rt["session"], "audio_bytes_out", 0),
-                        last_audio_out_at=getattr(rt["session"], "last_audio_out_at", None),
-                    )
-
-                time.sleep(1.0)
-
-            # Try to leave cleanly — click "Leave call" button if present.
-            try:
-                page.evaluate(
-                    "() => { const b = document.querySelector('button[aria-label*=\"eave call\"]');"
-                    " if (b) b.click(); }"
-                )
-            except Exception:
-                pass
-
-            context.close()
-            browser.close()
-            # v2: teardown realtime speaker + audio bridge.
-            if rt["speaker_stop"]:
-                try:
-                    rt["speaker_stop"]()
-                except Exception:
-                    pass
-            if rt["speaker_thread"] is not None:
-                try:
-                    rt["speaker_thread"].join(timeout=5.0)
-                except Exception:
-                    pass
-            if rt["session"]:
-                try:
-                    rt["session"].close()
-                except Exception:
-                    pass
-            if rt["bridge"]:
-                try:
-                    rt["bridge"].teardown()
-                except Exception:
-                    pass
-            state.set(in_call=False, captioning=False, exited=True)
-            return 0
-
-    except Exception as e:
-        state.set(error=f"unhandled: {e}", exited=True)
-        return 1
-
-
-def _try_guest_name(page, guest_name: str) -> None:
-    """If Meet is showing a guest-name input, type *guest_name* into it."""
-    try:
-        # Meet's guest name input has placeholder "Your name".
-        locator = page.locator('input[aria-label*="name" i]').first
-        if locator.count() and locator.is_visible():
-            locator.fill(guest_name, timeout=2_000)
-    except Exception:
-        pass
-
-
-def _detect_admission(page) -> bool:
-    """True if we're clearly past the lobby and in the call itself.
-
-    Uses a JS-side probe because Meet's DOM structure varies by client
-    version. We check several high-signal indicators and declare admission
-    on the first hit:
-
-      1. Leave-call button is present (``aria-label`` contains "eave call").
-      2. Caption region has appeared (we installed the observer and it attached).
-      3. The participant list container is visible.
-
-    Conservative by default — returns False on any error.
-    """
-    probe = r"""
-    (() => {
-      const leave = document.querySelector('button[aria-label*="eave call" i]');
-      if (leave) return true;
-      if (window.__hermesMeetInstalled) {
-        const caps = document.querySelector(
-          '[role="region"][aria-label*="aption" i], ' +
-          'div[jsname="YSxPC"], div[jsname="tgaKEf"]'
-        );
-        if (caps) return true;
-      }
-      const parts = document.querySelector('[aria-label*="articipants" i]');
-      if (parts) return true;
-      return false;
-    })();
-    """
-    try:
-        return bool(page.evaluate(probe))
-    except Exception:
-        return False
-
-
-def _detect_denied(page) -> bool:
-    """True when Meet is showing a 'you were denied' / 'no one admitted' page."""
-    probe = r"""
-    (() => {
-      const text = document.body ? document.body.innerText || '' : '';
-      // English only — matches what shows up when the host denies or
-      // removes a guest.
-      if (/You can't join this video call/i.test(text)) return true;
-      if (/You were removed from the meeting/i.test(text)) return true;
-      if (/No one responded to your request to join/i.test(text)) return true;
-      return false;
-    })();
-    """
-    try:
-        return bool(page.evaluate(probe))
-    except Exception:
-        return False
-
-
-def _looks_like_human_speaker(speaker: str, bot_guest_name: str) -> bool:
-    """Whether a caption line's speaker is probably a human, not our bot echo.
-
-    Meet attributes captions to the speaker's display name. When Chrome is
-    reading our fake mic, Meet still attributes captions to *our* bot name
-    (because the bot is the one "speaking"). We don't want those to trigger
-    barge-in. Anything else — real participant names — does.
-
-    Conservative: unknown / blank speakers (common when caption scraping
-    falls back to raw text) do NOT trigger barge-in, because we can't tell
-    whether it was a human or us.
-    """
-    if not speaker or not speaker.strip():
-        return False
-    spk = speaker.strip().lower()
-    if spk in ("unknown", "you", bot_guest_name.strip().lower()):
-        return False
-    return True
-
-
-def _click_join(page, state: _BotState) -> None:
-    """Click 'Join now' or 'Ask to join' if either button is visible.
-
-    Flags ``lobby_waiting`` when we hit the "waiting for host to admit you"
-    state so the agent can surface that in status.
-    """
-    for label in ("Join now", "Ask to join"):
-        try:
-            btn = page.get_by_role("button", name=label, exact=False).first
-            if btn.count() and btn.is_visible():
-                btn.click(timeout=3_000)
-                if label == "Ask to join":
-                    state.set(lobby_waiting=True)
-                break
-        except Exception:
-            continue
-
-
-def _parse_duration(raw: str) -> Optional[float]:
-    """Parse ``30m`` / ``2h`` / ``90`` (seconds) → float seconds, or None."""
-    if not raw:
-        return None
-    raw = raw.strip().lower()
-    try:
-        if raw.endswith("h"):
-            return float(raw[:-1]) * 3600
-        if raw.endswith("m"):
-            return float(raw[:-1]) * 60
-        if raw.endswith("s"):
-            return float(raw[:-1])
-        return float(raw)
-    except ValueError:
-        return None
-
-
-if __name__ == "__main__":  # pragma: no cover — subprocess entry point
-    sys.exit(run_bot())
@@ -1,54 +0,0 @@
-"""Remote 'node host' primitive for the google_meet plugin.
-
-Lets the Meet bot (Playwright + Chrome) run on a different machine than
-the hermes-agent gateway. The gateway speaks a small JSON-over-WebSocket
-RPC protocol to the remote node; the node wraps the existing
-``plugins.google_meet.process_manager`` API.
-
-Topology
--------
-    gateway (Linux)  ── ws://mac.local:18789 ──▶  node server (Mac)
-                                                  └─ process_manager
-                                                     └─ meet_bot (Playwright)
-
-Why: Google sign-in + Chrome profile live on the user's laptop. Running
-the bot there reuses that profile without shipping credentials to the
-server.
-
-Public surface
--------------
-    NodeClient     — gateway-side RPC client (short-lived sync WS per call)
-    NodeServer     — long-running server that hosts the bot
-    NodeRegistry   — local JSON registry of approved nodes (name → url+token)
-    protocol       — message envelope helpers (make_request, encode, decode, ...)
-"""
-
-from __future__ import annotations
-
-from plugins.google_meet.node import protocol
-from plugins.google_meet.node.client import NodeClient
-from plugins.google_meet.node.protocol import (
-    VALID_REQUEST_TYPES,
-    decode,
-    encode,
-    make_error,
-    make_request,
-    make_response,
-    validate_request,
-)
-from plugins.google_meet.node.registry import NodeRegistry
-from plugins.google_meet.node.server import NodeServer
-
-__all__ = [
-    "NodeClient",
-    "NodeServer",
-    "NodeRegistry",
-    "protocol",
-    "make_request",
-    "make_response",
-    "make_error",
-    "encode",
-    "decode",
-    "validate_request",
-    "VALID_REQUEST_TYPES",
-]
@@ -1,125 +0,0 @@
-"""`hermes meet node ...` subcommand tree.
-
-Wired into the existing ``hermes meet`` parser by the plugin's top-level
-CLI. This module only defines the subparsers and their dispatch — it
-does not mutate the existing cli.py.
-"""
-
-from __future__ import annotations
-
-import argparse
-import asyncio
-import json
-import sys
-from typing import Any
-
-from plugins.google_meet.node.client import NodeClient
-from plugins.google_meet.node.registry import NodeRegistry
-from plugins.google_meet.node.server import NodeServer
-
-
-def register_cli(subparser: argparse.ArgumentParser) -> None:
-    """Add ``run / list / approve / remove / status / ping`` subparsers.
-
-    *subparser* is the ``hermes meet node`` argparse object — typically
-    the result of ``meet_parser.add_parser('node', ...)``.
-    """
-    sp = subparser.add_subparsers(dest="node_cmd", required=True)
-
-    run = sp.add_parser("run", help="Start a node server on this machine.")
-    run.add_argument("--host", default="0.0.0.0")
-    run.add_argument("--port", type=int, default=18789)
-    run.add_argument("--display-name", default="hermes-meet-node")
-    run.set_defaults(func=node_command)
-
-    lst = sp.add_parser("list", help="List approved remote nodes.")
-    lst.set_defaults(func=node_command)
-
-    app = sp.add_parser("approve", help="Register a remote node on the gateway.")
-    app.add_argument("name")
-    app.add_argument("url")
-    app.add_argument("token")
-    app.set_defaults(func=node_command)
-
-    rm = sp.add_parser("remove", help="Forget a registered node.")
-    rm.add_argument("name")
-    rm.set_defaults(func=node_command)
-
-    st = sp.add_parser("status", help="Ping a registered node.")
-    st.add_argument("name")
-    st.set_defaults(func=node_command)
-
-    pg = sp.add_parser("ping", help="Alias for status.")
-    pg.add_argument("name")
-    pg.set_defaults(func=node_command)
-
-
-def node_command(args: argparse.Namespace) -> int:
-    """Dispatch for ``hermes meet node ...``.
-
-    Returns a process exit code. Side-effects print to stdout/stderr.
-    """
-    cmd = getattr(args, "node_cmd", None)
-
-    if cmd == "run":
-        server = NodeServer(
-            host=args.host,
-            port=args.port,
-            display_name=args.display_name,
-        )
-        token = server.ensure_token()
-        print(f"[meet-node] display_name={server.display_name}")
-        print(f"[meet-node] listening on ws://{args.host}:{args.port}")
-        print(f"[meet-node] token (copy to gateway): {token}")
-        print(f"[meet-node] approve with:")
-        print(f"             hermes meet node approve <name> ws://<host>:{args.port} {token}")
-        try:
-            asyncio.run(server.serve())
-        except KeyboardInterrupt:
-            return 0
-        except RuntimeError as exc:
-            print(f"[meet-node] error: {exc}", file=sys.stderr)
-            return 2
-        return 0
-
-    reg = NodeRegistry()
-
-    if cmd == "list":
-        nodes = reg.list_all()
-        if not nodes:
-            print("no nodes registered")
-            return 0
-        for n in nodes:
-            print(f"{n['name']}\t{n['url']}\ttoken={n['token'][:6]}…")
-        return 0
-
-    if cmd == "approve":
-        reg.add(args.name, args.url, args.token)
-        print(f"approved node {args.name!r} at {args.url}")
-        return 0
-
-    if cmd == "remove":
-        ok = reg.remove(args.name)
-        print(f"removed {args.name!r}" if ok else f"no such node: {args.name!r}")
-        return 0 if ok else 1
-
-    if cmd in ("status", "ping"):
-        entry = reg.get(args.name)
-        if entry is None:
-            print(f"no such node: {args.name!r}", file=sys.stderr)
-            return 1
-        client = NodeClient(entry["url"], entry["token"])
-        try:
-            result = client.ping()
-        except Exception as exc:  # noqa: BLE001 — surface any connection error
-            print(json.dumps({"ok": False, "error": str(exc)}))
-            return 1
-        print(json.dumps({"ok": True, "node": args.name, **_coerce_dict(result)}))
-        return 0
-
-    print(f"unknown node command: {cmd!r}", file=sys.stderr)
-    return 2
-
-
-def _coerce_dict(value: Any) -> dict:
-    return value if isinstance(value, dict) else {"result": value}
@@ -1,107 +0,0 @@
-"""Gateway-side RPC client for a remote meet node.
-
-Each call opens a short-lived synchronous WebSocket to the node, sends
-exactly one request, reads exactly one response, and closes. This keeps
-the client trivial to use from non-async tool handlers and avoids
-maintaining persistent connection state across agent turns.
-
-The ``websockets`` package is an optional dep — we import it lazily so
-plugin load doesn't require it.
-"""
-
-from __future__ import annotations
-
-from typing import Any, Dict, Optional
-
-from plugins.google_meet.node import protocol as _proto
-
-
-class NodeClient:
-    """Thin synchronous WS client matching the server's request surface."""
-
-    def __init__(self, url: str, token: str, timeout: float = 10.0) -> None:
-        if not isinstance(url, str) or not url:
-            raise ValueError("url must be a non-empty string")
-        if not isinstance(token, str) or not token:
-            raise ValueError("token must be a non-empty string")
-        self.url = url
-        self.token = token
-        self.timeout = float(timeout)
-
-    # ----- core RPC -----------------------------------------------------
-
-    def _rpc(self, type: str, payload: Dict[str, Any]) -> Dict[str, Any]:
-        """Send one request, return the response payload dict.
-
-        Raises RuntimeError when the server sends an ``error`` envelope
-        or the response id doesn't match.
-        """
-        try:
-            from websockets.sync.client import connect  # type: ignore
-        except ImportError as exc:
-            raise RuntimeError(
-                "NodeClient requires the 'websockets' package. "
-                "Install it with: pip install websockets"
-            ) from exc
-
-        req = _proto.make_request(type, self.token, payload)
-        raw_out = _proto.encode(req)
-
-        with connect(self.url, open_timeout=self.timeout,
-                     close_timeout=self.timeout) as ws:
-            ws.send(raw_out)
-            raw_in = ws.recv(timeout=self.timeout)
-
-        if isinstance(raw_in, (bytes, bytearray)):
-            raw_in = raw_in.decode("utf-8")
-        resp = _proto.decode(raw_in)
-
-        if resp.get("type") == "error":
-            raise RuntimeError(f"node error: {resp.get('error', '<unknown>')}")
-        if resp.get("id") != req["id"]:
-            raise RuntimeError(
-                f"response id mismatch: sent {req['id']}, got {resp.get('id')!r}"
-            )
-        payload_out = resp.get("payload")
-        if not isinstance(payload_out, dict):
-            # Ping returns {"type": "pong", "payload": {...}} — still a dict.
-            raise RuntimeError("response missing payload dict")
-        return payload_out
-
-    # ----- convenience methods -----------------------------------------
-
-    def start_bot(
-        self,
-        url: str,
-        guest_name: str = "Hermes Agent",
-        duration: Optional[str] = None,
-        headed: bool = False,
-        mode: str = "transcribe",
-    ) -> Dict[str, Any]:
-        payload: Dict[str, Any] = {
-            "url": url,
-            "guest_name": guest_name,
-            "headed": bool(headed),
-            "mode": mode,
-        }
-        if duration is not None:
-            payload["duration"] = duration
-        return self._rpc("start_bot", payload)
-
-    def stop(self) -> Dict[str, Any]:
-        return self._rpc("stop", {})
-
-    def status(self) -> Dict[str, Any]:
-        return self._rpc("status", {})
-
-    def transcript(self, last: Optional[int] = None) -> Dict[str, Any]:
-        payload: Dict[str, Any] = {}
-        if last is not None:
-            payload["last"] = int(last)
-        return self._rpc("transcript", payload)
-
-    def say(self, text: str) -> Dict[str, Any]:
-        return self._rpc("say", {"text": str(text)})
-
-    def ping(self) -> Dict[str, Any]:
-        return self._rpc("ping", {})
@@ -1,124 +0,0 @@
-"""Wire protocol for gateway ↔ node RPC.
-
-Everything is a JSON object with the same envelope shape:
-
-    Request:   {"type": <str>, "id": <str>, "token": <str>, "payload": <dict>}
-    Response:  {"type": "<req-type>_res", "id": <req-id>, "payload": <dict>}
-    Error:     {"type": "error", "id": <req-id>, "error": <str>}
-
-Requests must carry the shared bearer token (set up via
-``hermes meet node approve`` on the gateway and read off disk on the
-server). Mismatched tokens are rejected before dispatch.
-"""
-
-from __future__ import annotations
-
-import json
-import uuid
-from typing import Any, Dict, Tuple
-
-
-VALID_REQUEST_TYPES = frozenset({
-    "start_bot",
-    "stop",
-    "status",
-    "transcript",
-    "say",
-    "ping",
-})
-
-
-def make_request(
-    type: str,
-    token: str,
-    payload: Dict[str, Any],
-    req_id: str | None = None,
-) -> Dict[str, Any]:
-    """Construct a request envelope.
-
-    ``req_id`` is auto-generated (uuid4 hex) when not supplied so callers
-    can correlate async responses.
-    """
-    if not isinstance(type, str) or not type:
-        raise ValueError("type must be a non-empty string")
-    if type not in VALID_REQUEST_TYPES:
-        raise ValueError(f"unknown request type: {type!r}")
-    if not isinstance(token, str):
-        raise ValueError("token must be a string")
-    if not isinstance(payload, dict):
-        raise ValueError("payload must be a dict")
-    return {
-        "type": type,
-        "id": req_id or uuid.uuid4().hex,
-        "token": token,
-        "payload": payload,
-    }
-
-
-def make_response(req_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
-    """Build a success response. The caller supplies the *request* type;
-    we suffix it with ``_res`` so clients can assert they got the right
-    reply.
-
-    For simplicity we don't require the type here — clients usually just
-    key off ``id``. But we still emit a generic ``*_res`` envelope.
-    """
-    if not isinstance(payload, dict):
-        raise ValueError("payload must be a dict")
-    return {"type": "response", "id": req_id, "payload": payload}
-
-
-def make_error(req_id: str, error: str) -> Dict[str, Any]:
-    return {"type": "error", "id": req_id, "error": str(error)}
-
-
-def encode(msg: Dict[str, Any]) -> str:
-    """Serialize a message envelope to a JSON string."""
-    return json.dumps(msg, separators=(",", ":"), ensure_ascii=False)
-
-
-def decode(raw: str) -> Dict[str, Any]:
-    """Parse a JSON envelope, raising ValueError on anything malformed.
-
-    Minimal type validation: must be an object, must contain ``type`` and
-    ``id``. Heavier validation (token match, payload shape) happens in
-    :func:`validate_request` on the server side.
-    """
-    try:
-        obj = json.loads(raw)
-    except (TypeError, json.JSONDecodeError) as exc:
-        raise ValueError(f"malformed JSON: {exc}") from exc
-    if not isinstance(obj, dict):
-        raise ValueError("envelope must be a JSON object")
-    if "type" not in obj or not isinstance(obj["type"], str):
-        raise ValueError("envelope missing string 'type'")
-    if "id" not in obj or not isinstance(obj["id"], str):
-        raise ValueError("envelope missing string 'id'")
-    return obj
-
-
-def validate_request(msg: Dict[str, Any], expected_token: str) -> Tuple[bool, str]:
-    """Check a decoded request against the server's shared token.
-
-    Returns ``(True, "")`` when the envelope is acceptable or
-    ``(False, <reason>)`` otherwise. Reason strings are safe to surface
-    back to the client in an error envelope.
-    """
-    if not isinstance(msg, dict):
-        return False, "envelope must be a dict"
-    t = msg.get("type")
-    if not isinstance(t, str) or not t:
-        return False, "missing or non-string 'type'"
-    if t not in VALID_REQUEST_TYPES:
-        return False, f"unknown request type: {t!r}"
-    if not isinstance(msg.get("id"), str) or not msg.get("id"):
-        return False, "missing or non-string 'id'"
-    token = msg.get("token")
-    if not isinstance(token, str) or not token:
-        return False, "missing token"
-    if token != expected_token:
-        return False, "token mismatch"
-    payload = msg.get("payload")
-    if not isinstance(payload, dict):
-        return False, "payload must be a dict"
-    return True, ""
@@ -1,112 +0,0 @@
-"""Local JSON registry of approved remote meet nodes.
-
-Lives at ``$HERMES_HOME/workspace/meetings/nodes.json``. The gateway
-consults it to resolve a ``chrome_node`` name to a ``(url, token)`` pair
-before opening a WebSocket to the remote bot host.
-
-Schema
------
-    {
-      "nodes": {
-        "<name>": {
-          "url":   "ws://host:port",
-          "token": "...",
-          "added_at": <epoch_float>
-        }
-      }
-    }
-"""
-
-from __future__ import annotations
-
-import json
-import time
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-from hermes_constants import get_hermes_home
-
-
-def _default_path() -> Path:
-    return Path(get_hermes_home()) / "workspace" / "meetings" / "nodes.json"
-
-
-class NodeRegistry:
-    """Simple file-backed registry. Not concurrent-safe across processes
-    — single writer assumed (the gateway CLI)."""
-
-    def __init__(self, path: Optional[Path] = None) -> None:
-        self.path = Path(path) if path is not None else _default_path()
-
-    # ----- storage ------------------------------------------------------
-
-    def _load(self) -> Dict[str, Any]:
-        if not self.path.is_file():
-            return {"nodes": {}}
-        try:
-            data = json.loads(self.path.read_text(encoding="utf-8"))
-        except (OSError, json.JSONDecodeError):
-            return {"nodes": {}}
-        if not isinstance(data, dict) or not isinstance(data.get("nodes"), dict):
-            return {"nodes": {}}
-        return data
-
-    def _save(self, data: Dict[str, Any]) -> None:
-        self.path.parent.mkdir(parents=True, exist_ok=True)
-        tmp = self.path.with_suffix(".json.tmp")
-        tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
-        tmp.replace(self.path)
-
-    # ----- public API ---------------------------------------------------
-
-    def get(self, name: str) -> Optional[Dict[str, Any]]:
-        data = self._load()
-        entry = data["nodes"].get(name)
-        if entry is None:
-            return None
-        return {"name": name, **entry}
-
-    def add(self, name: str, url: str, token: str) -> None:
-        if not isinstance(name, str) or not name:
-            raise ValueError("node name must be a non-empty string")
-        if not isinstance(url, str) or not url:
-            raise ValueError("url must be a non-empty string")
-        if not isinstance(token, str) or not token:
-            raise ValueError("token must be a non-empty string")
-        data = self._load()
-        data["nodes"][name] = {
-            "url": url,
-            "token": token,
-            "added_at": time.time(),
-        }
-        self._save(data)
-
-    def remove(self, name: str) -> bool:
-        data = self._load()
-        if name in data["nodes"]:
-            del data["nodes"][name]
-            self._save(data)
-            return True
-        return False
-
-    def list_all(self) -> List[Dict[str, Any]]:
-        data = self._load()
-        out: List[Dict[str, Any]] = []
-        for name, entry in sorted(data["nodes"].items()):
-            out.append({"name": name, **entry})
-        return out
-
-    def resolve(self, chrome_node: Optional[str]) -> Optional[Dict[str, Any]]:
-        """Resolve a node name to its entry.
-
-        If ``chrome_node`` is provided, return that named node (or None).
-        If ``chrome_node`` is None, return the sole registered node when
-        exactly one is registered; otherwise return None (ambiguous or
-        empty).
-        """
-        if chrome_node:
-            return self.get(chrome_node)
-        nodes = self.list_all()
-        if len(nodes) == 1:
-            return nodes[0]
-        return None
@@ -1,193 +0,0 @@
-"""Remote node server.
-
-Runs on the machine that will host the Meet bot (typically the user's
-Mac laptop with a signed-in Chrome). Exposes a WebSocket endpoint that
-accepts signed RPC requests and dispatches them to the existing
-``plugins.google_meet.process_manager`` module.
-
-Launched by ``hermes meet node run``.
-
-Token handling
--------------
-On first boot we mint 32 hex chars of entropy and persist them at
-``$HERMES_HOME/workspace/meetings/node_token.json``. Subsequent boots
-reuse the same token so previously-approved gateways don't need to be
-re-paired. The operator copies this token out-of-band to the gateway
-via ``hermes meet node approve <name> <url> <token>``.
-
-Dependencies
------------
-``websockets`` is an optional dep. We import it lazily inside
-:meth:`serve` so installing the plugin doesn't require it unless you
-actually host a node.
-"""
-
-from __future__ import annotations
-
-import json
-import secrets
-import time
-from pathlib import Path
-from typing import Any, Dict, Optional
-
-from hermes_constants import get_hermes_home
-from plugins.google_meet.node import protocol as _proto
-
-
-def _default_token_path() -> Path:
-    return Path(get_hermes_home()) / "workspace" / "meetings" / "node_token.json"
-
-
-class NodeServer:
-    """WebSocket server that executes meet bot RPCs locally."""
-
-    def __init__(
-        self,
-        host: str = "0.0.0.0",
-        port: int = 18789,
-        token_path: Optional[Path] = None,
-        display_name: str = "hermes-meet-node",
-    ) -> None:
-        self.host = host
-        self.port = port
-        self.display_name = display_name
-        self.token_path = Path(token_path) if token_path is not None else _default_token_path()
-        self._token: Optional[str] = None
-
-    # ----- token management --------------------------------------------
-
-    def ensure_token(self) -> str:
-        """Return the persisted shared secret, generating one on first use."""
-        if self._token:
-            return self._token
-        if self.token_path.is_file():
-            try:
-                data = json.loads(self.token_path.read_text(encoding="utf-8"))
-                tok = data.get("token")
-                if isinstance(tok, str) and tok:
-                    self._token = tok
-                    return tok
-            except (OSError, json.JSONDecodeError):
-                pass
-        tok = secrets.token_hex(16)  # 32 hex chars
-        self.token_path.parent.mkdir(parents=True, exist_ok=True)
-        tmp = self.token_path.with_suffix(".json.tmp")
-        tmp.write_text(
-            json.dumps({"token": tok, "generated_at": time.time()}, indent=2),
-            encoding="utf-8",
-        )
-        tmp.replace(self.token_path)
-        self._token = tok
-        return tok
-
-    def get_token(self) -> str:
-        """Alias for :meth:`ensure_token`; does not mutate on subsequent calls."""
-        return self.ensure_token()
-
-    # ----- dispatch -----------------------------------------------------
-
-    async def _handle_request(self, msg: Dict[str, Any]) -> Dict[str, Any]:
-        """Validate + dispatch a single decoded request envelope.
-
-        Always returns a response envelope (success or error); never
-        raises. Errors from inside the process_manager are wrapped into
-        the response payload's ``ok``/``error`` keys (which pm already
-        does) rather than being re-encoded as error envelopes — the
-        envelope-level error channel is reserved for auth / protocol
-        failures.
-        """
-        expected = self.ensure_token()
-        ok, reason = _proto.validate_request(msg, expected)
-        if not ok:
-            return _proto.make_error(str(msg.get("id") or ""), reason)
-
-        req_id = msg["id"]
-        t = msg["type"]
-        payload = msg["payload"]
-
-        # Import lazily so test mocks can monkeypatch freely.
-        from plugins.google_meet import process_manager as pm
-
-        try:
-            if t == "ping":
-                return {"type": "pong", "id": req_id,
-                        "payload": {"display_name": self.display_name,
-                                    "ts": time.time()}}
-            if t == "start_bot":
-                # Whitelist kwargs we pass through to pm.start.
-                kwargs = {
-                    k: payload[k]
-                    for k in ("url", "guest_name", "duration", "headed",
-                              "auth_state", "session_id", "out_dir")
-                    if k in payload
-                }
-                if "url" not in kwargs:
-                    return _proto.make_error(req_id, "missing 'url' in payload")
-                result = pm.start(**kwargs)
-                return _proto.make_response(req_id, result)
-            if t == "stop":
-                reason_arg = payload.get("reason", "requested")
-                result = pm.stop(reason=reason_arg)
-                return _proto.make_response(req_id, result)
-            if t == "status":
-                return _proto.make_response(req_id, pm.status())
-            if t == "transcript":
-                last = payload.get("last")
-                result = pm.transcript(last=last)
-                return _proto.make_response(req_id, result)
-            if t == "say":
-                # v2 wiring: enqueue into say_queue.jsonl inside the
-                # active meeting's out_dir when present. The bot-side
-                # consumer is v3+ (for v1 this is a stub returning ok).
-                text = payload.get("text", "")
-                active = pm._read_active()  # type: ignore[attr-defined]
-                enqueued = False
-                if active and active.get("out_dir"):
-                    queue = Path(active["out_dir"]) / "say_queue.jsonl"
-                    try:
-                        queue.parent.mkdir(parents=True, exist_ok=True)
-                        with queue.open("a", encoding="utf-8") as fh:
-                            fh.write(json.dumps({"text": text, "ts": time.time()}) + "\n")
-                        enqueued = True
-                    except OSError:
-                        enqueued = False
-                return _proto.make_response(
-                    req_id,
-                    {"ok": True, "enqueued": enqueued, "text": text},
-                )
-        except Exception as exc:  # noqa: BLE001 — surface any pm crash to client
-            return _proto.make_error(req_id, f"{type(exc).__name__}: {exc}")
-
-        return _proto.make_error(req_id, f"unhandled type: {t!r}")
-
-    # ----- server loop --------------------------------------------------
-
-    async def serve(self) -> None:
-        """Run the WebSocket server until cancelled.
-
-        Blocks forever. Callers typically wrap this in ``asyncio.run``.
-        """
-        try:
-            import websockets  # type: ignore
-        except ImportError as exc:
-            raise RuntimeError(
-                "NodeServer.serve requires the 'websockets' package. "
-                "Install it with: pip install websockets"
-            ) from exc
-
-        self.ensure_token()
-
-        async def _handler(ws):
-            async for raw in ws:
-                try:
-                    msg = _proto.decode(raw if isinstance(raw, str) else raw.decode("utf-8"))
-                except ValueError as exc:
-                    await ws.send(_proto.encode(_proto.make_error("", f"decode: {exc}")))
-                    continue
-                reply = await self._handle_request(msg)
-                await ws.send(_proto.encode(reply))
-
-        async with websockets.serve(_handler, self.host, self.port):
-            # Run until cancelled.
-            import asyncio
-            await asyncio.Future()
@@ -1,16 +0,0 @@
-name: google_meet
-version: 0.2.0
-description: "Join a Google Meet call, transcribe live captions, speak in realtime, and follow up afterwards. v1 transcribe-only is the default; v2 realtime duplex audio via OpenAI Realtime + BlackHole/PulseAudio ships with mode='realtime'; v3 remote node host lets the bot run on a different machine than the gateway (gateway on Linux, Chrome+signed-in profile on the user's Mac). Explicit-by-design: only joins meet.google.com URLs passed in \u2014 no calendar scanning, no auto-dial."
-author: NousResearch
-kind: standalone
-platforms:
-  - linux
-  - macos
-provides_tools:
-  - meet_join
-  - meet_leave
-  - meet_status
-  - meet_transcript
-  - meet_say
-hooks:
-  - on_session_end
@@ -1,326 +0,0 @@
-"""Subprocess lifecycle manager for the google_meet bot.
-
-Single active meeting at a time. Stores the running pid + out_dir in a
-session-scoped state file under ``$HERMES_HOME/workspace/meetings/.active.json``
-so tool calls across turns can find the bot, and ``on_session_end`` can clean
-it up.
-
-The bot runs as a detached subprocess — we don't hold file descriptors open,
-so the parent agent loop can't block on it. We communicate via files only.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import signal
-import subprocess
-import sys
-import time
-from pathlib import Path
-from typing import Any, Dict, Optional
-
-from hermes_constants import get_hermes_home
-
-# File + directory layout (under $HERMES_HOME):
-#
-#   workspace/meetings/
-#       .active.json                # pointer to current session's bot
-#       <meeting-id>/
-#           status.json             # live bot state (written by bot each tick)
-#           transcript.txt          # scraped captions
-#
-# .active.json holds:
-#   {"pid": 12345, "meeting_id": "abc-defg-hij", "out_dir": "...",
-#    "url": "https://meet.google.com/...", "started_at": 1714159200.0,
-#    "session_id": "optional"}
-
-
-def _root() -> Path:
-    return Path(get_hermes_home()) / "workspace" / "meetings"
-
-
-def _active_file() -> Path:
-    return _root() / ".active.json"
-
-
-def _read_active() -> Optional[Dict[str, Any]]:
-    p = _active_file()
-    if not p.is_file():
-        return None
-    try:
-        return json.loads(p.read_text(encoding="utf-8"))
-    except Exception:
-        return None
-
-
-def _write_active(data: Dict[str, Any]) -> None:
-    p = _active_file()
-    p.parent.mkdir(parents=True, exist_ok=True)
-    tmp = p.with_suffix(".json.tmp")
-    tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
-    tmp.replace(p)
-
-
-def _clear_active() -> None:
-    try:
-        _active_file().unlink()
-    except FileNotFoundError:
-        pass
-
-
-def _pid_alive(pid: int) -> bool:
-    try:
-        os.kill(pid, 0)
-    except ProcessLookupError:
-        return False
-    except PermissionError:
-        # Process exists but we can't signal it — treat as alive.
-        return True
-    return True
-
-
-# ---------------------------------------------------------------------------
-# Public API — used by tool handlers + CLI
-# ---------------------------------------------------------------------------
-
-def start(
-    url: str,
-    *,
-    out_dir: Optional[Path] = None,
-    headed: bool = False,
-    auth_state: Optional[str] = None,
-    guest_name: str = "Hermes Agent",
-    duration: Optional[str] = None,
-    session_id: Optional[str] = None,
-    mode: str = "transcribe",
-    realtime_model: Optional[str] = None,
-    realtime_voice: Optional[str] = None,
-    realtime_instructions: Optional[str] = None,
-    realtime_api_key: Optional[str] = None,
-) -> Dict[str, Any]:
-    """Spawn the meet_bot subprocess for *url*.
-
-    If a bot is already running for this hermes install, leave it first —
-    we enforce single-active-meeting semantics.
-
-    Returns a dict summarizing the started bot.
-    """
-    from plugins.google_meet.meet_bot import _is_safe_meet_url, _meeting_id_from_url
-
-    if not _is_safe_meet_url(url):
-        return {
-            "ok": False,
-            "error": (
-                "refusing: only https://meet.google.com/ URLs are allowed. "
-                "got: " + repr(url)
-            ),
-        }
-
-    existing = _read_active()
-    if existing and _pid_alive(int(existing.get("pid", 0))):
-        stop(reason="replaced by new meet_join")
-
-    meeting_id = _meeting_id_from_url(url)
-    out = out_dir or (_root() / meeting_id)
-    out.mkdir(parents=True, exist_ok=True)
-
-    # Wipe any stale transcript/status files from a previous run of this
-    # meeting id so polling isn't confused.
-    for name in ("transcript.txt", "status.json"):
-        f = out / name
-        if f.exists():
-            try:
-                f.unlink()
-            except OSError:
-                pass
-
-    env = os.environ.copy()
-    env["HERMES_MEET_URL"] = url
-    env["HERMES_MEET_OUT_DIR"] = str(out)
-    env["HERMES_MEET_GUEST_NAME"] = guest_name
-    if headed:
-        env["HERMES_MEET_HEADED"] = "1"
-    if auth_state:
-        env["HERMES_MEET_AUTH_STATE"] = auth_state
-    if duration:
-        env["HERMES_MEET_DURATION"] = duration
-    # v2: realtime mode + passthroughs. The bot defaults to transcribe
-    # mode if HERMES_MEET_MODE isn't set, matching v1 behavior.
-    if mode:
-        env["HERMES_MEET_MODE"] = mode
-    if realtime_model:
-        env["HERMES_MEET_REALTIME_MODEL"] = realtime_model
-    if realtime_voice:
-        env["HERMES_MEET_REALTIME_VOICE"] = realtime_voice
-    if realtime_instructions:
-        env["HERMES_MEET_REALTIME_INSTRUCTIONS"] = realtime_instructions
-    if realtime_api_key:
-        env["HERMES_MEET_REALTIME_KEY"] = realtime_api_key
-
-    log_path = out / "bot.log"
-    # Detach: stdin=devnull, stdout/stderr → log file, new session so parent
-    # signals don't propagate.
-    log_fh = open(log_path, "ab", buffering=0)
-    try:
-        proc = subprocess.Popen(
-            [sys.executable, "-m", "plugins.google_meet.meet_bot"],
-            stdin=subprocess.DEVNULL,
-            stdout=log_fh,
-            stderr=subprocess.STDOUT,
-            env=env,
-            start_new_session=True,
-            close_fds=True,
-        )
-    finally:
-        # The subprocess now owns the log fd; we can close ours.
-        log_fh.close()
-
-    record = {
-        "pid": proc.pid,
-        "meeting_id": meeting_id,
-        "out_dir": str(out),
-        "url": url,
-        "started_at": time.time(),
-        "session_id": session_id,
-        "log_path": str(log_path),
-        "mode": mode,
-    }
-    _write_active(record)
-    return {"ok": True, **record}
-
-
-def status() -> Dict[str, Any]:
-    """Return the current meeting state, or ``{"ok": False, "reason": ...}``."""
-    active = _read_active()
-    if not active:
-        return {"ok": False, "reason": "no active meeting"}
-
-    pid = int(active.get("pid", 0))
-    alive = _pid_alive(pid) if pid else False
-
-    status_path = Path(active.get("out_dir", "")) / "status.json"
-    bot_status: Dict[str, Any] = {}
-    if status_path.is_file():
-        try:
-            bot_status = json.loads(status_path.read_text(encoding="utf-8"))
-        except Exception:
-            pass
-
-    return {
-        "ok": True,
-        "alive": alive,
-        "pid": pid,
-        "meetingId": active.get("meeting_id"),
-        "url": active.get("url"),
-        "startedAt": active.get("started_at"),
-        "outDir": active.get("out_dir"),
-        **bot_status,
-    }
-
-
-def transcript(last: Optional[int] = None) -> Dict[str, Any]:
-    """Read the current transcript file. Returns ok=False if none exists."""
-    active = _read_active()
-    if not active:
-        return {"ok": False, "reason": "no active meeting"}
-
-    tp = Path(active.get("out_dir", "")) / "transcript.txt"
-    if not tp.is_file():
-        return {
-            "ok": True,
-            "meetingId": active.get("meeting_id"),
-            "lines": [],
-            "total": 0,
-            "path": str(tp),
-        }
-    text = tp.read_text(encoding="utf-8", errors="replace")
-    all_lines = [ln for ln in text.splitlines() if ln.strip()]
-    lines = all_lines[-last:] if last else all_lines
-    return {
-        "ok": True,
-        "meetingId": active.get("meeting_id"),
-        "lines": lines,
-        "total": len(all_lines),
-        "path": str(tp),
-    }
-
-
-def enqueue_say(text: str) -> Dict[str, Any]:
-    """Append a ``say`` request to the active bot's JSONL queue.
-
-    Returns ``{"ok": False, "reason": ...}`` when no meeting is active or
-    the active bot is in transcribe-only mode. Otherwise writes a line to
-    ``<out_dir>/say_queue.jsonl`` that the bot's realtime speaker thread
-    will consume.
-    """
-    import uuid
-
-    text = (text or "").strip()
-    if not text:
-        return {"ok": False, "reason": "text is required"}
-
-    active = _read_active()
-    if not active:
-        return {"ok": False, "reason": "no active meeting"}
-    if active.get("mode") != "realtime":
-        return {
-            "ok": False,
-            "reason": (
-                "active meeting is in transcribe mode — pass mode='realtime' "
-                "to meet_join to enable agent speech"
-            ),
-        }
-
-    out_dir = Path(active.get("out_dir", ""))
-    if not out_dir.is_dir():
-        return {"ok": False, "reason": f"out_dir missing: {out_dir}"}
-
-    queue_path = out_dir / "say_queue.jsonl"
-    entry = {"id": uuid.uuid4().hex[:12], "text": text}
-    with queue_path.open("a", encoding="utf-8") as f:
-        f.write(json.dumps(entry) + "\n")
-    return {
-        "ok": True,
-        "meetingId": active.get("meeting_id"),
-        "enqueued_id": entry["id"],
-        "queue_path": str(queue_path),
-    }
-
-
-def stop(*, reason: str = "requested") -> Dict[str, Any]:
-    """Signal the active bot to leave cleanly, then clear the active pointer.
-
-    Sends SIGTERM and waits up to 10s for the bot to exit. Falls back to
-    SIGKILL if the bot doesn't respond.
-    """
-    active = _read_active()
-    if not active:
-        return {"ok": False, "reason": "no active meeting"}
-
-    pid = int(active.get("pid", 0))
-    out_dir = active.get("out_dir")
-    transcript_path = Path(out_dir) / "transcript.txt" if out_dir else None
-
-    if pid and _pid_alive(pid):
-        try:
-            os.kill(pid, signal.SIGTERM)
-        except ProcessLookupError:
-            pass
-        for _ in range(20):
-            if not _pid_alive(pid):
-                break
-            time.sleep(0.5)
-        if _pid_alive(pid):
-            try:
-                os.kill(pid, signal.SIGKILL)
-            except ProcessLookupError:
-                pass
-
-    _clear_active()
-    return {
-        "ok": True,
-        "reason": reason,
-        "meetingId": active.get("meeting_id"),
-        "transcriptPath": str(transcript_path) if transcript_path else None,
-    }
@@ -1,10 +0,0 @@
-"""Realtime speech subpackage for the google_meet plugin (v2).
-
-Provides a thin OpenAI Realtime API client and a file-queue speaker
-wrapper so the Meet bot can play synthesized speech through the
-virtual audio bridge.
-"""
-
-from .openai_client import RealtimeSession, RealtimeSpeaker  # noqa: F401
-
-__all__ = ["RealtimeSession", "RealtimeSpeaker"]
@@ -1,332 +0,0 @@
-"""OpenAI Realtime API WebSocket client + file-queue speaker.
-
-This module is the "output" side of the v2 voice bridge: it takes text,
-sends it to the OpenAI Realtime API, receives audio deltas back, and
-appends the PCM bytes to a file. A separate consumer (the audio
-bridge) streams that file into Chrome's fake microphone.
-
-Designed for simplicity: a single synchronous WebSocket connection per
-speaker, per session. The ``websockets`` package is imported lazily so
-that importing this module never fails just because the optional dep
-is missing.
-"""
-
-from __future__ import annotations
-
-import base64
-import json
-import time
-import uuid
-from pathlib import Path
-from typing import Any, Callable, Optional
-
-
-REALTIME_URL = "wss://api.openai.com/v1/realtime"
-
-
-def _require_websockets():
-    """Import ``websockets.sync.client.connect`` or raise with hint."""
-    try:
-        from websockets.sync.client import connect as _connect  # type: ignore
-    except ImportError as exc:  # pragma: no cover - exercised via test
-        raise RuntimeError(
-            "websockets package is required for OpenAI Realtime; "
-            "install with: pip install websockets"
-        ) from exc
-    return _connect
-
-
-class RealtimeSession:
-    """Minimal sync client for the OpenAI Realtime WebSocket API.
-
-    Usage:
-        sess = RealtimeSession(api_key=..., audio_sink_path=Path("out.pcm"))
-        sess.connect()
-        sess.speak("Hello team.")
-        sess.close()
-
-    Thread safety: ``speak`` and ``cancel_response`` may be called from
-    different threads; a lock serializes WebSocket writes.
-    """
-
-    def __init__(
-        self,
-        api_key: str,
-        model: str = "gpt-realtime",
-        voice: str = "alloy",
-        instructions: str = "",
-        audio_sink_path: Optional[Path] = None,
-        sample_rate: int = 24000,
-    ) -> None:
-        import threading as _threading
-        self.api_key = api_key
-        self.model = model
-        self.voice = voice
-        self.instructions = instructions
-        self.audio_sink_path = Path(audio_sink_path) if audio_sink_path else None
-        self.sample_rate = sample_rate
-        self._ws: Any = None
-        self._send_lock = _threading.Lock()
-        self._last_response_id: Optional[str] = None
-        # Public counters for status reporting.
-        self.audio_bytes_out: int = 0
-        self.last_audio_out_at: Optional[float] = None
-
-    # ── lifecycle ─────────────────────────────────────────────────────────
-
-    def connect(self) -> None:
-        """Open WS and send session.update with voice+instructions."""
-        connect = _require_websockets()
-        url = f"{REALTIME_URL}?model={self.model}"
-        headers = [
-            ("Authorization", f"Bearer {self.api_key}"),
-            ("OpenAI-Beta", "realtime=v1"),
-        ]
-        # websockets.sync.client.connect accepts either additional_headers=
-        # (newer) or extra_headers= depending on version; try the newer
-        # name first and fall back.
-        try:
-            self._ws = connect(url, additional_headers=headers)
-        except TypeError:
-            self._ws = connect(url, extra_headers=headers)
-
-        self._send_json(
-            {
-                "type": "session.update",
-                "session": {
-                    "voice": self.voice,
-                    "instructions": self.instructions,
-                    "modalities": ["audio", "text"],
-                    "output_audio_format": "pcm16",
-                    "input_audio_format": "pcm16",
-                },
-            }
-        )
-
-    def close(self) -> None:
-        if self._ws is not None:
-            try:
-                self._ws.close()
-            except Exception:
-                pass
-            self._ws = None
-
-    # ── speaking ──────────────────────────────────────────────────────────
-
-    def speak(self, text: str, timeout: float = 30.0) -> dict:
-        """Send ``text`` and accumulate the audio response.
-
-        Audio deltas are base64-decoded and appended to
-        ``audio_sink_path`` (opened 'ab' and closed per call, so a
-        separate streaming reader can consume whatever is there).
-        """
-        if self._ws is None:
-            raise RuntimeError("RealtimeSession.connect() must be called first")
-
-        start = time.monotonic()
-
-        self._send_json(
-            {
-                "type": "conversation.item.create",
-                "item": {
-                    "type": "message",
-                    "role": "user",
-                    "content": [{"type": "input_text", "text": text}],
-                },
-            }
-        )
-        self._send_json(
-            {
-                "type": "response.create",
-                "response": {"modalities": ["audio"]},
-            }
-        )
-
-        bytes_written = 0
-        sink_fp = None
-        if self.audio_sink_path is not None:
-            self.audio_sink_path.parent.mkdir(parents=True, exist_ok=True)
-            sink_fp = open(self.audio_sink_path, "ab")
-
-        try:
-            while True:
-                remaining = timeout - (time.monotonic() - start)
-                if remaining <= 0:
-                    raise TimeoutError(
-                        f"realtime response did not complete within {timeout}s"
-                    )
-                raw = self._recv(timeout=remaining)
-                if raw is None:
-                    # Connection closed by peer.
-                    break
-                try:
-                    frame = json.loads(raw) if isinstance(raw, (str, bytes, bytearray)) else raw
-                except (TypeError, ValueError):
-                    continue
-                if not isinstance(frame, dict):
-                    continue
-                ftype = frame.get("type")
-                if ftype == "response.audio.delta":
-                    b64 = frame.get("delta") or frame.get("audio") or ""
-                    if b64 and sink_fp is not None:
-                        try:
-                            chunk = base64.b64decode(b64)
-                        except (ValueError, TypeError):
-                            chunk = b""
-                        if chunk:
-                            sink_fp.write(chunk)
-                            sink_fp.flush()
-                            bytes_written += len(chunk)
-                            self.audio_bytes_out += len(chunk)
-                            self.last_audio_out_at = time.time()
-                elif ftype == "response.created":
-                    rid = (frame.get("response") or {}).get("id")
-                    if rid:
-                        self._last_response_id = rid
-                elif ftype in ("response.done", "response.completed", "response.cancelled"):
-                    break
-                elif ftype == "error":
-                    err = frame.get("error") or frame
-                    raise RuntimeError(f"realtime error: {err}")
-                # All other frames (response.created, response.output_item.*,
-                # response.audio_transcript.delta, rate_limits.updated, ...)
-                # are ignored for v2.
-        finally:
-            if sink_fp is not None:
-                sink_fp.close()
-
-        duration_ms = (time.monotonic() - start) * 1000.0
-        return {
-            "ok": True,
-            "bytes_written": bytes_written,
-            "duration_ms": duration_ms,
-        }
-
-    # ── ws plumbing ───────────────────────────────────────────────────────
-
-    def cancel_response(self) -> bool:
-        """Interrupt the in-flight response (barge-in).
-
-        Sends ``response.cancel`` on the current WebSocket so the model
-        stops generating audio immediately. Safe to call at any time;
-        returns True if a cancel was actually sent, False when there's
-        nothing to cancel or the socket isn't open.
-        """
-        if self._ws is None:
-            return False
-        try:
-            self._send_json({"type": "response.cancel"})
-            return True
-        except Exception:
-            return False
-
-    def _send_json(self, payload: dict) -> None:
-        assert self._ws is not None
-        with self._send_lock:
-            self._ws.send(json.dumps(payload))
-
-    def _recv(self, timeout: Optional[float] = None):
-        assert self._ws is not None
-        try:
-            if timeout is None:
-                return self._ws.recv()
-            return self._ws.recv(timeout=timeout)
-        except TypeError:
-            # Older websockets may not accept timeout kwarg.
-            return self._ws.recv()
-
-
-class RealtimeSpeaker:
-    """File-based JSONL queue wrapper around :class:`RealtimeSession`.
-
-    Each line in ``queue_path`` is a JSON object of the form
-    ``{"id": "<uuid>", "text": "..."}``. Processed lines are appended
-    to ``processed_path`` (if set) and then removed from the queue;
-    if ``processed_path`` is ``None``, processed lines are simply
-    dropped.
-    """
-
-    def __init__(
-        self,
-        session: RealtimeSession,
-        queue_path: Path,
-        processed_path: Optional[Path] = None,
-    ) -> None:
-        self.session = session
-        self.queue_path = Path(queue_path)
-        self.processed_path = Path(processed_path) if processed_path else None
-
-    # ── helpers ──────────────────────────────────────────────────────────
-
-    def _read_queue(self) -> list[dict]:
-        if not self.queue_path.exists():
-            return []
-        out: list[dict] = []
-        for line in self.queue_path.read_text().splitlines():
-            line = line.strip()
-            if not line:
-                continue
-            try:
-                entry = json.loads(line)
-            except ValueError:
-                continue
-            if not isinstance(entry, dict):
-                continue
-            if "id" not in entry:
-                entry["id"] = str(uuid.uuid4())
-            out.append(entry)
-        return out
-
-    def _rewrite_queue(self, remaining: list[dict]) -> None:
-        if not remaining:
-            # Keep the file but empty — consumers may be watching for
-            # new writes via mtime, and delete-then-recreate is a race.
-            self.queue_path.write_text("")
-            return
-        self.queue_path.write_text(
-            "\n".join(json.dumps(e) for e in remaining) + "\n"
-        )
-
-    def _append_processed(self, entry: dict, result: dict) -> None:
-        if self.processed_path is None:
-            return
-        self.processed_path.parent.mkdir(parents=True, exist_ok=True)
-        record = {"id": entry.get("id"), "text": entry.get("text", ""), "result": result}
-        with open(self.processed_path, "a") as fp:
-            fp.write(json.dumps(record) + "\n")
-
-    # ── main loop ────────────────────────────────────────────────────────
-
-    def run_until_stopped(
-        self,
-        stop_fn: Callable[[], bool],
-        poll_interval: float = 0.5,
-    ) -> None:
-        while not stop_fn():
-            entries = self._read_queue()
-            if not entries:
-                time.sleep(poll_interval)
-                continue
-            # Process one at a time; re-check the queue file after each
-            # speak() call because new entries may have arrived.
-            head = entries[0]
-            text = (head.get("text") or "").strip()
-            if text:
-                try:
-                    result = self.session.speak(text)
-                except Exception as exc:
-                    result = {"ok": False, "error": str(exc)}
-            else:
-                result = {"ok": True, "bytes_written": 0, "duration_ms": 0.0}
-            self._append_processed(head, result)
-
-            # Re-read the queue from disk in case it was appended to
-            # while we were speaking, then drop the head.
-            latest = self._read_queue()
-            if latest and latest[0].get("id") == head.get("id"):
-                self._rewrite_queue(latest[1:])
-            else:
-                # Fallback: drop-by-id anywhere in the queue.
-                self._rewrite_queue(
-                    [e for e in latest if e.get("id") != head.get("id")]
-                )
@@ -1,348 +0,0 @@
-"""Agent-facing tools for the google_meet plugin.
-
-Tools:
-  meet_join        — join a Google Meet URL (spawns Playwright bot locally
-                     OR on a remote node host via node=<name>)
-  meet_status      — report bot liveness + transcript progress
-  meet_transcript  — read the current transcript (optional last-N)
-  meet_leave       — signal the bot to leave cleanly
-  meet_say         — (v2) speak text through the realtime audio bridge.
-                     Requires the active meeting to have been joined with
-                     mode='realtime'.
-"""
-
-from __future__ import annotations
-
-import json
-from typing import Any, Dict, Optional
-
-from plugins.google_meet import process_manager as pm
-
-
-# ---------------------------------------------------------------------------
-# Runtime gate
-# ---------------------------------------------------------------------------
-
-def check_meet_requirements() -> bool:
-    """Return True when the plugin can actually run LOCALLY.
-
-    Gates on:
-      * Python ``playwright`` package importable
-      * the plugin being on a supported platform (Linux or macOS)
-
-    Note: remote-node operation (``node=<name>``) only needs the
-    ``websockets`` dep on the gateway side — Chromium lives on the node.
-    But the plugin-level gate keeps the v1 semantics; individual tool
-    handlers relax the requirement when a node is addressed.
-    """
-    import platform as _p
-    if _p.system().lower() not in ("linux", "darwin"):
-        return False
-    try:
-        import playwright  # noqa: F401
-    except ImportError:
-        return False
-    return True
-
-
-# ---------------------------------------------------------------------------
-# Node client helper
-# ---------------------------------------------------------------------------
-
-def _resolve_node_client(node: Optional[str]):
-    """Return (NodeClient, node_name) for *node*, or (None, None) to run local.
-
-    Raises RuntimeError with a readable message if the node is named but
-    unresolvable, so the handler can surface a clear error to the agent.
-    """
-    if node is None or node == "":
-        return None, None
-    from plugins.google_meet.node.registry import NodeRegistry
-    from plugins.google_meet.node.client import NodeClient
-
-    reg = NodeRegistry()
-    entry = reg.resolve(node if node != "auto" else None)
-    if entry is None:
-        raise RuntimeError(
-            f"no registered meet node matches {node!r} — "
-            "run `hermes meet node approve <name> <url> <token>` first"
-        )
-    client = NodeClient(url=entry["url"], token=entry["token"])
-    return client, entry.get("name")
-
-
-# ---------------------------------------------------------------------------
-# Schemas
-# ---------------------------------------------------------------------------
-
-MEET_JOIN_SCHEMA: Dict[str, Any] = {
-    "name": "meet_join",
-    "description": (
-        "Join a Google Meet call and start scraping live captions into a "
-        "transcript file. Only meet.google.com URLs are accepted; no calendar "
-        "scanning, no auto-dial. Spawns a headless Chromium subprocess that "
-        "runs in parallel with the agent loop — returns immediately. Poll "
-        "with meet_status and read captions with meet_transcript. Reminder "
-        "to the agent: you should announce yourself in the meeting (there is "
-        "no automatic consent announcement)."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "url": {
-                "type": "string",
-                "description": (
-                    "Full https://meet.google.com/... URL. Required."
-                ),
-            },
-            "mode": {
-                "type": "string",
-                "enum": ["transcribe", "realtime"],
-                "description": (
-                    "transcribe (default): listen-only, scrape captions. "
-                    "realtime: also enable agent speech via meet_say "
-                    "(requires OpenAI Realtime key + platform audio bridge)."
-                ),
-            },
-            "guest_name": {
-                "type": "string",
-                "description": (
-                    "Display name to use when joining as guest. Defaults to "
-                    "'Hermes Agent'."
-                ),
-            },
-            "duration": {
-                "type": "string",
-                "description": (
-                    "Optional max duration before auto-leave (e.g. '30m', "
-                    "'2h', '90s'). Omit to stay until meet_leave is called."
-                ),
-            },
-            "headed": {
-                "type": "boolean",
-                "description": (
-                    "Run Chromium headed instead of headless (debug only). "
-                    "Default false."
-                ),
-            },
-            "node": {
-                "type": "string",
-                "description": (
-                    "Name of a registered remote node to run the bot on "
-                    "(useful when the gateway runs on a headless Linux box "
-                    "but the user's Chrome with a signed-in Google profile "
-                    "lives on their Mac). Pass 'auto' to use the single "
-                    "registered node. Default: run locally. Nodes are "
-                    "approved via `hermes meet node approve`."
-                ),
-            },
-        },
-        "required": ["url"],
-        "additionalProperties": False,
-    },
-}
-
-MEET_STATUS_SCHEMA: Dict[str, Any] = {
-    "name": "meet_status",
-    "description": (
-        "Report the current Meet session state — whether the bot is alive, "
-        "has joined, is sitting in the lobby, number of transcript lines "
-        "captured, and last-caption timestamp."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "node": {"type": "string"},
-        },
-        "additionalProperties": False,
-    },
-}
-
-MEET_TRANSCRIPT_SCHEMA: Dict[str, Any] = {
-    "name": "meet_transcript",
-    "description": (
-        "Read the scraped transcript for the active Meet session. Returns "
-        "full transcript unless 'last' is set, in which case returns the last "
-        "N lines only."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "last": {
-                "type": "integer",
-                "description": (
-                    "Optional: return only the last N caption lines. Useful "
-                    "for polling during a meeting without re-reading the "
-                    "whole transcript."
-                ),
-                "minimum": 1,
-            },
-            "node": {"type": "string"},
-        },
-        "additionalProperties": False,
-    },
-}
-
-MEET_LEAVE_SCHEMA: Dict[str, Any] = {
-    "name": "meet_leave",
-    "description": (
-        "Leave the active Meet call cleanly, stop caption scraping, and "
-        "finalize the transcript file. Safe to call when no meeting is "
-        "active — returns ok=false with a reason."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "node": {"type": "string"},
-        },
-        "additionalProperties": False,
-    },
-}
-
-MEET_SAY_SCHEMA: Dict[str, Any] = {
-    "name": "meet_say",
-    "description": (
-        "Speak text into the active Meet call. Requires the active meeting "
-        "to have been joined with mode='realtime'. The text is queued to "
-        "the bot's OpenAI Realtime session; the generated audio is streamed "
-        "into Chrome's fake microphone via a virtual audio device "
-        "(PulseAudio null-sink on Linux, BlackHole on macOS). Returns "
-        "immediately — the actual speech lags by a couple of seconds."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "text": {"type": "string", "description": "Text to speak."},
-            "node": {"type": "string"},
-        },
-        "required": ["text"],
-        "additionalProperties": False,
-    },
-}
-
-
-# ---------------------------------------------------------------------------
-# Handlers
-# ---------------------------------------------------------------------------
-
-def _json(obj: Any) -> str:
-    return json.dumps(obj, ensure_ascii=False)
-
-
-def _err(msg: str, **extra) -> str:
-    return _json({"success": False, "error": msg, **extra})
-
-
-def handle_meet_join(args: Dict[str, Any], **_kw) -> str:
-    url = (args.get("url") or "").strip()
-    if not url:
-        return _err("url is required")
-    mode = (args.get("mode") or "transcribe").strip().lower()
-    if mode not in ("transcribe", "realtime"):
-        return _err(f"mode must be 'transcribe' or 'realtime' (got {mode!r})")
-
-    node = args.get("node")
-    try:
-        client, node_name = _resolve_node_client(node)
-    except RuntimeError as e:
-        return _err(str(e))
-
-    if client is not None:
-        # Remote path — delegate to the node host.
-        try:
-            res = client.start_bot(
-                url=url,
-                guest_name=str(args.get("guest_name") or "Hermes Agent"),
-                duration=str(args.get("duration")) if args.get("duration") else None,
-                headed=bool(args.get("headed", False)),
-                mode=mode,
-            )
-            return _json({"success": bool(res.get("ok")), "node": node_name, **res})
-        except Exception as e:
-            return _err(f"remote node start_bot failed: {e}", node=node_name)
-
-    # Local path — same as v1, with v2 params.
-    if not check_meet_requirements():
-        return _err(
-            "google_meet plugin prerequisites missing — install with "
-            "`pip install playwright && python -m playwright install "
-            "chromium`. Plugin is supported on Linux and macOS only."
-        )
-    res = pm.start(
-        url=url,
-        headed=bool(args.get("headed", False)),
-        guest_name=str(args.get("guest_name") or "Hermes Agent"),
-        duration=str(args.get("duration")) if args.get("duration") else None,
-        mode=mode,
-    )
-    return _json({"success": bool(res.get("ok")), **res})
-
-
-def handle_meet_status(args: Dict[str, Any], **_kw) -> str:
-    try:
-        client, node_name = _resolve_node_client(args.get("node"))
-    except RuntimeError as e:
-        return _err(str(e))
-    if client is not None:
-        try:
-            res = client.status()
-            return _json({"success": bool(res.get("ok")), "node": node_name, **res})
-        except Exception as e:
-            return _err(f"remote node status failed: {e}", node=node_name)
-    res = pm.status()
-    return _json({"success": bool(res.get("ok")), **res})
-
-
-def handle_meet_transcript(args: Dict[str, Any], **_kw) -> str:
-    last = args.get("last")
-    try:
-        last_i = int(last) if last is not None else None
-        if last_i is not None and last_i < 1:
-            last_i = None
-    except (TypeError, ValueError):
-        last_i = None
-    try:
-        client, node_name = _resolve_node_client(args.get("node"))
-    except RuntimeError as e:
-        return _err(str(e))
-    if client is not None:
-        try:
-            res = client.transcript(last=last_i)
-            return _json({"success": bool(res.get("ok")), "node": node_name, **res})
-        except Exception as e:
-            return _err(f"remote node transcript failed: {e}", node=node_name)
-    res = pm.transcript(last=last_i)
-    return _json({"success": bool(res.get("ok")), **res})
-
-
-def handle_meet_leave(args: Dict[str, Any], **_kw) -> str:
-    try:
-        client, node_name = _resolve_node_client(args.get("node"))
-    except RuntimeError as e:
-        return _err(str(e))
-    if client is not None:
-        try:
-            res = client.stop()
-            return _json({"success": bool(res.get("ok")), "node": node_name, **res})
-        except Exception as e:
-            return _err(f"remote node stop failed: {e}", node=node_name)
-    res = pm.stop(reason="agent called meet_leave")
-    return _json({"success": bool(res.get("ok")), **res})
-
-
-def handle_meet_say(args: Dict[str, Any], **_kw) -> str:
-    text = (args.get("text") or "").strip()
-    if not text:
-        return _err("text is required")
-    try:
-        client, node_name = _resolve_node_client(args.get("node"))
-    except RuntimeError as e:
-        return _err(str(e))
-    if client is not None:
-        try:
-            res = client.say(text)
-            return _json({"success": bool(res.get("ok")), "node": node_name, **res})
-        except Exception as e:
-            return _err(f"remote node say failed: {e}", node=node_name)
-    res = pm.enqueue_say(text)
-    return _json({"success": bool(res.get("ok")), **res})
@@ -526,24 +526,16 @@ class HindsightMemoryProvider(MemoryProvider):

        print("\n  Configuring Hindsight memory:\n")

-        existing_config = self._config if isinstance(self._config, dict) else _load_config()
-        if not isinstance(existing_config, dict):
-            existing_config = {}
-
        # Step 1: Mode selection
-        mode_values = ["cloud", "local_embedded", "local_external"]
        mode_items = [
            ("Cloud", "Hindsight Cloud API (lightweight, just needs an API key)"),
            ("Local Embedded", "Run Hindsight locally (downloads ~200MB, needs LLM key)"),
            ("Local External", "Connect to an existing Hindsight instance"),
        ]
-        existing_mode = existing_config.get("mode")
-        mode_default_idx = mode_values.index(existing_mode) if existing_mode in mode_values else 0
-        mode_idx = _curses_select("  Select mode", mode_items, default=mode_default_idx)
-        mode = mode_values[mode_idx]
+        mode_idx = _curses_select("  Select mode", mode_items, default=0)
+        mode = ["cloud", "local_embedded", "local_external"][mode_idx]

-        provider_config: dict = dict(existing_config)
-        provider_config["mode"] = mode
+        provider_config: dict = {"mode": mode}
        env_writes: dict = {}

        # Step 2: Install/upgrade deps for selected mode
@@ -609,29 +601,21 @@ class HindsightMemoryProvider(MemoryProvider):
                (p, f"default model: {_PROVIDER_DEFAULT_MODELS[p]}")
                for p in providers_list
            ]
-            existing_llm_provider = provider_config.get("llm_provider")
-            llm_default_idx = providers_list.index(existing_llm_provider) if existing_llm_provider in providers_list else 0
-            llm_idx = _curses_select("  Select LLM provider", llm_items, default=llm_default_idx)
+            llm_idx = _curses_select("  Select LLM provider", llm_items, default=0)
            llm_provider = providers_list[llm_idx]

            provider_config["llm_provider"] = llm_provider

            if llm_provider == "openai_compatible":
-                existing_base_url = provider_config.get("llm_base_url", "")
-                prompt = "  LLM endpoint URL (e.g. http://192.168.1.10:8080/v1)"
-                if existing_base_url:
-                    prompt += f" [{existing_base_url}]"
-                prompt += ": "
-                val = input(prompt).strip()
+                val = input("  LLM endpoint URL (e.g. http://192.168.1.10:8080/v1): ").strip()
                if val:
                    provider_config["llm_base_url"] = val
            elif llm_provider == "openrouter":
                provider_config["llm_base_url"] = "https://openrouter.ai/api/v1"

-            provider_default_model = _PROVIDER_DEFAULT_MODELS.get(llm_provider, "gpt-4o-mini")
-            current_model = provider_config.get("llm_model") or provider_default_model
-            val = input(f"  LLM model [{current_model}]: ").strip()
-            provider_config["llm_model"] = val or current_model
+            default_model = _PROVIDER_DEFAULT_MODELS.get(llm_provider, "gpt-4o-mini")
+            val = input(f"  LLM model [{default_model}]: ").strip()
+            provider_config["llm_model"] = val or default_model

            sys.stdout.write("  LLM API key: ")
            sys.stdout.flush()
@@ -649,16 +633,15 @@ class HindsightMemoryProvider(MemoryProvider):
                env_writes["HINDSIGHT_LLM_API_KEY"] = existing_llm_key

        # Step 4: Save everything
-        provider_config.setdefault("bank_id", "hermes")
-        provider_config.setdefault("recall_budget", "mid")
-        # Read existing timeout from config if present, otherwise use default.
-        # Preserve explicit 0 values instead of treating them as blank.
-        existing_timeout = provider_config.get("timeout")
-        timeout_val = existing_timeout if existing_timeout is not None else _DEFAULT_TIMEOUT
+        provider_config["bank_id"] = "hermes"
+        provider_config["recall_budget"] = "mid"
+        # Read existing timeout from config if present, otherwise use default
+        existing_timeout = self._config.get("timeout") if self._config else None
+        timeout_val = existing_timeout if existing_timeout else _DEFAULT_TIMEOUT
        provider_config["timeout"] = timeout_val
        env_writes["HINDSIGHT_TIMEOUT"] = str(timeout_val)
        if mode == "local_embedded":
-            existing_idle_timeout = provider_config.get("idle_timeout")
+            existing_idle_timeout = self._config.get("idle_timeout") if self._config else None
            idle_timeout_val = existing_idle_timeout if existing_idle_timeout is not None else _DEFAULT_IDLE_TIMEOUT
            provider_config["idle_timeout"] = idle_timeout_val
            env_writes["HINDSIGHT_IDLE_TIMEOUT"] = str(idle_timeout_val)
@@ -1221,6 +1204,7 @@ class HindsightMemoryProvider(MemoryProvider):

        def _sync():
            try:
+                client = self._get_client()
                item = self._build_retain_kwargs(
                    content,
                    context=self._retain_context,
@@ -22,7 +22,6 @@ import threading
 import time
 from typing import Any, Dict, List, Optional

-from agent.memory_manager import sanitize_context
 from agent.memory_provider import MemoryProvider
 from tools.registry import tool_error

@@ -38,10 +37,7 @@ PROFILE_SCHEMA = {
    "description": (
        "Retrieve or update a peer card from Honcho — a curated list of key facts "
        "about that peer (name, role, preferences, communication style, patterns). "
-        "Pass `card` to update; omit `card` to read.  If the card is empty, the "
-        "result includes a `hint` field explaining why (observation disabled, "
-        "fresh peer, dialectic layer still warming up, etc.) — this is NOT an "
-        "error.  Peer cards accumulate over time from observed conversation."
+        "Pass `card` to update; omit `card` to read."
    ),
    "parameters": {
        "type": "object",
@@ -1060,63 +1056,6 @@ class HonchoMemoryProvider(MemoryProvider):

        return chunks

-    def _empty_profile_hint(self, peer: str) -> Dict[str, Any]:
-        """Build a diagnostic hint when honcho_profile returns an empty card.
-
-        A literal "No profile facts available yet." tells the model nothing
-        about WHY.  The model then often surfaces it to the user as a cryptic
-        error.  This hint enumerates the likely causes so the model can
-        explain the situation (or retry with a different peer).
-
-        Ordered by likelihood for a typical deployment:
-          1. Observation is disabled for this peer
-          2. Card hasn't accumulated yet (fresh peer, not enough dialectic
-             cycles — dialectic cadence runs every N turns)
-          3. Self-hosted Honcho backend doesn't support peer cards
-             (honcho-ai server < 3.x)
-        """
-        cfg = self._config
-        reasons: List[str] = []
-
-        if cfg is not None:
-            if peer == "user":
-                observe_me = bool(getattr(cfg, "user_observe_me", True))
-                observe_others = bool(getattr(cfg, "user_observe_others", True))
-            else:
-                observe_me = bool(getattr(cfg, "ai_observe_me", True))
-                observe_others = bool(getattr(cfg, "ai_observe_others", True))
-            if not (observe_me or observe_others):
-                reasons.append(
-                    f"observation is disabled for peer '{peer}' "
-                    f"(user_observe_me/ai_observe_me in config)"
-                )
-
-        cadence = getattr(self, "_dialectic_cadence", 1)
-        turn = getattr(self, "_turn_count", 0)
-        if turn < max(2, cadence):
-            reasons.append(
-                f"this session has only {turn} turn(s); peer cards accumulate "
-                f"as the dialectic layer reasons over conversation history "
-                f"(cadence every {cadence} turn(s))"
-            )
-
-        if not reasons:
-            reasons.append(
-                "peer card has no facts yet — Honcho's dialectic layer builds "
-                "this over time from observed turns; self-hosted Honcho < 3.x "
-                "does not support peer cards at all"
-            )
-
-        return {
-            "result": "No profile facts available yet.",
-            "hint": (
-                "This is not an error.  "
-                + "; ".join(reasons)
-                + ".  Try honcho_reasoning for a synthesized answer, or "
-                "honcho_search to query raw conversation excerpts."
-            ),
-        }
-
    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Record the conversation turn in Honcho (non-blocking).

@@ -1129,15 +1068,13 @@ class HonchoMemoryProvider(MemoryProvider):
            return

        msg_limit = self._config.message_max_chars if self._config else 25000
-        clean_user_content = sanitize_context(user_content or "").strip()
-        clean_assistant_content = sanitize_context(assistant_content or "").strip()

        def _sync():
            try:
                session = self._manager.get_or_create(self._session_key)
-                for chunk in self._chunk_message(clean_user_content, msg_limit):
+                for chunk in self._chunk_message(user_content, msg_limit):
                    session.add_message("user", chunk)
-                for chunk in self._chunk_message(clean_assistant_content, msg_limit):
+                for chunk in self._chunk_message(assistant_content, msg_limit):
                    session.add_message("assistant", chunk)
                self._manager._flush_session(session)
            except Exception as e:
@@ -1150,20 +1087,8 @@ class HonchoMemoryProvider(MemoryProvider):
        )
        self._sync_thread.start()

-    def on_memory_write(
-        self,
-        action: str,
-        target: str,
-        content: str,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> None:
-        """Mirror built-in user profile writes as Honcho conclusions.
-
-        ``metadata`` is accepted for compatibility with the write-origin
-        work landed in main (commit 6a957a74); it's not yet threaded into
-        the Honcho conclusion payload.  Left as a follow-up so this PR
-        stays focused on the 7-PR consolidation and its review follow-ups.
-        """
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Mirror built-in user profile writes as Honcho conclusions."""
        if action != "add" or target != "user" or not content:
            return
        if self._cron_skipped:
@@ -1229,7 +1154,7 @@ class HonchoMemoryProvider(MemoryProvider):
                    return json.dumps({"result": f"Peer card updated ({len(result)} facts).", "card": result})
                card = self._manager.get_peer_card(self._session_key, peer=peer)
                if not card:
-                    return json.dumps(self._empty_profile_hint(peer))
+                    return json.dumps({"result": "No profile facts available yet."})
                return json.dumps({"result": card})

            elif tool_name == "honcho_search":
@@ -273,38 +273,9 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:


 def _resolve_api_key(cfg: dict) -> str:
-    """Resolve API key with host -> root -> env fallback.
-
-    For self-hosted instances configured with ``baseUrl`` instead of an API
-    key, returns ``"local"`` so that credential guards throughout the CLI
-    don't reject a valid configuration.  The ``baseUrl`` is scheme-validated
-    (http/https only) so that a typo like ``baseUrl: true`` can't silently
-    pass the guard.  Schemeless strings that look like host:port (legacy
-    config shapes, e.g. ``localhost:8000``) still pass — the Honcho SDK
-    will reject them itself with a clearer error than ours.
-    """
+    """Resolve API key with host -> root -> env fallback."""
    host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
-    key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
-    if not key:
-        base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
-        base_url = (base_url or "").strip()
-        if base_url:
-            from urllib.parse import urlparse
-            try:
-                parsed = urlparse(base_url)
-            except (TypeError, ValueError):
-                parsed = None
-            if parsed and parsed.scheme in ("http", "https") and parsed.netloc:
-                return "local"
-            # Schemeless but looks like a host (contains '.' or ':' and isn't
-            # a boolean literal): let it through so legacy configs don't
-            # regress into "no API key configured" when they previously worked.
-            lowered = base_url.lower()
-            if lowered not in ("true", "false", "none", "null") and any(
-                c in base_url for c in ".:"
-            ) and not base_url.isdigit():
-                return "local"
-    return key
+    return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")


 def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:
@@ -16,7 +16,6 @@ from __future__ import annotations
 import json
 import os
 import logging
-import hashlib
 from dataclasses import dataclass, field
 from pathlib import Path

@@ -28,6 +27,7 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

+GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
 HOST = "hermes"


@@ -53,11 +53,6 @@ def resolve_active_host() -> str:
    return HOST


-def resolve_global_config_path() -> Path:
-    """Return the shared Honcho config path for the current HOME."""
-    return Path.home() / ".honcho" / "config.json"
-
-
 def resolve_config_path() -> Path:
    """Return the active Honcho config path.

@@ -77,7 +72,7 @@ def resolve_config_path() -> Path:
    if default_path != local_path and default_path.exists():
        return default_path

-    return resolve_global_config_path()
+    return GLOBAL_CONFIG_PATH


 _RECALL_MODE_ALIASES = {"auto": "hybrid"}
@@ -143,15 +138,6 @@ def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] |
    return None


-# Default HTTP timeout (seconds) applied when no explicit timeout is
-# configured via HonchoClientConfig.timeout, honcho.timeout / requestTimeout,
-# or HONCHO_TIMEOUT. Honcho calls happen on the post-response path of
-# run_conversation; without a cap the agent can block indefinitely when
-# the Honcho backend is unreachable, preventing the gateway from
-# delivering the already-generated response.
-_DEFAULT_HTTP_TIMEOUT = 30.0
-
-
 def _resolve_optional_float(*values: Any) -> float | None:
    """Return the first non-empty value coerced to a positive float."""
    for value in values:
@@ -240,13 +226,6 @@ class HonchoClientConfig:
    # Identity
    peer_name: str | None = None
    ai_peer: str = "hermes"
-    # When True, ``peer_name`` wins over any gateway-supplied runtime
-    # identity (Telegram UID, Discord ID, …) when resolving the user peer.
-    # This keeps memory unified across platforms for single-user deployments
-    # where Honcho's one peer-name is an unambiguous identity — otherwise
-    # each platform would fork memory into its own peer (#14984).  Default
-    # ``False`` preserves existing multi-user behaviour.
-    pin_peer_name: bool = False
    # Toggles
    enabled: bool = False
    save_messages: bool = True
@@ -441,11 +420,6 @@ class HonchoClientConfig:
            timeout=timeout,
            peer_name=host_block.get("peerName") or raw.get("peerName"),
            ai_peer=ai_peer,
-            pin_peer_name=_resolve_bool(
-                host_block.get("pinPeerName"),
-                raw.get("pinPeerName"),
-                default=False,
-            ),
            enabled=enabled,
            save_messages=save_messages,
            write_frequency=write_frequency,
@@ -548,39 +522,6 @@ class HonchoClientConfig:
            pass
        return None

-    # Honcho enforces a 100-char limit on session IDs. Long gateway session keys
-    # (Matrix "!room:server" + thread event IDs, Telegram supergroup reply
-    # chains, Slack thread IDs with long workspace prefixes) can overflow this
-    # limit after sanitization; the Honcho API then rejects every call for that
-    # session with "session_id too long". See issue #13868.
-    _HONCHO_SESSION_ID_MAX_LEN = 100
-    _HONCHO_SESSION_ID_HASH_LEN = 8
-
-    @classmethod
-    def _enforce_session_id_limit(cls, sanitized: str, original: str) -> str:
-        """Truncate a sanitized session ID to Honcho's 100-char limit.
-
-        The common case (short keys) short-circuits with no modification.
-        For over-limit keys, keep a prefix of the sanitized ID and append a
-        deterministic ``-<sha256 prefix>`` suffix so two distinct long keys
-        that share a leading segment don't collide onto the same truncated ID.
-        The hash is taken over the *original* pre-sanitization key, so two
-        inputs that sanitize to the same string still collide intentionally
-        (same logical session), but two inputs that only share a prefix do not.
-        """
-        max_len = cls._HONCHO_SESSION_ID_MAX_LEN
-        if len(sanitized) <= max_len:
-            return sanitized
-
-        hash_len = cls._HONCHO_SESSION_ID_HASH_LEN
-        digest = hashlib.sha256(original.encode("utf-8")).hexdigest()[:hash_len]
-        # max_len - hash_len - 1 (for the '-' separator) chars of the sanitized
-        # prefix, then '-<hash>'. Strip any trailing hyphen from the prefix so
-        # the result doesn't double up on separators.
-        prefix_len = max_len - hash_len - 1
-        prefix = sanitized[:prefix_len].rstrip("-")
-        return f"{prefix}-{digest}"
-
    def resolve_session_name(
        self,
        cwd: str | None = None,
@@ -625,7 +566,7 @@ class HonchoClientConfig:
        if gateway_session_key:
            sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
            if sanitized:
-                return self._enforce_session_id_limit(sanitized, gateway_session_key)
+                return sanitized

        # per-session: inherit Hermes session_id (new Honcho session each run)
        if self.session_strategy == "per-session" and session_id:
@@ -705,11 +646,6 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
        except Exception:
            pass

-    # Fall back to the default so an unconfigured install cannot hang
-    # indefinitely on a stalled Honcho request.
-    if resolved_timeout is None:
-        resolved_timeout = _DEFAULT_HTTP_TIMEOUT
-
    if resolved_base_url:
        logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
    else:
@@ -95,7 +95,6 @@ class HonchoSessionManager:
        self._config = config
        self._runtime_user_peer_name = runtime_user_peer_name
        self._cache: dict[str, HonchoSession] = {}
-        self._cache_lock = threading.RLock()
        self._peers_cache: dict[str, Any] = {}
        self._sessions_cache: dict[str, Any] = {}

@@ -274,35 +273,17 @@ class HonchoSessionManager:
        Returns:
            The session.
        """
-        with self._cache_lock:
-            if key in self._cache:
-                logger.debug("Local session cache hit: %s", key)
-                return self._cache[key]
+        if key in self._cache:
+            logger.debug("Local session cache hit: %s", key)
+            return self._cache[key]

-        # Determine peer IDs — no lock needed (read-only, no shared state mutation).
-        # Gateway sessions normally use the runtime user identity (the
-        # platform-native ID: Telegram UID, Discord snowflake, Slack user,
-        # etc.) so multi-user bots scope memory per user.  For a single-user
-        # deployment the config-supplied ``peer_name`` is an unambiguous
-        # identity and we should keep it unified across platforms — see
-        # #14984.  Opt into that with ``hosts.<host>.pinPeerName: true`` in
-        # ``honcho.json`` (or root-level ``pinPeerName: true``).
-        # `is True` (not `bool(...)`) is deliberate: several multi-user tests
-        # pass a ``MagicMock`` for ``config`` where ``mock.pin_peer_name``
-        # silently returns another MagicMock — truthy by default.  Requiring
-        # strict ``True`` keeps pinning as opt-in even for callers that
-        # haven't updated their mocks yet; real configs built via
-        # ``from_global_config`` always produce a proper boolean.
-        pin_peer_name = (
-            self._config is not None
-            and bool(getattr(self._config, "peer_name", None))
-            and getattr(self._config, "pin_peer_name", False) is True
-        )
-        if self._runtime_user_peer_name and not pin_peer_name:
+        # Gateway sessions should use the runtime user identity when available.
+        if self._runtime_user_peer_name:
            user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
        elif self._config and self._config.peer_name:
            user_peer_id = self._sanitize_id(self._config.peer_name)
        else:
+            # Fallback: derive from session key
            parts = key.split(":", 1)
            channel = parts[0] if len(parts) > 1 else "default"
            chat_id = parts[1] if len(parts) > 1 else key
@@ -312,14 +293,19 @@ class HonchoSessionManager:
            self._config.ai_peer if self._config else "hermes-assistant"
        )

-        # All expensive I/O outside the lock — Honcho's persistence is source of truth
+        # Sanitize session ID for Honcho
        honcho_session_id = self._sanitize_id(key)
+
+        # Get or create peers
        user_peer = self._get_or_create_peer(user_peer_id)
        assistant_peer = self._get_or_create_peer(assistant_peer_id)
+
+        # Get or create Honcho session
        honcho_session, existing_messages = self._get_or_create_honcho_session(
            honcho_session_id, user_peer, assistant_peer
        )

+        # Convert Honcho messages to local format
        local_messages = []
        for msg in existing_messages:
            role = "assistant" if msg.peer_id == assistant_peer_id else "user"
@@ -327,9 +313,10 @@ class HonchoSessionManager:
                "role": role,
                "content": msg.content,
                "timestamp": msg.created_at.isoformat() if msg.created_at else "",
-                "_synced": True,
+                "_synced": True,  # Already in Honcho
            })

+        # Create local session wrapper with existing messages
        session = HonchoSession(
            key=key,
            user_peer_id=user_peer_id,
@@ -338,9 +325,7 @@ class HonchoSessionManager:
            messages=local_messages,
        )

-        # Write to cache under lock — only one writer wins
-        with self._cache_lock:
-            self._cache[key] = session
+        self._cache[key] = session
        return session

    def _flush_session(self, session: HonchoSession) -> bool:
@@ -371,15 +356,13 @@ class HonchoSessionManager:
            for msg in new_messages:
                msg["_synced"] = True
            logger.debug("Synced %d messages to Honcho for %s", len(honcho_messages), session.key)
-            with self._cache_lock:
-                self._cache[session.key] = session
+            self._cache[session.key] = session
            return True
        except Exception as e:
            for msg in new_messages:
                msg["_synced"] = False
            logger.error("Failed to sync messages to Honcho: %s", e)
-            with self._cache_lock:
-                self._cache[session.key] = session
+            self._cache[session.key] = session
            return False

    def _async_writer_loop(self) -> None:
@@ -451,9 +434,7 @@ class HonchoSessionManager:
        Called at session end for "session" write_frequency, or to force
        a sync before process exit regardless of mode.
        """
-        with self._cache_lock:
-            sessions = list(self._cache.values())
-        for session in sessions:
+        for session in list(self._cache.values()):
            try:
                self._flush_session(session)
            except Exception as e:
@@ -478,10 +459,9 @@ class HonchoSessionManager:

    def delete(self, key: str) -> bool:
        """Delete a session from local cache."""
-        with self._cache_lock:
-            if key in self._cache:
-                del self._cache[key]
-                return True
+        if key in self._cache:
+            del self._cache[key]
+            return True
        return False

    def new_session(self, key: str) -> HonchoSession:
@@ -493,25 +473,20 @@ class HonchoSessionManager:
        """
        import time

-        # Hold the reentrant lock across get_or_create so a concurrent caller
-        # can't observe the (old-popped, new-not-yet-inserted) gap and create
-        # its own session under the raw key.  `_cache_lock` is an RLock so
-        # nested reacquisition inside get_or_create is safe.
-        with self._cache_lock:
-            # Remove old session from caches (but don't delete from Honcho)
-            old_session = self._cache.pop(key, None)
-            if old_session:
-                self._sessions_cache.pop(old_session.honcho_session_id, None)
+        # Remove old session from caches (but don't delete from Honcho)
+        old_session = self._cache.pop(key, None)
+        if old_session:
+            self._sessions_cache.pop(old_session.honcho_session_id, None)

-            # Create new session with timestamp suffix
-            timestamp = int(time.time())
-            new_key = f"{key}:{timestamp}"
+        # Create new session with timestamp suffix
+        timestamp = int(time.time())
+        new_key = f"{key}:{timestamp}"

-            # get_or_create will create a fresh session
-            session = self.get_or_create(new_key)
+        # get_or_create will create a fresh session
+        session = self.get_or_create(new_key)

-            # Cache under the original key so callers find it by the expected name
-            self._cache[key] = session
+        # Cache under the original key so callers find it by the expected name
+        self._cache[key] = session

        logger.info("Created new session for %s (honcho: %s)", key, session.honcho_session_id)
        return session
@@ -1,307 +0,0 @@
-# providers/
-
-Single source of truth for every inference provider Hermes knows about.
-
-Each provider is declared once here as a `ProviderProfile`. Every other layer —
-auth resolution, transport kwargs, model listing, runtime routing — reads from
-these profiles instead of maintaining its own parallel data.
-
---
-
-## Directory layout
-
-```
-providers/
-├── base.py           ProviderProfile dataclass + OMIT_TEMPERATURE sentinel
-├── __init__.py       Registry: register_provider(), get_provider_profile()
-├── README.md         This file
-│
-├── # Simple providers — just identity + auth + endpoint
-├── alibaba.py        Alibaba Cloud DashScope
-├── arcee.py          Arcee AI
-├── bedrock.py        AWS Bedrock  (api_mode=bedrock_converse)
-├── deepseek.py       DeepSeek
-├── huggingface.py    Hugging Face Inference API
-├── kilocode.py       Kilo Code
-├── minimax.py        MiniMax (international + CN)
-├── nvidia.py         NVIDIA NIM  (default_max_tokens=16384)
-├── ollama_cloud.py   Ollama Cloud
-├── stepfun.py        StepFun
-├── xiaomi.py         Xiaomi MiMo
-├── xai.py            xAI Grok  (api_mode=codex_responses)
-├── zai.py            Z.AI / GLM
-│
-├── # Medium — one or two quirks
-├── anthropic.py      Native Anthropic  (x-api-key header, api_mode=anthropic_messages)
-├── copilot.py        GitHub Copilot  (auth_type=copilot, reasoning per model)
-├── copilot_acp.py    Copilot ACP subprocess  (api_mode=copilot_acp)
-├── custom.py         Custom/Ollama local  (think=false, num_ctx)
-├── gemini.py         Google Gemini AI Studio + Cloud Code OAuth
-├── kimi.py           Kimi Coding  (OMIT_TEMPERATURE, thinking, dual endpoint)
-├── openai_codex.py   OpenAI Codex OAuth  (api_mode=codex_responses)
-├── opencode.py       OpenCode Zen + Go  (per-model api_mode routing)
-│
-├── # Complex — subclasses with multiple overrides
-├── nous.py           Nous Portal  (tags, attribution, reasoning omit-when-disabled)
-├── openrouter.py     OpenRouter  (provider preferences, public model fetch)
-├── qwen.py           Qwen OAuth  (message normalization, cache_control, vl_hires)
-└── vercel.py         Vercel AI Gateway  (attribution headers, reasoning passthrough)
-```
-
---
-
-## ProviderProfile fields
-
-```python
-@dataclass
-class ProviderProfile:
-    # Identity
-    name: str                    # canonical ID — auto-registered as PROVIDER_REGISTRY key for new api-key providers
-    api_mode: str                # "chat_completions" | "anthropic_messages" |
-                                 # "codex_responses" | "bedrock_converse" | "copilot_acp"
-    aliases: tuple               # alternate names resolved by get_provider_profile()
-
-    # Auth & endpoints
-    env_vars: tuple              # env var names holding the API key, in priority order
-    base_url: str                # default inference endpoint
-    models_url: str              # explicit models endpoint; falls back to {base_url}/models
-                                 # set when the models catalog lives at a different URL
-                                 # (e.g. OpenRouter: public /api/v1/models vs /api/v1 inference)
-    auth_type: str               # "api_key" | "oauth_device_code" | "oauth_external" |
-                                 # "copilot" | "aws" | "external_process"
-
-    # Client-level quirks
-    default_headers: dict        # extra HTTP headers sent on every request
-
-    # Request-level quirks
-    fixed_temperature: Any       # None = use caller's default; OMIT_TEMPERATURE = don't send
-    default_max_tokens: int|None # inject max_tokens when caller omits it
-    default_aux_model: str       # cheap model for auxiliary tasks (compression, vision, etc.)
-                                 # empty string = use main model (default)
-```
-
---
-
-## Hooks (override in a subclass)
-
-| Method | When to override |
-|--------|-----------------|
-| `prepare_messages(messages)` | Provider needs message pre-processing (Qwen: string → list-of-parts, cache_control) |
-| `build_extra_body(*, session_id, **ctx)` | Provider-specific `extra_body` fields (Nous: tags, OpenRouter: provider preferences) |
-| `build_api_kwargs_extras(*, reasoning_config, **ctx)` | Returns `(extra_body_additions, top_level_kwargs)` — use when some fields go to `extra_body` and some go top-level (Kimi: `reasoning_effort` top-level; OpenRouter: `reasoning` in extra_body) |
-| `fetch_models(*, api_key, timeout)` | Custom model listing (Anthropic: x-api-key header; OpenRouter: public endpoint, no auth; Bedrock/copilot-acp: return None) |
-
-All hooks have safe defaults — only override what differs from the base.
-
---
-
-## How to add a new provider
-
-### 1. Simple (standard OpenAI-compatible endpoint)
-
-```python
-# providers/myprovider.py
-from providers import register_provider
-from providers.base import ProviderProfile
-
-myprovider = ProviderProfile(
-    name="myprovider",           # must match id in hermes_cli/auth.py PROVIDER_REGISTRY
-    aliases=("my-provider", "myp"),
-    api_mode="chat_completions",
-    env_vars=("MYPROVIDER_API_KEY",),
-    base_url="https://api.myprovider.com/v1",
-    auth_type="api_key",
-)
-
-register_provider(myprovider)
-```
-
-The default `fetch_models()` will call `GET https://api.myprovider.com/v1/models`
-with Bearer auth automatically. No override needed for standard `/v1/models`.
-
-### 2. With quirks (subclass)
-
-```python
-# providers/myprovider.py
-from typing import Any
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class MyProviderProfile(ProviderProfile):
-    """My provider — custom reasoning header."""
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        **ctx: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        extra_body: dict[str, Any] = {}
-        if reasoning_config:
-            extra_body["my_reasoning"] = reasoning_config.get("effort", "medium")
-        return extra_body, {}
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        # Override only if your endpoint differs from standard /v1/models
-        return super().fetch_models(api_key=api_key, timeout=timeout)
-
-
-myprovider = MyProviderProfile(
-    name="myprovider",
-    aliases=("myp",),
-    env_vars=("MYPROVIDER_API_KEY",),
-    base_url="https://api.myprovider.com/v1",
-)
-
-register_provider(myprovider)
-```
-
-### 3. Wire it up
-
-After creating the file, add `name` to the `_PROFILE_ACTIVE_PROVIDERS` set in
-`run_agent.py` once you've verified parity against the legacy flag path. Start
-with a simple provider (no message prep, no reasoning quirks) and work up.
-
---
-
-## fetch_models contract
-
-```python
-def fetch_models(
-    self,
-    *,
-    api_key: str | None = None,
-    timeout: float = 8.0,
-) -> list[str] | None:
-    ...
-```
-
- Returns `list[str]`: model IDs from the provider's live endpoint.
- Returns `None`: provider doesn't support REST model listing (Bedrock, copilot-acp),
-  or the request failed. Callers **must** fall back to `_PROVIDER_MODELS` on `None`.
- Never raises — swallow exceptions and return `None`.
- Default implementation: `GET {base_url}/models` with Bearer auth. Works for any
-  standard OpenAI-compatible provider.
-
-**Override when:**
- Auth header is not `Bearer` (Anthropic: `x-api-key`)
- Endpoint path differs from `/models` AND you can't just set `models_url` (OpenRouter: public endpoint, pass `api_key=None` explicitly)
- Response format differs (extra wrapping, non-standard `id` field)
- Provider has no REST endpoint (Bedrock, copilot-acp → return `None`)
- Filtering needed post-fetch (only tool-capable models, etc.)
-
-Use `models_url` instead of overriding when the only difference is the URL:
-
-```python
-# No subclass needed — just set models_url
-myprovider = ProviderProfile(
-    name="myprovider",
-    base_url="https://api.myprovider.com/v1",
-    models_url="https://catalog.myprovider.com/models",  # different host
-)
-```
-
---
-
-## Debugging
-
-### Check if a provider resolves
-
-```python
-from providers import get_provider_profile
-
-p = get_provider_profile("myprovider")
-print(p)           # ProviderProfile(name='myprovider', ...)
-print(p.base_url)
-print(p.api_mode)
-```
-
-### Check all registered providers
-
-```python
-from providers import _REGISTRY
-print(list(_REGISTRY.keys()))
-```
-
-### Test live model fetch
-
-```python
-import os
-from providers import get_provider_profile
-
-p = get_provider_profile("myprovider")
-key = os.getenv("MYPROVIDER_API_KEY")
-models = p.fetch_models(api_key=key, timeout=5.0)
-print(models)      # list of model IDs, or None on failure
-```
-
-### Test alias resolution
-
-```python
-from providers import get_provider_profile
-
-# All of these should return the same profile
-assert get_provider_profile("openrouter").name == "openrouter"
-assert get_provider_profile("or").name == "openrouter"
-```
-
-### Run the provider test suite
-
-```bash
-# From the repo root
-source venv/bin/activate
-python -m pytest tests/providers/ -v
-```
-
-### Check ruff + ty compliance
-
-```bash
-source venv/bin/activate
-ruff format providers/*.py
-ruff check providers/*.py --select UP,E,F,I,W
-ty check providers/*.py
-```
-
---
-
-## Common mistakes
-
-**Wrong `name`** — must be the same string that appears as the key in
-`hermes_cli/auth.py` `PROVIDER_REGISTRY`. New api-key providers auto-register
-into `PROVIDER_REGISTRY` from the profile, so the name IS the key. For providers
-with a pre-existing `PROVIDER_REGISTRY` entry, use the exact `id` field value.
-
-**Wrong `env_vars`** — separate API-key vars from base-URL override vars in the
-tuple. Env vars that end with `_BASE_URL` or `_URL` are treated as URL overrides;
-everything else is treated as an API key. Getting this wrong causes the doctor
-health check to send a URL string as a Bearer token.
-
-**Wrong `base_url`** — several providers have non-obvious paths:
-`stepfun: /step_plan/v1`, `opencode-go: /zen/go/v1`. The profile's `base_url`
-is also used as the `inference_base_url` when auto-registering into `PROVIDER_REGISTRY`
-for new providers, so it must be correct for auth resolution to work.
-
-**Skipping `api_mode`** — defaults to `chat_completions`. Providers that use
-`anthropic_messages`, `codex_responses`, `bedrock_converse`, or `copilot_acp`
-must set it explicitly.
-
-**Forgetting `register_provider()`** — auto-discovery runs `pkgutil.iter_modules`
-over the package and imports each module, but only if `register_provider()` is
-called at module level. Without it the profile is never in `_REGISTRY`.
-
-**`fetch_models` returning the wrong shape** — must return `list[str]` (plain
-model IDs), not `list[tuple]` or `list[dict]`. Callers expect plain strings.
-
-**Wrong `build_api_kwargs_extras` return shape** — must return a 2-tuple
-`(extra_body_dict, top_level_dict)`. Returning a single dict causes a
-`ValueError: not enough values to unpack` in the transport.
-
-**`build_api_kwargs_extras` wrong tuple** — must return `(extra_body_dict,
-top_level_dict)`. Returning a flat dict or swapping the order silently sends
-fields to the wrong place.
@@ -1,76 +0,0 @@
-"""Provider module registry.
-
-Auto-discovers ProviderProfile instances from providers/*.py modules.
-Each module should define a module-level PROVIDER or PROVIDERS list.
-
-Usage:
-    from providers import get_provider_profile
-    profile = get_provider_profile("nvidia")  # returns ProviderProfile or None
-    profile = get_provider_profile("kimi")    # checks name + aliases
-"""
-
-from __future__ import annotations
-
-from providers.base import OMIT_TEMPERATURE, ProviderProfile  # noqa: F401
-
-_REGISTRY: dict[str, ProviderProfile] = {}
-_ALIASES: dict[str, str] = {}
-_discovered = False
-
-
-def register_provider(profile: ProviderProfile) -> None:
-    """Register a provider profile by name and aliases."""
-    _REGISTRY[profile.name] = profile
-    for alias in profile.aliases:
-        _ALIASES[alias] = profile.name
-
-
-def get_provider_profile(name: str) -> ProviderProfile | None:
-    """Look up a provider profile by name or alias.
-
-    Returns None if the provider has no profile (falls back to generic).
-    """
-    if not _discovered:
-        _discover_providers()
-    canonical = _ALIASES.get(name, name)
-    return _REGISTRY.get(canonical)
-
-
-def list_providers() -> list[ProviderProfile]:
-    """Return all registered provider profiles (one per canonical name)."""
-    if not _discovered:
-        _discover_providers()
-    # Deduplicate: _REGISTRY has canonical names; _ALIASES points to same objects
-    seen: set[int] = set()
-    result: list[ProviderProfile] = []
-    for profile in _REGISTRY.values():
-        pid = id(profile)
-        if pid not in seen:
-            seen.add(pid)
-            result.append(profile)
-    return result
-
-
-def _discover_providers() -> None:
-    """Import all provider modules to trigger registration."""
-    global _discovered
-    if _discovered:
-        return
-    _discovered = True
-
-    import importlib
-    import pkgutil
-
-    import providers as _pkg
-
-    for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
-        if modname.startswith("_") or modname == "base":
-            continue
-        try:
-            importlib.import_module(f"providers.{modname}")
-        except ImportError as e:
-            import logging
-
-            logging.getLogger(__name__).warning(
-                "Failed to import provider module %s: %s", modname, e
-            )
@@ -1,13 +0,0 @@
-"""Alibaba Cloud DashScope provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-alibaba = ProviderProfile(
-    name="alibaba",
-    aliases=("dashscope", "alibaba-cloud", "qwen-dashscope"),
-    env_vars=("DASHSCOPE_API_KEY",),
-    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
-)
-
-register_provider(alibaba)
@@ -1,52 +0,0 @@
-"""Native Anthropic provider profile."""
-
-import json
-import logging
-import urllib.request
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-logger = logging.getLogger(__name__)
-
-
-class AnthropicProfile(ProviderProfile):
-    """Native Anthropic — uses x-api-key header, not Bearer."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Anthropic uses x-api-key header and anthropic-version."""
-        if not api_key:
-            return None
-        try:
-            req = urllib.request.Request("https://api.anthropic.com/v1/models")
-            req.add_header("x-api-key", api_key)
-            req.add_header("anthropic-version", "2023-06-01")
-            req.add_header("Accept", "application/json")
-            with urllib.request.urlopen(req, timeout=timeout) as resp:
-                data = json.loads(resp.read().decode())
-            return [
-                m["id"]
-                for m in data.get("data", [])
-                if isinstance(m, dict) and "id" in m
-            ]
-        except Exception as exc:
-            logger.debug("fetch_models(anthropic): %s", exc)
-            return None
-
-
-anthropic = AnthropicProfile(
-    name="anthropic",
-    aliases=("claude", "claude-oauth", "claude-code"),
-    api_mode="anthropic_messages",
-    env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
-    base_url="https://api.anthropic.com",
-    auth_type="api_key",
-    default_aux_model="claude-haiku-4-5-20251001",
-)
-
-register_provider(anthropic)
@@ -1,13 +0,0 @@
-"""Arcee AI provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-arcee = ProviderProfile(
-    name="arcee",
-    aliases=("arcee-ai", "arceeai"),
-    env_vars=("ARCEEAI_API_KEY",),
-    base_url="https://api.arcee.ai/api/v1",
-)
-
-register_provider(arcee)
@@ -1,165 +0,0 @@
-"""Provider profile base class.
-
-A ProviderProfile declares everything about an inference provider in one place:
-auth, endpoints, client quirks, request-time quirks. The transport reads this
-instead of receiving 20+ boolean flags.
-
-Provider profiles are DECLARATIVE — they describe the provider's behavior.
-They do NOT own client construction, credential rotation, or streaming.
-Those stay on AIAgent.
-"""
-
-from __future__ import annotations
-
-import logging
-from dataclasses import dataclass, field
-from typing import Any
-
-logger = logging.getLogger(__name__)
-
-# Sentinel for "omit temperature entirely" (Kimi: server manages it)
-OMIT_TEMPERATURE = object()
-
-
-@dataclass
-class ProviderProfile:
-    """Base provider profile — subclass or instantiate with overrides."""
-
-    # ── Identity ─────────────────────────────────────────────
-    name: str
-    api_mode: str = "chat_completions"
-    aliases: tuple = ()
-
-    # ── Human-readable metadata ───────────────────────────────
-    display_name: str = ""       # e.g. "GMI Cloud" — shown in picker/labels
-    description: str = ""        # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle
-    signup_url: str = ""         # e.g. "https://www.gmicloud.ai/" — shown during setup
-
-    # ── Auth & endpoints ─────────────────────────────────────
-    env_vars: tuple = ()
-    base_url: str = ""
-    models_url: str = ""  # explicit models endpoint; falls back to {base_url}/models
-    auth_type: str = "api_key"   # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
-
-    # ── Model catalog ─────────────────────────────────────────
-    # fallback_models: curated list shown in /model picker when live fetch fails.
-    # Only agentic models that support tool calling should appear here.
-    fallback_models: tuple = ()
-
-    # hostname: base hostname for URL→provider reverse-mapping in model_metadata.py
-    # e.g. "api.gmi-serving.com". Derived from base_url when empty.
-    hostname: str = ""
-
-    # ── Client-level quirks (set once at client construction) ─
-    default_headers: dict[str, str] = field(default_factory=dict)
-
-    # ── Request-level quirks ─────────────────────────────────
-    # Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
-    fixed_temperature: Any = None
-    default_max_tokens: int | None = None
-    default_aux_model: str = (
-        ""  # cheap model for auxiliary tasks (compression, vision, etc.)
-    )
-    # empty = use main model
-
-    # ── Hooks (override in subclass for complex providers) ───
-
-    def get_hostname(self) -> str:
-        """Return the provider's base hostname for URL-based detection.
-
-        Uses self.hostname if set explicitly, otherwise derives it from base_url.
-        e.g. 'https://api.gmi-serving.com/v1' → 'api.gmi-serving.com'
-        """
-        if self.hostname:
-            return self.hostname
-        if self.base_url:
-            from urllib.parse import urlparse
-            return urlparse(self.base_url).hostname or ""
-        return ""
-
-    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        """Provider-specific message preprocessing.
-
-        Called AFTER codex field sanitization, BEFORE developer role swap.
-        Default: pass-through.
-        """
-        return messages
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context: Any
-    ) -> dict[str, Any]:
-        """Provider-specific extra_body fields.
-
-        Merged into the API kwargs extra_body. Default: empty dict.
-        """
-        return {}
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        **context: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Provider-specific kwargs split between extra_body and top-level api_kwargs.
-
-        Returns (extra_body_additions, top_level_kwargs).
-        The transport merges extra_body_additions into extra_body, and
-        top_level_kwargs directly into api_kwargs.
-
-        This split exists because some providers put reasoning config in
-        extra_body (OpenRouter: extra_body.reasoning) while others put it
-        as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).
-
-        Default: ({}, {}).
-        """
-        return {}, {}
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Fetch the live model list from the provider's models endpoint.
-
-        Returns a list of model ID strings, or None if the fetch failed or
-        the provider does not support live model listing.
-
-        Resolution order for the endpoint URL:
-          1. self.models_url  (explicit override — use when the models
-             endpoint differs from the inference base URL, e.g. OpenRouter
-             exposes a public catalog at /api/v1/models while inference is
-             at /api/v1)
-          2. self.base_url + "/models"  (standard OpenAI-compat fallback)
-
-        The default implementation sends Bearer auth when api_key is given
-        and forwards self.default_headers. Override to customise auth, path,
-        response shape, or to return None for providers with no REST catalog.
-
-        Callers must always fall back to the static _PROVIDER_MODELS list
-        when this returns None.
-        """
-        url = (self.models_url or "").strip()
-        if not url:
-            if not self.base_url:
-                return None
-            url = self.base_url.rstrip("/") + "/models"
-
-        import json
-        import urllib.request
-
-        req = urllib.request.Request(url)
-        if api_key:
-            req.add_header("Authorization", f"Bearer {api_key}")
-        req.add_header("Accept", "application/json")
-        for k, v in self.default_headers.items():
-            req.add_header(k, v)
-
-        try:
-            with urllib.request.urlopen(req, timeout=timeout) as resp:
-                data = json.loads(resp.read().decode())
-            items = data if isinstance(data, list) else data.get("data", [])
-            return [m["id"] for m in items if isinstance(m, dict) and "id" in m]
-        except Exception as exc:
-            logger.debug("fetch_models(%s): %s", self.name, exc)
-            return None
@@ -1,29 +0,0 @@
-"""AWS Bedrock provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class BedrockProfile(ProviderProfile):
-    """AWS Bedrock — no REST /v1/models endpoint; uses AWS SDK."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Bedrock model listing requires AWS SDK, not a REST call."""
-        return None
-
-
-bedrock = BedrockProfile(
-    name="bedrock",
-    aliases=("aws", "aws-bedrock", "amazon-bedrock", "amazon"),
-    api_mode="bedrock_converse",
-    env_vars=(),  # AWS SDK credentials — not env vars
-    base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
-    auth_type="aws_sdk",
-)
-
-register_provider(bedrock)
@@ -1,58 +0,0 @@
-"""Copilot / GitHub Models provider profile.
-
-Copilot uses per-model api_mode routing:
-  - GPT-5+ / Codex models → codex_responses
-  - Claude models → anthropic_messages
-  - Everything else → chat_completions (this profile covers that subset)
-
-Key quirks for the chat_completions subset:
-  - Editor attribution headers (via copilot_default_headers())
-  - GitHub Models reasoning extra_body (model-catalog gated)
-"""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class CopilotProfile(ProviderProfile):
-    """GitHub Copilot / GitHub Models — editor headers + reasoning."""
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        model: str | None = None,
-        reasoning_config: dict | None = None,
-        supports_reasoning: bool = False,
-        **ctx,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        extra_body: dict[str, Any] = {}
-        if supports_reasoning and model:
-            try:
-                from hermes_cli.models import github_model_reasoning_efforts
-
-                supported_efforts = github_model_reasoning_efforts(model)
-                if supported_efforts and reasoning_config:
-                    effort = reasoning_config.get("effort", "medium")
-                    # Normalize non-standard effort levels to the nearest supported
-                    if effort == "xhigh":
-                        effort = "high"
-                    if effort in supported_efforts:
-                        extra_body["reasoning"] = {"effort": effort}
-                elif supported_efforts:
-                    extra_body["reasoning"] = {"effort": "medium"}
-            except Exception:
-                pass
-        return extra_body, {}
-
-
-copilot = CopilotProfile(
-    name="copilot",
-    aliases=("github-copilot", "github-models", "github-model", "github"),
-    env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
-    base_url="https://api.githubcopilot.com",
-    auth_type="copilot",
-)
-
-register_provider(copilot)
@@ -1,34 +0,0 @@
-"""GitHub Copilot ACP provider profile.
-
-copilot-acp uses an external ACP subprocess — NOT the standard
-transport. api_mode="copilot_acp" is handled separately in run_agent.py.
-The profile captures auth + endpoint metadata for registry migration.
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class CopilotACPProfile(ProviderProfile):
-    """GitHub Copilot ACP — external process, no REST models endpoint."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Model listing is handled by the ACP subprocess."""
-        return None
-
-
-copilot_acp = CopilotACPProfile(
-    name="copilot-acp",
-    aliases=("github-copilot-acp", "copilot-acp-agent"),
-    api_mode="chat_completions",  # ACP subprocess uses chat_completions routing
-    env_vars=(),  # Managed by ACP subprocess
-    base_url="acp://copilot",  # ACP internal scheme
-    auth_type="external_process",
-)
-
-register_provider(copilot_acp)
@@ -1,71 +0,0 @@
-"""Custom / Ollama (local) provider profile.
-
-Covers any endpoint registered as provider="custom", including local
-Ollama instances. Key quirks:
-  - ollama_num_ctx → extra_body.options.num_ctx (local context window)
-  - reasoning_config disabled → extra_body.think = False
-"""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class CustomProfile(ProviderProfile):
-    """Custom/Ollama local provider — think=false and num_ctx support."""
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        ollama_num_ctx: int | None = None,
-        **ctx: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        extra_body: dict[str, Any] = {}
-
-        # Ollama context window
-        if ollama_num_ctx:
-            options = extra_body.get("options", {})
-            options["num_ctx"] = ollama_num_ctx
-            extra_body["options"] = options
-
-        # Disable thinking when reasoning is turned off
-        if reasoning_config and isinstance(reasoning_config, dict):
-            _effort = (reasoning_config.get("effort") or "").strip().lower()
-            _enabled = reasoning_config.get("enabled", True)
-            if _effort == "none" or _enabled is False:
-                extra_body["think"] = False
-
-        return extra_body, {}
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Custom/Ollama: base_url is user-configured; fetch if set."""
-        if not self.base_url:
-            return None
-        return super().fetch_models(api_key=api_key, timeout=timeout)
-
-
-custom = CustomProfile(
-    name="custom",
-    aliases=(
-        "ollama",
-        "local",
-        "lmstudio",
-        "lm-studio",
-        "lm_studio",
-        "vllm",
-        "llamacpp",
-        "llama.cpp",
-        "llama-cpp",
-    ),
-    env_vars=(),  # No fixed key — custom endpoint
-    base_url="",  # User-configured
-)
-
-register_provider(custom)
@@ -1,20 +0,0 @@
-"""DeepSeek provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-deepseek = ProviderProfile(
-    name="deepseek",
-    aliases=("deepseek-chat",),
-    env_vars=("DEEPSEEK_API_KEY",),
-    display_name="DeepSeek",
-    description="DeepSeek — native DeepSeek API",
-    signup_url="https://platform.deepseek.com/",
-    fallback_models=(
-        "deepseek-chat",
-        "deepseek-reasoner",
-    ),
-    base_url="https://api.deepseek.com/v1",
-)
-
-register_provider(deepseek)
@@ -1,34 +0,0 @@
-"""Google Gemini provider profiles.
-
-gemini:            Google AI Studio (API key) — uses GeminiNativeClient
-google-gemini-cli: Google Cloud Code Assist (OAuth) — uses GeminiCloudCodeClient
-
-Both report api_mode="chat_completions" but use custom native clients
-that bypass the standard OpenAI transport. The profile captures auth
-and endpoint metadata for auth.py / runtime_provider.py migration.
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-gemini = ProviderProfile(
-    name="gemini",
-    aliases=("google", "google-gemini", "google-ai-studio"),
-    api_mode="chat_completions",
-    env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
-    base_url="https://generativelanguage.googleapis.com/v1beta",
-    auth_type="api_key",
-    default_aux_model="gemini-3-flash-preview",
-)
-
-google_gemini_cli = ProviderProfile(
-    name="google-gemini-cli",
-    aliases=("gemini-cli", "gemini-oauth"),
-    api_mode="chat_completions",
-    env_vars=(),  # OAuth — no API key
-    base_url="cloudcode-pa://google",  # Cloud Code Assist internal scheme
-    auth_type="oauth_external",
-)
-
-register_provider(gemini)
-register_provider(google_gemini_cli)
@@ -1,26 +0,0 @@
-"""GMI Cloud provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-gmi = ProviderProfile(
-    name="gmi",
-    aliases=("gmi-cloud", "gmicloud"),
-    display_name="GMI Cloud",
-    description="GMI Cloud — multi-model direct API (slash-form model IDs)",
-    signup_url="https://www.gmicloud.ai/",
-    env_vars=("GMI_API_KEY", "GMI_BASE_URL"),
-    base_url="https://api.gmi-serving.com/v1",
-    auth_type="api_key",
-    default_aux_model="google/gemini-3.1-flash-lite-preview",
-    fallback_models=(
-        "zai-org/GLM-5.1-FP8",
-        "deepseek-ai/DeepSeek-V3.2",
-        "moonshotai/Kimi-K2.5",
-        "google/gemini-3.1-flash-lite-preview",
-        "anthropic/claude-sonnet-4.6",
-        "openai/gpt-5.4",
-    ),
-)
-
-register_provider(gmi)
@@ -1,20 +0,0 @@
-"""Hugging Face provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-huggingface = ProviderProfile(
-    name="huggingface",
-    aliases=("hf", "hugging-face", "huggingface-hub"),
-    env_vars=("HF_TOKEN",),
-    display_name="HuggingFace",
-    description="HuggingFace Inference API",
-    signup_url="https://huggingface.co/settings/tokens",
-    fallback_models=(
-        "Qwen/Qwen3.5-72B-Instruct",
-        "deepseek-ai/DeepSeek-V3.2",
-    ),
-    base_url="https://router.huggingface.co/v1",
-)
-
-register_provider(huggingface)
@@ -1,14 +0,0 @@
-"""Kilo Code provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-kilocode = ProviderProfile(
-    name="kilocode",
-    aliases=("kilo-code", "kilo", "kilo-gateway"),
-    env_vars=("KILOCODE_API_KEY",),
-    base_url="https://api.kilo.ai/api/gateway",
-    default_aux_model="google/gemini-3-flash-preview",
-)
-
-register_provider(kilocode)
@@ -1,71 +0,0 @@
-"""Kimi / Moonshot provider profiles.
-
-Kimi has dual endpoints:
-  - sk-kimi-* keys → api.kimi.com/coding (Anthropic Messages API)
-  - legacy keys → api.moonshot.ai/v1 (OpenAI chat completions)
-
-This module covers the chat_completions path (/v1 endpoint).
-"""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import OMIT_TEMPERATURE, ProviderProfile
-
-
-class KimiProfile(ProviderProfile):
-    """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
-
-    def build_api_kwargs_extras(
-        self, *, reasoning_config: dict | None = None, **context
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Kimi uses extra_body.thinking + top-level reasoning_effort."""
-        extra_body = {}
-        top_level = {}
-
-        if not reasoning_config or not isinstance(reasoning_config, dict):
-            # No config → thinking enabled, default effort
-            extra_body["thinking"] = {"type": "enabled"}
-            top_level["reasoning_effort"] = "medium"
-            return extra_body, top_level
-
-        enabled = reasoning_config.get("enabled", True)
-        if enabled is False:
-            extra_body["thinking"] = {"type": "disabled"}
-            return extra_body, top_level
-
-        # Enabled
-        extra_body["thinking"] = {"type": "enabled"}
-        effort = (reasoning_config.get("effort") or "").strip().lower()
-        if effort in ("low", "medium", "high"):
-            top_level["reasoning_effort"] = effort
-        else:
-            top_level["reasoning_effort"] = "medium"
-
-        return extra_body, top_level
-
-
-kimi = KimiProfile(
-    name="kimi-coding",
-    aliases=("kimi", "moonshot", "kimi-for-coding"),
-    env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
-    base_url="https://api.moonshot.ai/v1",
-    fixed_temperature=OMIT_TEMPERATURE,
-    default_max_tokens=32000,
-    default_headers={"User-Agent": "hermes-agent/1.0"},
-    default_aux_model="kimi-k2-turbo-preview",
-)
-
-kimi_cn = KimiProfile(
-    name="kimi-coding-cn",
-    aliases=("kimi-cn", "moonshot-cn"),
-    env_vars=("KIMI_CN_API_KEY",),
-    base_url="https://api.moonshot.cn/v1",
-    fixed_temperature=OMIT_TEMPERATURE,
-    default_max_tokens=32000,
-    default_headers={"User-Agent": "hermes-agent/1.0"},
-    default_aux_model="kimi-k2-turbo-preview",
-)
-
-register_provider(kimi)
-register_provider(kimi_cn)
@@ -1,31 +0,0 @@
-"""MiniMax provider profiles (international + China).
-
-Both use anthropic_messages api_mode — their inference_base_url
-ends with /anthropic which triggers auto-detection to anthropic_messages.
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-minimax = ProviderProfile(
-    name="minimax",
-    aliases=("mini-max",),
-    api_mode="anthropic_messages",
-    env_vars=("MINIMAX_API_KEY",),
-    base_url="https://api.minimax.io/anthropic",
-    auth_type="api_key",
-    default_aux_model="MiniMax-M2.7",
-)
-
-minimax_cn = ProviderProfile(
-    name="minimax-cn",
-    aliases=("minimax-china", "minimax_cn"),
-    api_mode="anthropic_messages",
-    env_vars=("MINIMAX_CN_API_KEY",),
-    base_url="https://api.minimaxi.com/anthropic",
-    auth_type="api_key",
-    default_aux_model="MiniMax-M2.7",
-)
-
-register_provider(minimax)
-register_provider(minimax_cn)
@@ -1,53 +0,0 @@
-"""Nous Portal provider profile."""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class NousProfile(ProviderProfile):
-    """Nous Portal — product tags, reasoning with Nous-specific omission."""
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context
-    ) -> dict[str, Any]:
-        return {"tags": ["product=hermes-agent"]}
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        supports_reasoning: bool = False,
-        **context,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Nous: passes full reasoning_config, but OMITS when disabled."""
-        extra_body = {}
-        if supports_reasoning:
-            if reasoning_config is not None:
-                rc = dict(reasoning_config)
-                if rc.get("enabled") is False:
-                    pass  # Nous omits reasoning when disabled
-                else:
-                    extra_body["reasoning"] = rc
-            else:
-                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-        return extra_body, {}
-
-
-nous = NousProfile(
-    name="nous",
-    aliases=("nous-portal", "nousresearch"),
-    env_vars=("NOUS_API_KEY",),
-    display_name="Nous Research",
-    description="Nous Research — Hermes model family",
-    signup_url="https://nousresearch.com/",
-    fallback_models=(
-        "hermes-3-405b",
-        "hermes-3-70b",
-    ),
-    base_url="https://inference.nousresearch.com/v1",
-    auth_type="oauth_device_code",
-)
-
-register_provider(nous)
@@ -1,21 +0,0 @@
-"""NVIDIA NIM provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-nvidia = ProviderProfile(
-    name="nvidia",
-    aliases=("nvidia-nim",),
-    env_vars=("NVIDIA_API_KEY",),
-    display_name="NVIDIA NIM",
-    description="NVIDIA NIM — accelerated inference",
-    signup_url="https://build.nvidia.com/",
-    fallback_models=(
-        "nvidia/llama-3.1-nemotron-70b-instruct",
-        "nvidia/llama-3.3-70b-instruct",
-    ),
-    base_url="https://integrate.api.nvidia.com/v1",
-    default_max_tokens=16384,
-)
-
-register_provider(nvidia)
@@ -1,14 +0,0 @@
-"""Ollama Cloud provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-ollama_cloud = ProviderProfile(
-    name="ollama-cloud",
-    aliases=("ollama_cloud",),
-    default_aux_model="nemotron-3-nano:30b",
-    env_vars=("OLLAMA_API_KEY",),
-    base_url="https://ollama.com/v1",
-)
-
-register_provider(ollama_cloud)
@@ -1,15 +0,0 @@
-"""OpenAI Codex (Responses API) provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-openai_codex = ProviderProfile(
-    name="openai-codex",
-    aliases=("codex", "openai_codex"),
-    api_mode="codex_responses",
-    env_vars=(),  # OAuth external — no API key
-    base_url="https://chatgpt.com/backend-api/codex",
-    auth_type="oauth_external",
-)
-
-register_provider(openai_codex)
@@ -1,30 +0,0 @@
-"""OpenCode provider profiles (Zen + Go).
-
-Both use per-model api_mode routing:
-  - OpenCode Zen: Claude → anthropic_messages, GPT-5/Codex → codex_responses,
-    everything else → chat_completions (this profile)
-  - OpenCode Go: MiniMax → anthropic_messages, GLM/Kimi → chat_completions
-    (this profile)
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-opencode_zen = ProviderProfile(
-    name="opencode-zen",
-    aliases=("opencode", "opencode_zen", "zen"),
-    env_vars=("OPENCODE_ZEN_API_KEY",),
-    base_url="https://opencode.ai/zen/v1",
-    default_aux_model="gemini-3-flash",
-)
-
-opencode_go = ProviderProfile(
-    name="opencode-go",
-    aliases=("opencode_go", "go", "opencode-go-sub"),
-    env_vars=("OPENCODE_GO_API_KEY",),
-    base_url="https://opencode.ai/zen/go/v1",
-    default_aux_model="glm-5",
-)
-
-register_provider(opencode_zen)
-register_provider(opencode_go)
@@ -1,86 +0,0 @@
-"""OpenRouter provider profile."""
-
-import logging
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-logger = logging.getLogger(__name__)
-
-_CACHE: list[str] | None = None
-
-
-class OpenRouterProfile(ProviderProfile):
-    """OpenRouter aggregator — provider preferences, reasoning config passthrough."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Fetch from public OpenRouter catalog — no auth required.
-
-        Note: Tool-call capability filtering is applied by hermes_cli/models.py
-        via fetch_openrouter_models() → _openrouter_model_supports_tools(), not
-        here. The picker early-returns via the dedicated openrouter path before
-        reaching this method, so filtering here would be unreachable.
-        """
-        global _CACHE  # noqa: PLW0603
-        if _CACHE is not None:
-            return _CACHE
-        try:
-            result = super().fetch_models(api_key=None, timeout=timeout)
-            if result is not None:
-                _CACHE = result
-            return result
-        except Exception as exc:
-            logger.debug("fetch_models(openrouter): %s", exc)
-            return None
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context: Any
-    ) -> dict[str, Any]:
-        body: dict[str, Any] = {}
-        prefs = context.get("provider_preferences")
-        if prefs:
-            body["provider"] = prefs
-        return body
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        supports_reasoning: bool = False,
-        **context: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
-        extra_body: dict[str, Any] = {}
-        if supports_reasoning:
-            if reasoning_config is not None:
-                extra_body["reasoning"] = dict(reasoning_config)
-            else:
-                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-        return extra_body, {}
-
-
-openrouter = OpenRouterProfile(
-    name="openrouter",
-    aliases=("or",),
-    env_vars=("OPENROUTER_API_KEY",),
-    display_name="OpenRouter",
-    description="OpenRouter — unified API for 200+ models",
-    signup_url="https://openrouter.ai/keys",
-    base_url="https://openrouter.ai/api/v1",
-    models_url="https://openrouter.ai/api/v1/models",
-    fallback_models=(
-        "anthropic/claude-sonnet-4.6",
-        "openai/gpt-5.4",
-        "deepseek/deepseek-chat",
-        "google/gemini-3-flash-preview",
-        "qwen/qwen3-plus",
-    ),
-)
-
-register_provider(openrouter)
@@ -1,82 +0,0 @@
-"""Qwen Portal provider profile."""
-
-import copy
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class QwenProfile(ProviderProfile):
-    """Qwen Portal — message normalization, vl_high_resolution, metadata top-level."""
-
-    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        """Normalize content to list-of-dicts format.
-
-        Inject cache_control on system message.
-
-        Matches the behavior of run_agent.py:_qwen_prepare_chat_messages().
-        """
-        prepared = copy.deepcopy(messages)
-        if not prepared:
-            return prepared
-
-        for msg in prepared:
-            if not isinstance(msg, dict):
-                continue
-            content = msg.get("content")
-            if isinstance(content, str):
-                msg["content"] = [{"type": "text", "text": content}]
-            elif isinstance(content, list):
-                normalized_parts = []
-                for part in content:
-                    if isinstance(part, str):
-                        normalized_parts.append({"type": "text", "text": part})
-                    elif isinstance(part, dict):
-                        normalized_parts.append(part)
-                if normalized_parts:
-                    msg["content"] = normalized_parts
-
-        # Inject cache_control on the last part of the system message.
-        for msg in prepared:
-            if isinstance(msg, dict) and msg.get("role") == "system":
-                content = msg.get("content")
-                if (
-                    isinstance(content, list)
-                    and content
-                    and isinstance(content[-1], dict)
-                ):
-                    content[-1]["cache_control"] = {"type": "ephemeral"}
-                break
-
-        return prepared
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context
-    ) -> dict[str, Any]:
-        return {"vl_high_resolution_images": True}
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        qwen_session_metadata: dict | None = None,
-        **context,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Qwen metadata goes to top-level api_kwargs, not extra_body."""
-        top_level = {}
-        if qwen_session_metadata:
-            top_level["metadata"] = qwen_session_metadata
-        return {}, top_level
-
-
-qwen = QwenProfile(
-    name="qwen-oauth",
-    aliases=("qwen", "qwen-portal", "qwen-cli"),
-    env_vars=("QWEN_API_KEY",),
-    base_url="https://portal.qwen.ai/v1",
-    auth_type="oauth_external",
-    default_max_tokens=65536,
-)
-
-register_provider(qwen)
@@ -1,14 +0,0 @@
-"""StepFun provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-stepfun = ProviderProfile(
-    name="stepfun",
-    aliases=("step", "stepfun-coding-plan"),
-    default_aux_model="step-3.5-flash",
-    env_vars=("STEPFUN_API_KEY",),
-    base_url="https://api.stepfun.ai/step_plan/v1",
-)
-
-register_provider(stepfun)
--- a/Show More
+++ b/Show More