fix: include cache tokens in dashboard analytics input totals

The /api/analytics/usage endpoint summed only the raw input_tokens column, which for Anthropic-direct sessions holds only the uncached portion of the prompt. The cache_read_tokens and cache_write_tokens columns, which hold the rest of the prompt, were ignored. As a result, the dashboard massively undercounted token usage — showing ~117M instead of ~345M over 30 days — because Anthropic sessions with high cache hit rates store almost all prompt tokens in the cache columns. Fix: fold COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0) into the input_tokens sum in all three SQL queries (daily, by-model, totals). This is correct for every provider because normalize_usage() guarantees input_tokens + cache_read + cache_write = total prompt tokens regardless of API shape (Anthropic / OpenAI / Codex). Also add a regression test that creates a session with Anthropic-style token splits and asserts that the endpoint returns the combined total.
2026-04-27 21:48:41 +02:00
164 changed files with 1457 additions and 12049 deletions
@@ -69,4 +69,3 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
-models-dev-upstream/
@@ -1,632 +0,0 @@
-"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
-
-This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
-backend. Each request starts a short-lived ACP session, sends the formatted
-conversation as a single prompt, collects text chunks, and converts the result
-back into the minimal shape Hermes expects from an OpenAI client.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import queue
-import re
-import shlex
-import subprocess
-import threading
-import time
-from collections import deque
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Any
-
-from agent.file_safety import get_read_block_error, is_write_denied
-from agent.redact import redact_sensitive_text
-
-ACP_MARKER_BASE_URL = "acp://copilot"
-_DEFAULT_TIMEOUT_SECONDS = 900.0
-
-_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
-_TOOL_CALL_JSON_RE = re.compile(
-    r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}",
-    re.DOTALL,
-)
-
-
-def _resolve_command() -> str:
-    return (
-        os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
-        or os.getenv("COPILOT_CLI_PATH", "").strip()
-        or "copilot"
-    )
-
-
-def _resolve_args() -> list[str]:
-    raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
-    if not raw:
-        return ["--acp", "--stdio"]
-    return shlex.split(raw)
-
-
-def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "error": {
-            "code": code,
-            "message": message,
-        },
-    }
-
-
-def _permission_denied(message_id: Any) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "result": {
-            "outcome": {
-                "outcome": "cancelled",
-            }
-        },
-    }
-
-
-def _format_messages_as_prompt(
-    messages: list[dict[str, Any]],
-    model: str | None = None,
-    tools: list[dict[str, Any]] | None = None,
-    tool_choice: Any = None,
-) -> str:
-    sections: list[str] = [
-        "You are being used as the active ACP agent backend for Hermes.",
-        "Use ACP capabilities to complete tasks.",
-        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
-        "If no tool is needed, answer normally.",
-    ]
-    if model:
-        sections.append(f"Hermes requested model hint: {model}")
-
-    if isinstance(tools, list) and tools:
-        tool_specs: list[dict[str, Any]] = []
-        for t in tools:
-            if not isinstance(t, dict):
-                continue
-            fn = t.get("function") or {}
-            if not isinstance(fn, dict):
-                continue
-            name = fn.get("name")
-            if not isinstance(name, str) or not name.strip():
-                continue
-            tool_specs.append(
-                {
-                    "name": name.strip(),
-                    "description": fn.get("description", ""),
-                    "parameters": fn.get("parameters", {}),
-                }
-            )
-        if tool_specs:
-            sections.append(
-                "Available tools (OpenAI function schema). "
-                "When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
-                "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
-                + json.dumps(tool_specs, ensure_ascii=False)
-            )
-
-    if tool_choice is not None:
-        sections.append(
-            f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
-        )
-
-    transcript: list[str] = []
-    for message in messages:
-        if not isinstance(message, dict):
-            continue
-        role = str(message.get("role") or "unknown").strip().lower()
-        if role == "tool":
-            role = "tool"
-        elif role not in {"system", "user", "assistant"}:
-            role = "context"
-
-        content = message.get("content")
-        rendered = _render_message_content(content)
-        if not rendered:
-            continue
-
-        label = {
-            "system": "System",
-            "user": "User",
-            "assistant": "Assistant",
-            "tool": "Tool",
-            "context": "Context",
-        }.get(role, role.title())
-        transcript.append(f"{label}:\n{rendered}")
-
-    if transcript:
-        sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
-
-    sections.append("Continue the conversation from the latest user request.")
-    return "\n\n".join(
-        section.strip() for section in sections if section and section.strip()
-    )
-
-
-def _render_message_content(content: Any) -> str:
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content.strip()
-    if isinstance(content, dict):
-        if "text" in content:
-            return str(content.get("text") or "").strip()
-        if "content" in content and isinstance(content.get("content"), str):
-            return str(content.get("content") or "").strip()
-        return json.dumps(content, ensure_ascii=True)
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict):
-                text = item.get("text")
-                if isinstance(text, str) and text.strip():
-                    parts.append(text.strip())
-        return "\n".join(parts).strip()
-    return str(content).strip()
-
-
-def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
-    if not isinstance(text, str) or not text.strip():
-        return [], ""
-
-    extracted: list[SimpleNamespace] = []
-    consumed_spans: list[tuple[int, int]] = []
-
-    def _try_add_tool_call(raw_json: str) -> None:
-        try:
-            obj = json.loads(raw_json)
-        except Exception:
-            return
-        if not isinstance(obj, dict):
-            return
-        fn = obj.get("function")
-        if not isinstance(fn, dict):
-            return
-        fn_name = fn.get("name")
-        if not isinstance(fn_name, str) or not fn_name.strip():
-            return
-        fn_args = fn.get("arguments", "{}")
-        if not isinstance(fn_args, str):
-            fn_args = json.dumps(fn_args, ensure_ascii=False)
-        call_id = obj.get("id")
-        if not isinstance(call_id, str) or not call_id.strip():
-            call_id = f"acp_call_{len(extracted) + 1}"
-
-        extracted.append(
-            SimpleNamespace(
-                id=call_id,
-                call_id=call_id,
-                response_item_id=None,
-                type="function",
-                function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
-            )
-        )
-
-    for m in _TOOL_CALL_BLOCK_RE.finditer(text):
-        raw = m.group(1)
-        _try_add_tool_call(raw)
-        consumed_spans.append((m.start(), m.end()))
-
-    # Only try bare-JSON fallback when no XML blocks were found.
-    if not extracted:
-        for m in _TOOL_CALL_JSON_RE.finditer(text):
-            raw = m.group(0)
-            _try_add_tool_call(raw)
-            consumed_spans.append((m.start(), m.end()))
-
-    if not consumed_spans:
-        return extracted, text.strip()
-
-    consumed_spans.sort()
-    merged: list[tuple[int, int]] = []
-    for start, end in consumed_spans:
-        if not merged or start > merged[-1][1]:
-            merged.append((start, end))
-        else:
-            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
-
-    parts: list[str] = []
-    cursor = 0
-    for start, end in merged:
-        if cursor < start:
-            parts.append(text[cursor:start])
-        cursor = max(cursor, end)
-    if cursor < len(text):
-        parts.append(text[cursor:])
-
-    cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
-    return extracted, cleaned
-
-
-def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
-    candidate = Path(path_text)
-    if not candidate.is_absolute():
-        raise PermissionError("ACP file-system paths must be absolute.")
-    resolved = candidate.resolve()
-    root = Path(cwd).resolve()
-    try:
-        resolved.relative_to(root)
-    except ValueError as exc:
-        raise PermissionError(
-            f"Path '{resolved}' is outside the session cwd '{root}'."
-        ) from exc
-    return resolved
-
-
-class _ACPChatCompletions:
-    def __init__(self, client: CopilotACPClient):
-        self._client = client
-
-    def create(self, **kwargs: Any) -> Any:
-        return self._client._create_chat_completion(**kwargs)
-
-
-class _ACPChatNamespace:
-    def __init__(self, client: CopilotACPClient):
-        self.completions = _ACPChatCompletions(client)
-
-
-class CopilotACPClient:
-    """Minimal OpenAI-client-compatible facade for Copilot ACP."""
-
-    def __init__(
-        self,
-        *,
-        api_key: str | None = None,
-        base_url: str | None = None,
-        default_headers: dict[str, str] | None = None,
-        acp_command: str | None = None,
-        acp_args: list[str] | None = None,
-        acp_cwd: str | None = None,
-        command: str | None = None,
-        args: list[str] | None = None,
-        **_: Any,
-    ):
-        self.api_key = api_key or "copilot-acp"
-        self.base_url = base_url or ACP_MARKER_BASE_URL
-        self._default_headers = dict(default_headers or {})
-        self._acp_command = acp_command or command or _resolve_command()
-        self._acp_args = list(acp_args or args or _resolve_args())
-        self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
-        self.chat = _ACPChatNamespace(self)
-        self.is_closed = False
-        self._active_process: subprocess.Popen[str] | None = None
-        self._active_process_lock = threading.Lock()
-
-    def close(self) -> None:
-        proc: subprocess.Popen[str] | None
-        with self._active_process_lock:
-            proc = self._active_process
-            self._active_process = None
-        self.is_closed = True
-        if proc is None:
-            return
-        try:
-            proc.terminate()
-            proc.wait(timeout=2)
-        except Exception:
-            try:
-                proc.kill()
-            except Exception:
-                pass
-
-    def _create_chat_completion(
-        self,
-        *,
-        model: str | None = None,
-        messages: list[dict[str, Any]] | None = None,
-        timeout: float | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        tool_choice: Any = None,
-        **_: Any,
-    ) -> Any:
-        prompt_text = _format_messages_as_prompt(
-            messages or [],
-            model=model,
-            tools=tools,
-            tool_choice=tool_choice,
-        )
-        # Normalise timeout: run_agent.py may pass an httpx.Timeout object
-        # (used natively by the OpenAI SDK) rather than a plain float.
-        if timeout is None:
-            _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
-        elif isinstance(timeout, (int, float)):
-            _effective_timeout = float(timeout)
-        else:
-            # httpx.Timeout or similar — pick the largest component so the
-            # subprocess has enough wall-clock time for the full response.
-            _candidates = [
-                getattr(timeout, attr, None)
-                for attr in ("read", "write", "connect", "pool", "timeout")
-            ]
-            _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
-            _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
-
-        response_text, reasoning_text = self._run_prompt(
-            prompt_text,
-            timeout_seconds=_effective_timeout,
-        )
-
-        tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
-
-        usage = SimpleNamespace(
-            prompt_tokens=0,
-            completion_tokens=0,
-            total_tokens=0,
-            prompt_tokens_details=SimpleNamespace(cached_tokens=0),
-        )
-        assistant_message = SimpleNamespace(
-            content=cleaned_text,
-            tool_calls=tool_calls,
-            reasoning=reasoning_text or None,
-            reasoning_content=reasoning_text or None,
-            reasoning_details=None,
-        )
-        finish_reason = "tool_calls" if tool_calls else "stop"
-        choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
-        return SimpleNamespace(
-            choices=[choice],
-            usage=usage,
-            model=model or "copilot-acp",
-        )
-
-    def _run_prompt(
-        self, prompt_text: str, *, timeout_seconds: float
-    ) -> tuple[str, str]:
-        try:
-            proc = subprocess.Popen(
-                [self._acp_command] + self._acp_args,
-                stdin=subprocess.PIPE,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                bufsize=1,
-                cwd=self._acp_cwd,
-            )
-        except FileNotFoundError as exc:
-            raise RuntimeError(
-                f"Could not start Copilot ACP command '{self._acp_command}'. "
-                "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
-            ) from exc
-
-        if proc.stdin is None or proc.stdout is None:
-            proc.kill()
-            raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
-
-        self.is_closed = False
-        with self._active_process_lock:
-            self._active_process = proc
-
-        inbox: queue.Queue[dict[str, Any]] = queue.Queue()
-        stderr_tail: deque[str] = deque(maxlen=40)
-
-        def _stdout_reader() -> None:
-            if proc.stdout is None:
-                return
-            for line in proc.stdout:
-                try:
-                    inbox.put(json.loads(line))
-                except Exception:
-                    inbox.put({"raw": line.rstrip("\n")})
-
-        def _stderr_reader() -> None:
-            if proc.stderr is None:
-                return
-            for line in proc.stderr:
-                stderr_tail.append(line.rstrip("\n"))
-
-        out_thread = threading.Thread(target=_stdout_reader, daemon=True)
-        err_thread = threading.Thread(target=_stderr_reader, daemon=True)
-        out_thread.start()
-        err_thread.start()
-
-        next_id = 0
-
-        def _request(
-            method: str,
-            params: dict[str, Any],
-            *,
-            text_parts: list[str] | None = None,
-            reasoning_parts: list[str] | None = None,
-        ) -> Any:
-            nonlocal next_id
-            next_id += 1
-            request_id = next_id
-            payload = {
-                "jsonrpc": "2.0",
-                "id": request_id,
-                "method": method,
-                "params": params,
-            }
-            assert proc.stdin is not None  # always set: Popen(stdin=PIPE)
-            proc.stdin.write(json.dumps(payload) + "\n")
-            proc.stdin.flush()
-
-            deadline = time.time() + timeout_seconds
-            while time.time() < deadline:
-                if proc.poll() is not None:
-                    break
-                try:
-                    msg = inbox.get(timeout=0.1)
-                except queue.Empty:
-                    continue
-
-                if self._handle_server_message(
-                    msg,
-                    process=proc,
-                    cwd=self._acp_cwd,
-                    text_parts=text_parts,
-                    reasoning_parts=reasoning_parts,
-                ):
-                    continue
-
-                if msg.get("id") != request_id:
-                    continue
-                if "error" in msg:
-                    err = msg.get("error") or {}
-                    raise RuntimeError(
-                        f"Copilot ACP {method} failed: {err.get('message') or err}"
-                    )
-                return msg.get("result")
-
-            stderr_text = "\n".join(stderr_tail).strip()
-            if proc.poll() is not None and stderr_text:
-                raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
-            raise TimeoutError(
-                f"Timed out waiting for Copilot ACP response to {method}."
-            )
-
-        try:
-            _request(
-                "initialize",
-                {
-                    "protocolVersion": 1,
-                    "clientCapabilities": {
-                        "fs": {
-                            "readTextFile": True,
-                            "writeTextFile": True,
-                        }
-                    },
-                    "clientInfo": {
-                        "name": "hermes-agent",
-                        "title": "Hermes Agent",
-                        "version": "0.0.0",
-                    },
-                },
-            )
-            session = (
-                _request(
-                    "session/new",
-                    {
-                        "cwd": self._acp_cwd,
-                        "mcpServers": [],
-                    },
-                )
-                or {}
-            )
-            session_id = str(session.get("sessionId") or "").strip()
-            if not session_id:
-                raise RuntimeError("Copilot ACP did not return a sessionId.")
-
-            text_parts: list[str] = []
-            reasoning_parts: list[str] = []
-            _request(
-                "session/prompt",
-                {
-                    "sessionId": session_id,
-                    "prompt": [
-                        {
-                            "type": "text",
-                            "text": prompt_text,
-                        }
-                    ],
-                },
-                text_parts=text_parts,
-                reasoning_parts=reasoning_parts,
-            )
-            return "".join(text_parts), "".join(reasoning_parts)
-        finally:
-            self.close()
-
-    def _handle_server_message(
-        self,
-        msg: dict[str, Any],
-        *,
-        process: subprocess.Popen[str],
-        cwd: str,
-        text_parts: list[str] | None,
-        reasoning_parts: list[str] | None,
-    ) -> bool:
-        method = msg.get("method")
-        if not isinstance(method, str):
-            return False
-
-        if method == "session/update":
-            params = msg.get("params") or {}
-            update = params.get("update") or {}
-            kind = str(update.get("sessionUpdate") or "").strip()
-            content = update.get("content") or {}
-            chunk_text = ""
-            if isinstance(content, dict):
-                chunk_text = str(content.get("text") or "")
-            if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
-                text_parts.append(chunk_text)
-            elif (
-                kind == "agent_thought_chunk"
-                and chunk_text
-                and reasoning_parts is not None
-            ):
-                reasoning_parts.append(chunk_text)
-            return True
-
-        if process.stdin is None:
-            return True
-
-        message_id = msg.get("id")
-        params = msg.get("params") or {}
-
-        if method == "session/request_permission":
-            response = _permission_denied(message_id)
-        elif method == "fs/read_text_file":
-            try:
-                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                block_error = get_read_block_error(str(path))
-                if block_error:
-                    raise PermissionError(block_error)
-                content = path.read_text() if path.exists() else ""
-                line = params.get("line")
-                limit = params.get("limit")
-                if isinstance(line, int) and line > 1:
-                    lines = content.splitlines(keepends=True)
-                    start = line - 1
-                    end = (
-                        start + limit if isinstance(limit, int) and limit > 0 else None
-                    )
-                    content = "".join(lines[start:end])
-                if content:
-                    content = redact_sensitive_text(content)
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": message_id,
-                    "result": {
-                        "content": content,
-                    },
-                }
-            except Exception as exc:
-                response = _jsonrpc_error(message_id, -32602, str(exc))
-        elif method == "fs/write_text_file":
-            try:
-                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                if is_write_denied(str(path)):
-                    raise PermissionError(
-                        f"Write denied: '{path}' is a protected system/credential file."
-                    )
-                path.parent.mkdir(parents=True, exist_ok=True)
-                path.write_text(str(params.get("content") or ""))
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": message_id,
-                    "result": None,
-                }
-            except Exception as exc:
-                response = _jsonrpc_error(message_id, -32602, str(exc))
-        else:
-            response = _jsonrpc_error(
-                message_id,
-                -32601,
-                f"ACP client method '{method}' is not supported by Hermes yet.",
-            )
-
-        process.stdin.write(json.dumps(response) + "\n")
-        process.stdin.flush()
-        return True
@@ -82,8 +82,6 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -151,31 +149,22 @@ def _fixed_temperature_for_model(
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
-def _get_aux_model_for_provider(provider_id: str) -> str:
-    """Return the cheap auxiliary model for a provider.
-
-    Reads from ProviderProfile.default_aux_model first, falling back to the
-    legacy hardcoded dict for providers that predate the profiles system.
-    """
-    try:
-        from providers import get_provider_profile
-        _p = get_provider_profile(provider_id)
-        if _p and _p.default_aux_model:
-            return _p.default_aux_model
-    except Exception:
-        pass
-    return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
-
-
-# Fallback for providers not yet migrated to ProviderProfile.default_aux_model.
-# New providers should set default_aux_model on their profile instead.
-_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
+_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
+    "gemini": "gemini-3-flash-preview",
+    "zai": "glm-4.5-flash",
+    "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
+    "kimi-coding-cn": "kimi-k2-turbo-preview",
+    "minimax": "MiniMax-M2.7",
+    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
+    "ai-gateway": "google/gemini-3-flash",
+    "opencode-zen": "gemini-3-flash",
+    "opencode-go": "glm-5",
+    "kilocode": "google/gemini-3-flash-preview",
+    "ollama-cloud": "nemotron-3-nano:30b",
 }

-# Legacy alias — callers that haven't been updated yet can still use this.
-_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
-
 # Vision-specific model overrides for direct providers.
 # When the user's main provider has a dedicated vision/multimodal model that
 # differs from their main chat model, map it here.  The vision auto-detect
@@ -876,7 +865,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            base_url = _to_openai_base_url(
                _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
            )
-            model = _get_aux_model_for_provider(provider_id) or None
+            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
            if model is None:
                continue  # skip provider if we don't know a valid aux model
            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
@@ -885,22 +874,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

                if is_native_gemini_base_url(base_url):
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
-        extra = {}
-        if base_url_host_matches(base_url, "api.kimi.com"):
-            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_cli.models import copilot_default_headers
+            extra = {}
+            if base_url_host_matches(base_url, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+                from hermes_cli.models import copilot_default_headers

-            extra["default_headers"] = copilot_default_headers()
-        else:
-            try:
-                from providers import get_provider_profile as _gpf_aux
-                _ph_aux = _gpf_aux(provider_id)
-                if _ph_aux and _ph_aux.default_headers:
-                    extra["default_headers"] = dict(_ph_aux.default_headers)
-            except Exception:
-                pass
-        return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+                extra["default_headers"] = copilot_default_headers()
+            return OpenAI(api_key=api_key, base_url=base_url, **extra), model

        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
@@ -910,7 +891,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        base_url = _to_openai_base_url(
            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
        )
-        model = _get_aux_model_for_provider(provider_id) or None
+        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
        if model is None:
            continue  # skip provider if we don't know a valid aux model
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
@@ -926,14 +907,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
-        else:
-            try:
-                from providers import get_provider_profile as _gpf_aux2
-                _ph_aux2 = _gpf_aux2(provider_id)
-                if _ph_aux2 and _ph_aux2.default_headers:
-                    extra["default_headers"] = dict(_ph_aux2.default_headers)
-            except Exception:
-                pass
        return OpenAI(api_key=api_key, base_url=base_url, **extra), model

    return None, None
@@ -1282,7 +1255,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:

    from agent.anthropic_adapter import _is_oauth_token
    is_oauth = _is_oauth_token(token)
-    model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
+    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
    try:
        real_client = build_anthropic_client(token, base_url)
@@ -1666,7 +1639,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    except ImportError:
        pass
    try:
-        from acp_adapter.copilot_client import CopilotACPClient
+        from agent.copilot_acp_client import CopilotACPClient
        if isinstance(sync_client, CopilotACPClient):
            return sync_client, model
    except ImportError:
@@ -2010,7 +1983,7 @@ def resolve_provider_client(
            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
        )

-        default_model = _get_aux_model_for_provider(provider)
+        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
        final_model = _normalize_resolved_model(model or default_model, provider)

        if provider == "gemini":
@@ -2080,7 +2053,7 @@ def resolve_provider_client(
                    "process credentials are incomplete"
                )
                return None, None
-            from acp_adapter.copilot_client import CopilotACPClient
+            from agent.copilot_acp_client import CopilotACPClient

            client = CopilotACPClient(
                api_key=api_key,
@@ -2585,19 +2558,12 @@ def _is_openrouter_client(client: Any) -> bool:
    return False


-def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
-    """Best-effort check for cached clients that accept ``vendor/model`` IDs."""
-    if _is_openrouter_client(client):
-        return True
-    return bool(cached_default and "/" in cached_default)
-
-
 def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
-    """Keep slash-bearing model IDs only for cached clients that support them.
+    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.

    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
    """
-    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
+    if model and "/" in model and not _is_openrouter_client(client):
        return cached_default
    return model or cached_default

@@ -338,10 +338,6 @@ class ContextCompressor(ContextEngine):
        self._context_probe_persistable = False
        self._previous_summary = None
        self._last_summary_error = None
-        self._last_summary_dropped_count = 0
-        self._last_summary_fallback_used = False
-        self._last_aux_model_failure_error = None
-        self._last_aux_model_failure_model = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -445,17 +441,6 @@ class ContextCompressor(ContextEngine):
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
        self._last_summary_error: Optional[str] = None
-        # When summary generation fails and a static fallback is inserted,
-        # record how many turns were unrecoverably dropped so callers
-        # (gateway hygiene, /compress) can surface a visible warning.
-        self._last_summary_dropped_count: int = 0
-        self._last_summary_fallback_used: bool = False
-        # When a user-configured summary model fails and we recover by
-        # retrying on the main model, record the failure so gateway /
-        # CLI callers can still warn the user even though compression
-        # succeeded.  Silent recovery would hide the broken config.
-        self._last_aux_model_failure_error: Optional[str] = None
-        self._last_aux_model_failure_model: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -915,50 +900,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "Falling back to main model '%s' for compression.",
                    self.summary_model, e, self.model,
                )
-                # Record the aux-model failure so callers can warn the user
-                # even if the retry-on-main succeeds — a misconfigured aux
-                # model is something the user needs to fix.
-                _err_text = str(e).strip() or e.__class__.__name__
-                if len(_err_text) > 220:
-                    _err_text = _err_text[:217].rstrip() + "..."
-                self._last_aux_model_failure_error = _err_text
-                self._last_aux_model_failure_model = self.summary_model
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately

-            # Unknown-error best-effort retry on main model.  Losing N turns of
-            # context is almost always worse than one extra summary attempt, so
-            # if we haven't already fallen back and the summary model differs
-            # from the main model, try once more on main before entering
-            # cooldown.  Errors that DID match _is_model_not_found above are
-            # already handled by the fast-path retry; this branch catches
-            # everything else (400s, provider-specific "no route" strings,
-            # aggregator rejections, etc.) where auto-retry is still safer
-            # than dropping the turns.
-            if (
-                self.summary_model
-                and self.summary_model != self.model
-                and not getattr(self, "_summary_model_fallen_back", False)
-            ):
-                self._summary_model_fallen_back = True
-                logging.warning(
-                    "Summary model '%s' failed (%s). "
-                    "Retrying on main model '%s' before giving up.",
-                    self.summary_model, e, self.model,
-                )
-                # Record the aux-model failure (see 404 branch above) — user
-                # should know their configured model is broken even if main
-                # recovers the call.
-                _err_text = str(e).strip() or e.__class__.__name__
-                if len(_err_text) > 220:
-                    _err_text = _err_text[:217].rstrip() + "..."
-                self._last_aux_model_failure_error = _err_text
-                self._last_aux_model_failure_model = self.summary_model
-                self.summary_model = ""  # empty = use main model
-                self._summary_failure_cooldown_until = 0.0
-                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
-
            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
@@ -1251,13 +1196,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                related to this topic and be more aggressive about compressing
                everything else.  Inspired by Claude Code's ``/compact``.
        """
-        # Reset per-call summary failure state — callers inspect these fields
-        # after compress() returns to decide whether to surface a warning.
-        self._last_summary_dropped_count = 0
-        self._last_summary_fallback_used = False
-        self._last_summary_error = None
-        self._last_aux_model_failure_error = None
-        self._last_aux_model_failure_model = None
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self.protect_first_n + 3 + 1
@@ -1336,13 +1274,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if not self.quiet_mode:
                logger.warning("Summary generation failed — inserting static fallback context marker")
            n_dropped = compress_end - compress_start
-            self._last_summary_dropped_count = n_dropped
-            self._last_summary_fallback_used = True
            summary = (
                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} message(s) were "
+                f"Summary generation was unavailable. {n_dropped} conversation turns were "
                f"removed to free context space but could not be summarized. The removed "
-                f"messages contained earlier work in this session. Continue based on the "
+                f"turns contained earlier work in this session. Continue based on the "
                f"recent messages below and the current state of any files or resources."
            )

@@ -1,8 +1,646 @@
-"""Backward-compatibility shim.
+"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.

-CopilotACPClient has moved to acp_adapter/copilot_client.py.
-This module re-exports it so existing callers continue to work.
+This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
+backend. Each request starts a short-lived ACP session, sends the formatted
+conversation as a single prompt, collects text chunks, and converts the result
+back into the minimal shape Hermes expects from an OpenAI client.
 """
-from acp_adapter.copilot_client import CopilotACPClient  # noqa: F401

-__all__ = ["CopilotACPClient"]
+from __future__ import annotations
+
+import json
+import os
+import queue
+import re
+import shlex
+import subprocess
+import threading
+import time
+from collections import deque
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
+ACP_MARKER_BASE_URL = "acp://copilot"
+_DEFAULT_TIMEOUT_SECONDS = 900.0
+
+_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
+_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
+
+
+def _resolve_command() -> str:
+    """Return the executable used to launch the Copilot ACP server.
+
+    The first non-blank value wins: ``HERMES_COPILOT_ACP_COMMAND``, then
+    ``COPILOT_CLI_PATH``, then the literal ``"copilot"``.
+    """
+    for env_name in ("HERMES_COPILOT_ACP_COMMAND", "COPILOT_CLI_PATH"):
+        configured = os.getenv(env_name, "").strip()
+        if configured:
+            return configured
+    return "copilot"
+
+
+def _resolve_args() -> list[str]:
+    """Return the CLI arguments passed to the Copilot ACP command.
+
+    ``HERMES_COPILOT_ACP_ARGS`` is split with shell quoting rules when it is
+    set to a non-blank value; otherwise the default ``--acp --stdio`` pair is
+    returned.
+    """
+    override = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
+    return shlex.split(override) if override else ["--acp", "--stdio"]
+
+
+def _resolve_home_dir() -> str:
+    """Pick the HOME directory handed to child ACP processes.
+
+    Candidates are tried in order: the profile home reported by
+    ``hermes_constants.get_subprocess_home`` (when importable), the ``HOME``
+    environment variable, ``~`` expansion, the passwd entry of the current
+    uid, and finally ``/tmp``.
+    """
+
+    # Profile-specific home; any import or lookup failure just moves on.
+    try:
+        from hermes_constants import get_subprocess_home
+
+        configured_home = get_subprocess_home()
+    except Exception:
+        configured_home = None
+    if configured_home:
+        return configured_home
+
+    env_home = os.environ.get("HOME", "").strip()
+    if env_home:
+        return env_home
+
+    tilde_home = os.path.expanduser("~")
+    if tilde_home not in ("", "~"):
+        return tilde_home
+
+    # ``pwd`` is imported lazily and guarded: a failed import or lookup simply
+    # falls through to the last resort.
+    try:
+        import pwd
+
+        passwd_home = pwd.getpwuid(os.getuid()).pw_dir.strip()
+    except Exception:
+        passwd_home = ""
+    if passwd_home:
+        return passwd_home
+
+    # Last resort: /tmp (writable on any POSIX system). Avoids crashing the
+    # subprocess with no HOME; callers can set HERMES_HOME explicitly if they
+    # need a different writable dir.
+    return "/tmp"
+
+
+def _build_subprocess_env() -> dict[str, str]:
+    """Return a copy of the current environment with HOME pinned for the child."""
+    return {**os.environ, "HOME": _resolve_home_dir()}
+
+
+def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
+    """Build a JSON-RPC 2.0 error response addressed to ``message_id``."""
+    error_body = {"code": code, "message": message}
+    return {"jsonrpc": "2.0", "id": message_id, "error": error_body}
+
+
+def _permission_denied(message_id: Any) -> dict[str, Any]:
+    """Build the reply to a permission request; the outcome is always ``cancelled``."""
+    outcome = {"outcome": "cancelled"}
+    return {"jsonrpc": "2.0", "id": message_id, "result": {"outcome": outcome}}
+
+
+def _format_messages_as_prompt(
+    messages: list[dict[str, Any]],
+    model: str | None = None,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: Any = None,
+) -> str:
+    """Flatten an OpenAI-style chat request into a single ACP prompt string.
+
+    The prompt is a sequence of blank-line-separated sections: fixed
+    instructions, an optional model hint, an optional JSON dump of the usable
+    tool specs, an optional tool-choice hint, the rendered transcript, and a
+    closing instruction.
+
+    Args:
+        messages: Chat messages; non-dict entries and entries whose content
+            renders to an empty string are skipped.
+        model: Model name forwarded as a hint only.
+        tools: OpenAI function-tool definitions; entries without a non-blank
+            string ``function.name`` are dropped.
+        tool_choice: Forwarded as a JSON-encoded hint when not ``None``.
+
+    Returns:
+        The assembled prompt text.
+    """
+    # Roles outside this table are shown under the generic "Context" label.
+    role_labels = {
+        "system": "System",
+        "user": "User",
+        "assistant": "Assistant",
+        "tool": "Tool",
+    }
+
+    blocks: list[str] = [
+        "You are being used as the active ACP agent backend for Hermes.",
+        "Use ACP capabilities to complete tasks.",
+        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
+        "If no tool is needed, answer normally.",
+    ]
+    if model:
+        blocks.append(f"Hermes requested model hint: {model}")
+
+    if isinstance(tools, list) and tools:
+        specs: list[dict[str, Any]] = []
+        for tool in tools:
+            function = (tool.get("function") or {}) if isinstance(tool, dict) else None
+            if not isinstance(function, dict):
+                continue
+            tool_name = function.get("name")
+            if isinstance(tool_name, str) and tool_name.strip():
+                specs.append(
+                    {
+                        "name": tool_name.strip(),
+                        "description": function.get("description", ""),
+                        "parameters": function.get("parameters", {}),
+                    }
+                )
+        if specs:
+            blocks.append(
+                "Available tools (OpenAI function schema). "
+                "When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
+                "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+                + json.dumps(specs, ensure_ascii=False)
+            )
+
+    if tool_choice is not None:
+        blocks.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}")
+
+    turns: list[str] = []
+    for message in messages:
+        if not isinstance(message, dict):
+            continue
+        role = str(message.get("role") or "unknown").strip().lower()
+        body = _render_message_content(message.get("content"))
+        if not body:
+            continue
+        label = role_labels.get(role, "Context")
+        turns.append(f"{label}:\n{body}")
+
+    if turns:
+        blocks.append("Conversation transcript:\n\n" + "\n\n".join(turns))
+
+    blocks.append("Continue the conversation from the latest user request.")
+    trimmed = (block.strip() for block in blocks if block)
+    return "\n\n".join(block for block in trimmed if block)
+
+
+def _render_message_content(content: Any) -> str:
+    """Render an OpenAI-style message ``content`` value as plain text.
+
+    * ``None`` gives an empty string; a string is returned stripped.
+    * A list keeps string items as-is and the stripped, non-blank ``text`` of
+      dict items, joined with newlines; other items are ignored.
+    * A dict yields its ``text`` value, else a string ``content`` value, else
+      its ASCII-escaped JSON dump.
+    * Anything else is passed through ``str()`` and stripped.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content.strip()
+
+    if isinstance(content, list):
+        pieces: list[str] = []
+        for entry in content:
+            if isinstance(entry, str):
+                pieces.append(entry)
+                continue
+            if not isinstance(entry, dict):
+                continue
+            entry_text = entry.get("text")
+            if isinstance(entry_text, str) and entry_text.strip():
+                pieces.append(entry_text.strip())
+        return "\n".join(pieces).strip()
+
+    if not isinstance(content, dict):
+        return str(content).strip()
+
+    if "text" in content:
+        return str(content.get("text") or "").strip()
+    nested = content.get("content")
+    if "content" in content and isinstance(nested, str):
+        return nested.strip()
+    return json.dumps(content, ensure_ascii=True)
+
+
+def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
+    """Split model output into parsed tool calls and the remaining prose.
+
+    ``<tool_call>{...}</tool_call>`` blocks are parsed first.  Only when none
+    of them yields a usable call is bare OpenAI-shaped JSON tried as a
+    fallback.  Every matched span is removed from the text, including spans
+    whose JSON turned out to be unusable.
+
+    Returns:
+        ``(tool_calls, cleaned_text)``.  Each call is a ``SimpleNamespace``
+        with ``id``, ``call_id``, ``response_item_id``, ``type`` and
+        ``function`` (``name``, ``arguments`` as a JSON string).
+    """
+    if not isinstance(text, str) or not text.strip():
+        return [], ""
+
+    calls: list[SimpleNamespace] = []
+    spans: list[tuple[int, int]] = []
+
+    def _collect(raw_json: str) -> None:
+        # Parse one candidate and append it to ``calls`` when it is well formed.
+        try:
+            payload = json.loads(raw_json)
+        except Exception:
+            return
+        function = payload.get("function") if isinstance(payload, dict) else None
+        if not isinstance(function, dict):
+            return
+        name = function.get("name")
+        if not (isinstance(name, str) and name.strip()):
+            return
+        arguments = function.get("arguments", "{}")
+        if not isinstance(arguments, str):
+            arguments = json.dumps(arguments, ensure_ascii=False)
+        call_id = payload.get("id")
+        if not (isinstance(call_id, str) and call_id.strip()):
+            # Synthesize a positional id when the model did not supply one.
+            call_id = f"acp_call_{len(calls)+1}"
+        calls.append(
+            SimpleNamespace(
+                id=call_id,
+                call_id=call_id,
+                response_item_id=None,
+                type="function",
+                function=SimpleNamespace(name=name.strip(), arguments=arguments),
+            )
+        )
+
+    for match in _TOOL_CALL_BLOCK_RE.finditer(text):
+        _collect(match.group(1))
+        spans.append(match.span())
+
+    # Only try bare-JSON fallback when no XML blocks were found.
+    if not calls:
+        for match in _TOOL_CALL_JSON_RE.finditer(text):
+            _collect(match.group(0))
+            spans.append(match.span())
+
+    if not spans:
+        return calls, text.strip()
+
+    # Coalesce overlapping or touching spans so each character is cut once.
+    merged: list[tuple[int, int]] = []
+    for start, end in sorted(spans):
+        if merged and start <= merged[-1][1]:
+            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
+        else:
+            merged.append((start, end))
+
+    # Keep whatever lies between the removed spans.
+    kept: list[str] = []
+    cursor = 0
+    for start, end in merged:
+        kept.append(text[cursor:start])
+        cursor = end
+    kept.append(text[cursor:])
+
+    cleaned = "\n".join(piece.strip() for piece in kept if piece.strip()).strip()
+    return calls, cleaned
+
+
+
+def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
+    """Resolve ``path_text`` and require it to live inside ``cwd``.
+
+    Both paths are resolved before the containment check.
+
+    Returns:
+        The resolved path.
+
+    Raises:
+        PermissionError: ``path_text`` is relative, or resolves to a location
+            outside the resolved ``cwd``.
+    """
+    requested = Path(path_text)
+    if not requested.is_absolute():
+        raise PermissionError("ACP file-system paths must be absolute.")
+
+    resolved = requested.resolve()
+    root = Path(cwd).resolve()
+    try:
+        resolved.relative_to(root)
+    except ValueError as exc:
+        raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc
+    else:
+        return resolved
+
+
+class _ACPChatCompletions:
+    """Stand-in for ``client.chat.completions`` that delegates to the ACP client."""
+
+    def __init__(self, client: "CopilotACPClient"):
+        self._client = client
+
+    def create(self, **kwargs: Any) -> Any:
+        """Forward all keyword arguments to the owning client's completion call."""
+        handler = self._client._create_chat_completion
+        return handler(**kwargs)
+
+
+class _ACPChatNamespace:
+    """Stand-in for ``client.chat``; carries only the ``completions`` attribute."""
+
+    def __init__(self, client: "CopilotACPClient"):
+        completions = _ACPChatCompletions(client)
+        self.completions = completions
+
+
+class CopilotACPClient:
+    """Minimal OpenAI-client-compatible facade for Copilot ACP.
+
+    Exposes ``client.chat.completions.create(...)``.  Each call spawns the
+    configured ACP command, runs one ``initialize`` / ``session/new`` /
+    ``session/prompt`` exchange as newline-delimited JSON-RPC over stdio, and
+    terminates the process afterwards.
+    """
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        default_headers: dict[str, str] | None = None,
+        acp_command: str | None = None,
+        acp_args: list[str] | None = None,
+        acp_cwd: str | None = None,
+        command: str | None = None,
+        args: list[str] | None = None,
+        **_: Any,
+    ):
+        """Store connection settings; no process is started here.
+
+        ``command`` / ``args`` are accepted as aliases for ``acp_command`` /
+        ``acp_args``.  Unset values fall back to the environment-driven
+        resolvers, and ``acp_cwd`` defaults to the current working directory.
+        Any other keyword arguments are accepted and ignored.
+        """
+        self.api_key = api_key or "copilot-acp"
+        self.base_url = base_url or ACP_MARKER_BASE_URL
+        self._default_headers = dict(default_headers or {})
+        self._acp_command = acp_command or command or _resolve_command()
+        self._acp_args = list(acp_args or args or _resolve_args())
+        self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
+        self.chat = _ACPChatNamespace(self)
+        self.is_closed = False
+        self._active_process: subprocess.Popen[str] | None = None
+        self._active_process_lock = threading.Lock()
+
+    def close(self) -> None:
+        """Terminate the active ACP process, if any, and mark the client closed."""
+        proc: subprocess.Popen[str] | None
+        # Detach the process under the lock so it is only torn down once.
+        with self._active_process_lock:
+            proc = self._active_process
+            self._active_process = None
+        self.is_closed = True
+        if proc is None:
+            return
+        try:
+            proc.terminate()
+            proc.wait(timeout=2)
+        except Exception:
+            # Graceful shutdown failed or timed out: force it, best effort.
+            try:
+                proc.kill()
+            except Exception:
+                pass
+
+    def _create_chat_completion(
+        self,
+        *,
+        model: str | None = None,
+        messages: list[dict[str, Any]] | None = None,
+        timeout: float | None = None,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: Any = None,
+        **_: Any,
+    ) -> Any:
+        """Run one chat completion through ACP and return an OpenAI-shaped result.
+
+        Args:
+            model: Passed to the prompt as a hint and echoed in the result.
+            messages: Chat messages to flatten into the prompt.
+            timeout: Seconds, or an object with ``read`` / ``write`` /
+                ``connect`` / ``pool`` / ``timeout`` attributes.
+            tools: OpenAI function-tool definitions advertised in the prompt.
+            tool_choice: Forwarded to the prompt as a hint.
+
+        Returns:
+            A ``SimpleNamespace`` with ``choices``, ``usage`` (all counters
+            zero) and ``model``.  ``finish_reason`` is ``"tool_calls"`` when
+            tool calls were extracted from the response text, else ``"stop"``.
+        """
+        prompt_text = _format_messages_as_prompt(
+            messages or [],
+            model=model,
+            tools=tools,
+            tool_choice=tool_choice,
+        )
+        # Normalise timeout: run_agent.py may pass an httpx.Timeout object
+        # (used natively by the OpenAI SDK) rather than a plain float.
+        if timeout is None:
+            _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
+        elif isinstance(timeout, (int, float)):
+            _effective_timeout = float(timeout)
+        else:
+            # httpx.Timeout or similar — pick the largest component so the
+            # subprocess has enough wall-clock time for the full response.
+            _candidates = [
+                getattr(timeout, attr, None)
+                for attr in ("read", "write", "connect", "pool", "timeout")
+            ]
+            _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
+            _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
+
+        response_text, reasoning_text = self._run_prompt(
+            prompt_text,
+            timeout_seconds=_effective_timeout,
+        )
+
+        tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
+
+        # This shim does not collect token counts, so usage is reported as zero.
+        usage = SimpleNamespace(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+            prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+        )
+        assistant_message = SimpleNamespace(
+            content=cleaned_text,
+            tool_calls=tool_calls,
+            reasoning=reasoning_text or None,
+            reasoning_content=reasoning_text or None,
+            reasoning_details=None,
+        )
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
+        return SimpleNamespace(
+            choices=[choice],
+            usage=usage,
+            model=model or "copilot-acp",
+        )
+
+    def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
+        """Run one prompt through a fresh ACP process.
+
+        Spawns the ACP command, performs ``initialize``, ``session/new`` and
+        ``session/prompt`` in that order, and returns
+        ``(response_text, reasoning_text)`` assembled from the chunks streamed
+        during ``session/prompt``.  ``timeout_seconds`` bounds each of the
+        three requests separately.  The process is always terminated before
+        this method returns or raises.
+
+        Raises:
+            RuntimeError: The command cannot be started, a request returns a
+                JSON-RPC error, no session id is returned, or the process
+                exits early with stderr output.
+            TimeoutError: A request received no response in time.
+        """
+        try:
+            proc = subprocess.Popen(
+                [self._acp_command] + self._acp_args,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                bufsize=1,
+                cwd=self._acp_cwd,
+                env=_build_subprocess_env(),
+            )
+        except FileNotFoundError as exc:
+            raise RuntimeError(
+                f"Could not start Copilot ACP command '{self._acp_command}'. "
+                "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
+            ) from exc
+
+        if proc.stdin is None or proc.stdout is None:
+            proc.kill()
+            raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
+
+        self.is_closed = False
+        with self._active_process_lock:
+            self._active_process = proc
+
+        # Parsed stdout lines are handed to the request loop through this queue;
+        # only the most recent stderr lines are kept for error reporting.
+        inbox: queue.Queue[dict[str, Any]] = queue.Queue()
+        stderr_tail: deque[str] = deque(maxlen=40)
+
+        def _stdout_reader() -> None:
+            if proc.stdout is None:
+                return
+            for line in proc.stdout:
+                try:
+                    inbox.put(json.loads(line))
+                except Exception:
+                    # Non-JSON output is queued under "raw"; it carries neither
+                    # "method" nor "id", so the request loop skips over it.
+                    inbox.put({"raw": line.rstrip("\n")})
+
+        def _stderr_reader() -> None:
+            if proc.stderr is None:
+                return
+            for line in proc.stderr:
+                stderr_tail.append(line.rstrip("\n"))
+
+        out_thread = threading.Thread(target=_stdout_reader, daemon=True)
+        err_thread = threading.Thread(target=_stderr_reader, daemon=True)
+        out_thread.start()
+        err_thread.start()
+
+        next_id = 0
+
+        def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any:
+            # Send one JSON-RPC request and pump incoming messages until its
+            # response arrives, the process is gone, or the deadline passes.
+            nonlocal next_id
+            next_id += 1
+            request_id = next_id
+            payload = {
+                "jsonrpc": "2.0",
+                "id": request_id,
+                "method": method,
+                "params": params,
+            }
+            proc.stdin.write(json.dumps(payload) + "\n")
+            proc.stdin.flush()
+
+            deadline = time.time() + timeout_seconds
+            while time.time() < deadline:
+                exited = proc.poll() is not None
+                if exited:
+                    # Give the reader a moment to queue whatever the process
+                    # wrote before exiting, so a final response is not lost.
+                    out_thread.join(timeout=1.0)
+                try:
+                    msg = inbox.get_nowait() if exited else inbox.get(timeout=0.1)
+                except queue.Empty:
+                    if exited:
+                        # Process gone and nothing left to read.
+                        break
+                    continue
+
+                if self._handle_server_message(
+                    msg,
+                    process=proc,
+                    cwd=self._acp_cwd,
+                    text_parts=text_parts,
+                    reasoning_parts=reasoning_parts,
+                ):
+                    continue
+
+                if msg.get("id") != request_id:
+                    continue
+                if "error" in msg:
+                    err = msg.get("error") or {}
+                    raise RuntimeError(
+                        f"Copilot ACP {method} failed: {err.get('message') or err}"
+                    )
+                return msg.get("result")
+
+            stderr_text = "\n".join(stderr_tail).strip()
+            if proc.poll() is not None and stderr_text:
+                raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
+            raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
+
+        try:
+            _request(
+                "initialize",
+                {
+                    "protocolVersion": 1,
+                    "clientCapabilities": {
+                        "fs": {
+                            "readTextFile": True,
+                            "writeTextFile": True,
+                        }
+                    },
+                    "clientInfo": {
+                        "name": "hermes-agent",
+                        "title": "Hermes Agent",
+                        "version": "0.0.0",
+                    },
+                },
+            )
+            session = _request(
+                "session/new",
+                {
+                    "cwd": self._acp_cwd,
+                    "mcpServers": [],
+                },
+            ) or {}
+            session_id = str(session.get("sessionId") or "").strip()
+            if not session_id:
+                raise RuntimeError("Copilot ACP did not return a sessionId.")
+
+            text_parts: list[str] = []
+            reasoning_parts: list[str] = []
+            _request(
+                "session/prompt",
+                {
+                    "sessionId": session_id,
+                    "prompt": [
+                        {
+                            "type": "text",
+                            "text": prompt_text,
+                        }
+                    ],
+                },
+                text_parts=text_parts,
+                reasoning_parts=reasoning_parts,
+            )
+            return "".join(text_parts), "".join(reasoning_parts)
+        finally:
+            self.close()
+
+    def _handle_server_message(
+        self,
+        msg: dict[str, Any],
+        *,
+        process: subprocess.Popen[str],
+        cwd: str,
+        text_parts: list[str] | None,
+        reasoning_parts: list[str] | None,
+    ) -> bool:
+        """Handle a notification or request sent by the ACP server.
+
+        Args:
+            msg: One decoded message from the server.
+            process: The ACP process; replies are written to its stdin.
+            cwd: Session directory that file-system requests are confined to.
+            text_parts: Collector for ``agent_message_chunk`` text, or ``None``.
+            reasoning_parts: Collector for ``agent_thought_chunk`` text, or
+                ``None``.
+
+        Returns:
+            ``True`` when ``msg`` carried a ``method`` and was consumed here;
+            ``False`` when it has none (a response to one of our requests)
+            and the caller must process it.
+        """
+        method = msg.get("method")
+        if not isinstance(method, str):
+            return False
+
+        if method == "session/update":
+            params = msg.get("params") or {}
+            update = params.get("update") or {}
+            kind = str(update.get("sessionUpdate") or "").strip()
+            content = update.get("content") or {}
+            chunk_text = ""
+            if isinstance(content, dict):
+                chunk_text = str(content.get("text") or "")
+            if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
+                text_parts.append(chunk_text)
+            elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None:
+                reasoning_parts.append(chunk_text)
+            return True
+
+        if process.stdin is None:
+            return True
+
+        message_id = msg.get("id")
+        params = msg.get("params") or {}
+
+        if method == "session/request_permission":
+            response = _permission_denied(message_id)
+        elif method == "fs/read_text_file":
+            try:
+                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                block_error = get_read_block_error(str(path))
+                if block_error:
+                    raise PermissionError(block_error)
+                # A missing file reads as empty text.
+                content = path.read_text() if path.exists() else ""
+                line = params.get("line")
+                limit = params.get("limit")
+                has_start = isinstance(line, int) and line > 1
+                has_limit = isinstance(limit, int) and limit > 0
+                # Window the text whenever either bound is given: ``limit``
+                # must be honoured even when ``line`` is absent or 1.
+                if has_start or has_limit:
+                    lines = content.splitlines(keepends=True)
+                    start = line - 1 if has_start else 0
+                    end = start + limit if has_limit else None
+                    content = "".join(lines[start:end])
+                if content:
+                    content = redact_sensitive_text(content)
+                response = {
+                    "jsonrpc": "2.0",
+                    "id": message_id,
+                    "result": {
+                        "content": content,
+                    },
+                }
+            except Exception as exc:
+                response = _jsonrpc_error(message_id, -32602, str(exc))
+        elif method == "fs/write_text_file":
+            try:
+                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                if is_write_denied(str(path)):
+                    raise PermissionError(
+                        f"Write denied: '{path}' is a protected system/credential file."
+                    )
+                path.parent.mkdir(parents=True, exist_ok=True)
+                path.write_text(str(params.get("content") or ""))
+                response = {
+                    "jsonrpc": "2.0",
+                    "id": message_id,
+                    "result": None,
+                }
+            except Exception as exc:
+                response = _jsonrpc_error(message_id, -32602, str(exc))
+        else:
+            response = _jsonrpc_error(
+                message_id,
+                -32601,
+                f"ACP client method '{method}' is not supported by Hermes yet.",
+            )
+
+        try:
+            process.stdin.write(json.dumps(response) + "\n")
+            process.stdin.flush()
+        except OSError:
+            # The peer closed its end of the pipe (typically because it has
+            # already exited).  The reply is undeliverable; the request loop
+            # detects the exit and reports it.
+            pass
+        return True
@@ -63,124 +63,15 @@ def sanitize_context(text: str) -> str:
    return text


-class StreamingContextScrubber:
-    """Stateful scrubber for streaming text that may contain split memory-context spans.
-
-    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
-    a ``<memory-context>`` opened in one delta and closed in a later delta
-    leaks its payload to the UI because the non-greedy block regex needs
-    both tags in one string.  This scrubber runs a small state machine
-    across deltas, holding back partial-tag tails and discarding
-    everything inside a span (including the system-note line).
-
-    Usage::
-
-        scrubber = StreamingContextScrubber()
-        for delta in stream:
-            visible = scrubber.feed(delta)
-            if visible:
-                emit(visible)
-        trailing = scrubber.flush()  # at end of stream
-        if trailing:
-            emit(trailing)
-
-    The scrubber is re-entrant per agent instance.  Callers building new
-    top-level responses (new turn) should create a fresh scrubber or call
-    ``reset()``.
-    """
-
-    _OPEN_TAG = "<memory-context>"
-    _CLOSE_TAG = "</memory-context>"
-
-    def __init__(self) -> None:
-        self._in_span: bool = False
-        self._buf: str = ""
-
-    def reset(self) -> None:
-        self._in_span = False
-        self._buf = ""
-
-    def feed(self, text: str) -> str:
-        """Return the visible portion of ``text`` after scrubbing.
-
-        Any trailing fragment that could be the start of an open/close tag
-        is held back in the internal buffer and surfaced on the next
-        ``feed()`` call or discarded/emitted by ``flush()``.
-        """
-        if not text:
-            return ""
-        buf = self._buf + text
-        self._buf = ""
-        out: list[str] = []
-
-        while buf:
-            if self._in_span:
-                idx = buf.lower().find(self._CLOSE_TAG)
-                if idx == -1:
-                    # Hold back a potential partial close tag; drop the rest
-                    held = self._max_partial_suffix(buf, self._CLOSE_TAG)
-                    self._buf = buf[-held:] if held else ""
-                    return "".join(out)
-                # Found close — skip span content + tag, continue
-                buf = buf[idx + len(self._CLOSE_TAG):]
-                self._in_span = False
-            else:
-                idx = buf.lower().find(self._OPEN_TAG)
-                if idx == -1:
-                    # No open tag — hold back a potential partial open tag
-                    held = self._max_partial_suffix(buf, self._OPEN_TAG)
-                    if held:
-                        out.append(buf[:-held])
-                        self._buf = buf[-held:]
-                    else:
-                        out.append(buf)
-                    return "".join(out)
-                # Emit text before the tag, enter span
-                if idx > 0:
-                    out.append(buf[:idx])
-                buf = buf[idx + len(self._OPEN_TAG):]
-                self._in_span = True
-
-        return "".join(out)
-
-    def flush(self) -> str:
-        """Emit any held-back buffer at end-of-stream.
-
-        If we're still inside an unterminated span the remaining content is
-        discarded (safer: leaking partial memory context is worse than a
-        truncated answer).  Otherwise the held-back partial-tag tail is
-        emitted verbatim (it turned out not to be a real tag).
-        """
-        if self._in_span:
-            self._buf = ""
-            self._in_span = False
-            return ""
-        tail = self._buf
-        self._buf = ""
-        return tail
-
-    @staticmethod
-    def _max_partial_suffix(buf: str, tag: str) -> int:
-        """Return the length of the longest buf-suffix that is a tag-prefix.
-
-        Case-insensitive.  Returns 0 if no suffix could start the tag.
-        """
-        tag_lower = tag.lower()
-        buf_lower = buf.lower()
-        max_check = min(len(buf_lower), len(tag_lower) - 1)
-        for i in range(max_check, 0, -1):
-            if tag_lower.startswith(buf_lower[-i:]):
-                return i
-        return 0
-
-
 def build_memory_context_block(raw_context: str) -> str:
-    """Wrap prefetched memory in a fenced block with system note."""
+    """Wrap prefetched memory in a fenced block with system note.
+
+    The fence prevents the model from treating recalled context as user
+    discourse.  Injected at API-call time only — never persisted.
+    """
    if not raw_context or not raw_context.strip():
        return ""
    clean = sanitize_context(raw_context)
-    if clean != raw_context:
-        logger.warning("memory provider returned pre-wrapped context; stripped")
    return (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, "
@@ -51,7 +51,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
-    "gmi",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -61,7 +60,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
-    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
@@ -309,21 +307,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
-    "api.gmi-serving.com": "gmi",
    "ollama.com": "ollama-cloud",
 }

-# Auto-extend with hostnames derived from provider profiles.
-# Any provider with a base_url not already in the map gets added automatically.
-try:
-    from providers import list_providers as _list_providers
-    for _pp in _list_providers():
-        _host = _pp.get_hostname()
-        if _host and _host not in _URL_TO_PROVIDER:
-            _URL_TO_PROVIDER[_host] = _pp.name
-except Exception:
-    pass
-

 def _infer_provider_from_url(base_url: str) -> Optional[str]:
    """Infer the models.dev provider name from a base URL.
@@ -716,29 +702,6 @@ def fetch_endpoint_model_metadata(
    return {}


-def _resolve_endpoint_context_length(
-    model: str,
-    base_url: str,
-    api_key: str = "",
-) -> Optional[int]:
-    """Resolve context length from an endpoint's live ``/models`` metadata."""
-    endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
-    matched = endpoint_metadata.get(model)
-    if not matched:
-        if len(endpoint_metadata) == 1:
-            matched = next(iter(endpoint_metadata.values()))
-        else:
-            for key, entry in endpoint_metadata.items():
-                if model in key or key in model:
-                    matched = entry
-                    break
-    if matched:
-        context_length = matched.get("context_length")
-        if isinstance(context_length, int):
-            return context_length
-    return None
-
-
 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    from hermes_constants import get_hermes_home
@@ -1332,9 +1295,22 @@ def get_model_context_length(
    # returns 128k) instead of the model's full context (400k).  models.dev
    # has the correct per-provider values and is checked at step 5+.
    if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
-        context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
-        if context_length is not None:
-            return context_length
+        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
+        matched = endpoint_metadata.get(model)
+        if not matched:
+            # Single-model servers: if only one model is loaded, use it
+            if len(endpoint_metadata) == 1:
+                matched = next(iter(endpoint_metadata.values()))
+            else:
+                # Fuzzy match: substring in either direction
+                for key, entry in endpoint_metadata.items():
+                    if model in key or key in model:
+                        matched = entry
+                        break
+        if matched:
+            context_length = matched.get("context_length")
+            if isinstance(context_length, int):
+                return context_length
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
@@ -1398,12 +1374,6 @@ def get_model_context_length(
            if base_url:
                save_context_length(model, base_url, codex_ctx)
            return codex_ctx
-    if effective_provider == "gmi" and base_url:
-        # GMI exposes authoritative context_length via /models, but it is not
-        # in models.dev yet. Preserve that higher-fidelity endpoint lookup.
-        ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
-        if ctx is not None:
-            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
@@ -6,16 +6,9 @@ Usage:
    result = transport.normalize_response(raw_response)
 """

-from agent.transports.types import (
-    NormalizedResponse,
-    ToolCall,
-    Usage,
-    build_tool_call,
-    map_finish_reason,
-)  # noqa: F401
+from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401

 _REGISTRY: dict = {}
-_discovered: bool = False


 def register_transport(api_mode: str, transport_cls: type) -> None:
@@ -30,9 +23,6 @@ def get_transport(api_mode: str):
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.
    """
-    global _discovered
-    if not _discovered:
-        _discover_transports()
    cls = _REGISTRY.get(api_mode)
    if cls is None:
        # The registry can be partially populated when a specific transport
@@ -48,8 +38,6 @@ def get_transport(api_mode: str):

 def _discover_transports() -> None:
    """Import all transport modules to trigger auto-registration."""
-    global _discovered
-    _discovered = True
    try:
        import agent.transports.anthropic  # noqa: F401
    except ImportError:
@@ -10,7 +10,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
 """

 import copy
-from typing import Any
+from typing import Any, Dict, List, Optional

 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
@@ -28,9 +28,7 @@ class ChatCompletionsTransport(ProviderTransport):
    def api_mode(self) -> str:
        return "chat_completions"

-    def convert_messages(
-        self, messages: list[dict[str, Any]], **kwargs
-    ) -> list[dict[str, Any]]:
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
        """Messages are already in OpenAI format — sanitize Codex leaks only.

        Strips Codex Responses API fields (``codex_reasoning_items`` /
@@ -47,9 +45,7 @@ class ChatCompletionsTransport(ProviderTransport):
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
-                    if isinstance(tc, dict) and (
-                        "call_id" in tc or "response_item_id" in tc
-                    ):
+                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
                        needs_sanitize = True
                        break
                if needs_sanitize:
@@ -72,52 +68,76 @@ class ChatCompletionsTransport(ProviderTransport):
                        tc.pop("response_item_id", None)
        return sanitized

-    def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Tools are already in OpenAI format — identity."""
        return tools

    def build_kwargs(
        self,
        model: str,
-        messages: list[dict[str, Any]],
-        tools: list[dict[str, Any]] | None = None,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
-    ) -> dict[str, Any]:
+    ) -> Dict[str, Any]:
        """Build chat.completions.create() kwargs.

-        params (all optional):
+        This is the most complex transport method — it handles ~16 providers
+        via params rather than subclasses.
+
+        params:
            timeout: float — API call timeout
            max_tokens: int | None — user-configured max tokens
-            ephemeral_max_output_tokens: int | None — one-shot override
+            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
            reasoning_config: dict | None
            request_overrides: dict | None
            session_id: str | None
+            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
            model_lower: str — lowercase model name for pattern matching
-            # Provider profile path (all per-provider quirks live in providers/)
-            provider_profile: ProviderProfile | None — when present, delegates to
-                _build_kwargs_from_profile(); all flag params below are bypassed.
-            # Remaining flags — only used by the legacy fallback for unregistered
-            # providers (i.e. get_provider_profile() returned None).  Known
-            # providers all go through provider_profile.
-            qwen_session_metadata: dict | None
+            # Provider detection flags (all optional, default False)
+            is_openrouter: bool
+            is_nous: bool
+            is_qwen_portal: bool
+            is_github_models: bool
+            is_nvidia_nim: bool
+            is_kimi: bool
+            is_custom_provider: bool
+            ollama_num_ctx: int | None
+            # Provider routing
+            provider_preferences: dict | None
+            # Qwen-specific
+            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
+            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
+            # Temperature
+            fixed_temperature: Any — from _fixed_temperature_for_model()
+            omit_temperature: bool
+            # Reasoning
            supports_reasoning: bool
+            github_reasoning_extra: dict | None
+            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
-            extra_body_additions: dict | None
+            # Extra
+            extra_body_additions: dict | None — pre-built extra_body entries
        """
        # Codex sanitization: drop reasoning_items / call_id / response_item_id
        sanitized = self.convert_messages(messages)

-        # ── Provider profile: single-path when present ──────────────────
-        _profile = params.get("provider_profile")
-        if _profile:
-            return self._build_kwargs_from_profile(
-                _profile, model, sanitized, tools, params
-            )
-
-        # ── Legacy fallback (unregistered / unknown provider) ───────────
-        # Reached only when get_provider_profile() returned None.
-        # Known providers always go through the profile path above.
+        # Qwen portal prep AFTER codex sanitization.  If sanitize already
+        # deepcopied, reuse that copy via the in-place variant to avoid a
+        # second deepcopy.
+        is_qwen = params.get("is_qwen_portal", False)
+        if is_qwen:
+            qwen_prep = params.get("qwen_prepare_fn")
+            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
+            if sanitized is messages:
+                if qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+            else:
+                # Already deepcopied — transform in place
+                if qwen_prep_inplace is not None:
+                    qwen_prep_inplace(sanitized)
+                elif qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)

        # Developer role swap for GPT-5/Codex models
        model_lower = params.get("model_lower", (model or "").lower())
@@ -130,7 +150,7 @@ class ChatCompletionsTransport(ProviderTransport):
            sanitized = list(sanitized)
            sanitized[0] = {**sanitized[0], "role": "developer"}

-        api_kwargs: dict[str, Any] = {
+        api_kwargs: Dict[str, Any] = {
            "model": model,
            "messages": sanitized,
        }
@@ -139,6 +159,19 @@ class ChatCompletionsTransport(ProviderTransport):
        if timeout is not None:
            api_kwargs["timeout"] = timeout

+        # Temperature
+        fixed_temp = params.get("fixed_temperature")
+        omit_temp = params.get("omit_temperature", False)
+        if omit_temp:
+            api_kwargs.pop("temperature", None)
+        elif fixed_temp is not None:
+            api_kwargs["temperature"] = fixed_temp
+
+        # Qwen metadata (caller precomputes {sessionId, promptId})
+        qwen_meta = params.get("qwen_session_metadata")
+        if qwen_meta and is_qwen:
+            api_kwargs["metadata"] = qwen_meta
+
        # Tools
        if tools:
            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
@@ -153,24 +186,96 @@ class ChatCompletionsTransport(ProviderTransport):
        ephemeral = params.get("ephemeral_max_output_tokens")
        max_tokens = params.get("max_tokens")
        anthropic_max_out = params.get("anthropic_max_output")
+        is_nvidia_nim = params.get("is_nvidia_nim", False)
+        is_kimi = params.get("is_kimi", False)
+        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
            api_kwargs.update(max_tokens_fn(ephemeral))
        elif max_tokens is not None and max_tokens_fn:
            api_kwargs.update(max_tokens_fn(max_tokens))
+        elif is_nvidia_nim and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(16384))
+        elif is_qwen and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(65536))
+        elif is_kimi and max_tokens_fn:
+            # Kimi/Moonshot: 32000 matches Kimi CLI's default
+            api_kwargs.update(max_tokens_fn(32000))
        elif anthropic_max_out is not None:
            api_kwargs["max_tokens"] = anthropic_max_out

-        # extra_body assembly
-        extra_body: dict[str, Any] = {}
+        # Kimi: top-level reasoning_effort (unless thinking disabled)
+        if is_kimi:
+            _kimi_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _kimi_thinking_off:
+                _kimi_effort = "medium"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _kimi_effort = _e
+                api_kwargs["reasoning_effort"] = _kimi_effort

-        # Generic reasoning passthrough for unknown providers
+        # extra_body assembly
+        extra_body: Dict[str, Any] = {}
+
+        is_openrouter = params.get("is_openrouter", False)
+        is_nous = params.get("is_nous", False)
+        is_github_models = params.get("is_github_models", False)
+
+        provider_prefs = params.get("provider_preferences")
+        if provider_prefs and is_openrouter:
+            extra_body["provider"] = provider_prefs
+
+        # Kimi extra_body.thinking
+        if is_kimi:
+            _kimi_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _kimi_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _kimi_thinking_enabled else "disabled",
+            }
+
+        # Reasoning
        if params.get("supports_reasoning", False):
-            reasoning_config = params.get("reasoning_config")
-            if reasoning_config is not None:
-                extra_body["reasoning"] = dict(reasoning_config)
+            if is_github_models:
+                gh_reasoning = params.get("github_reasoning_extra")
+                if gh_reasoning is not None:
+                    extra_body["reasoning"] = gh_reasoning
            else:
-                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+                if reasoning_config is not None:
+                    rc = dict(reasoning_config)
+                    if is_nous and rc.get("enabled") is False:
+                        pass  # omit for Nous when disabled
+                    else:
+                        extra_body["reasoning"] = rc
+                else:
+                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+
+        if is_nous:
+            extra_body["tags"] = ["product=hermes-agent"]
+
+        # Ollama num_ctx
+        ollama_ctx = params.get("ollama_num_ctx")
+        if ollama_ctx:
+            options = extra_body.get("options", {})
+            options["num_ctx"] = ollama_ctx
+            extra_body["options"] = options
+
+        # Ollama/custom think=false
+        if params.get("is_custom_provider", False):
+            if reasoning_config and isinstance(reasoning_config, dict):
+                _effort = (reasoning_config.get("effort") or "").strip().lower()
+                _enabled = reasoning_config.get("enabled", True)
+                if _effort == "none" or _enabled is False:
+                    extra_body["think"] = False
+
+        if is_qwen:
+            extra_body["vl_high_resolution_images"] = True

        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
@@ -187,117 +292,6 @@ class ChatCompletionsTransport(ProviderTransport):

        return api_kwargs

-    def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
-        """Build API kwargs using a ProviderProfile — single path, no legacy flags.
-
-        This method replaces the entire flag-based kwargs assembly when a
-        provider_profile is passed. Every quirk comes from the profile object.
-        """
-        from providers.base import OMIT_TEMPERATURE
-
-        # Message preprocessing
-        sanitized = profile.prepare_messages(sanitized)
-
-        # Developer role swap — model-name-based, applies to all providers
-        _model_lower = (model or "").lower()
-        if (
-            sanitized
-            and isinstance(sanitized[0], dict)
-            and sanitized[0].get("role") == "system"
-            and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
-        ):
-            sanitized = list(sanitized)
-            sanitized[0] = {**sanitized[0], "role": "developer"}
-
-        api_kwargs: dict[str, Any] = {
-            "model": model,
-            "messages": sanitized,
-        }
-
-        # Temperature
-        if profile.fixed_temperature is OMIT_TEMPERATURE:
-            pass  # Don't include temperature at all
-        elif profile.fixed_temperature is not None:
-            api_kwargs["temperature"] = profile.fixed_temperature
-        else:
-            # Use caller's temperature if provided
-            temp = params.get("temperature")
-            if temp is not None:
-                api_kwargs["temperature"] = temp
-
-        # Timeout
-        timeout = params.get("timeout")
-        if timeout is not None:
-            api_kwargs["timeout"] = timeout
-
-        # Tools — apply Moonshot/Kimi schema sanitization regardless of path
-        if tools:
-            if is_moonshot_model(model):
-                tools = sanitize_moonshot_tools(tools)
-            api_kwargs["tools"] = tools
-
-        # max_tokens resolution — priority: ephemeral > user > profile default
-        max_tokens_fn = params.get("max_tokens_param_fn")
-        ephemeral = params.get("ephemeral_max_output_tokens")
-        user_max = params.get("max_tokens")
-        anthropic_max = params.get("anthropic_max_output")
-
-        if ephemeral is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(ephemeral))
-        elif user_max is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(user_max))
-        elif profile.default_max_tokens and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
-        elif anthropic_max is not None:
-            api_kwargs["max_tokens"] = anthropic_max
-
-        # Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
-        reasoning_config = params.get("reasoning_config")
-        extra_body_from_profile, top_level_from_profile = (
-            profile.build_api_kwargs_extras(
-                reasoning_config=reasoning_config,
-                supports_reasoning=params.get("supports_reasoning", False),
-                qwen_session_metadata=params.get("qwen_session_metadata"),
-                model=model,
-                ollama_num_ctx=params.get("ollama_num_ctx"),
-            )
-        )
-        api_kwargs.update(top_level_from_profile)
-
-        # extra_body assembly
-        extra_body: dict[str, Any] = {}
-
-        # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
-        profile_body = profile.build_extra_body(
-            session_id=params.get("session_id"),
-            provider_preferences=params.get("provider_preferences"),
-        )
-        if profile_body:
-            extra_body.update(profile_body)
-
-        # Profile's reasoning/thinking extra_body entries
-        if extra_body_from_profile:
-            extra_body.update(extra_body_from_profile)
-
-        # Merge any pre-built extra_body additions from the caller
-        additions = params.get("extra_body_additions")
-        if additions:
-            extra_body.update(additions)
-
-        # Request overrides (user config)
-        overrides = params.get("request_overrides")
-        if overrides:
-            for k, v in overrides.items():
-                if k == "extra_body" and isinstance(v, dict):
-                    extra_body.update(v)
-                else:
-                    api_kwargs[k] = v
-
-        if extra_body:
-            api_kwargs["extra_body"] = extra_body
-
-        return api_kwargs
-
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize OpenAI ChatCompletion to NormalizedResponse.

@@ -319,7 +313,7 @@ class ChatCompletionsTransport(ProviderTransport):
                # Gemini 3 thinking models attach extra_content with
                # thought_signature — without replay on the next turn the API
                # rejects the request with 400.
-                tc_provider_data: dict[str, Any] = {}
+                tc_provider_data: Dict[str, Any] = {}
                extra = getattr(tc, "extra_content", None)
                if extra is None and hasattr(tc, "model_extra"):
                    extra = (tc.model_extra or {}).get("extra_content")
@@ -330,14 +324,12 @@ class ChatCompletionsTransport(ProviderTransport):
                        except Exception:
                            pass
                    tc_provider_data["extra_content"] = extra
-                tool_calls.append(
-                    ToolCall(
-                        id=tc.id,
-                        name=tc.function.name,
-                        arguments=tc.function.arguments,
-                        provider_data=tc_provider_data or None,
-                    )
-                )
+                tool_calls.append(ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                    provider_data=tc_provider_data or None,
+                ))

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -355,7 +347,7 @@ class ChatCompletionsTransport(ProviderTransport):
        reasoning = getattr(msg, "reasoning", None)
        reasoning_content = getattr(msg, "reasoning_content", None)

-        provider_data: dict[str, Any] = {}
+        provider_data: Dict[str, Any] = {}
        if reasoning_content:
            provider_data["reasoning_content"] = reasoning_content
        rd = getattr(msg, "reasoning_details", None)
@@ -381,7 +373,7 @@ class ChatCompletionsTransport(ProviderTransport):
            return False
        return True

-    def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
        usage = getattr(response, "usage", None)
        if usage is None:
@@ -12,7 +12,7 @@ from __future__ import annotations

 import json
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, Dict, List, Optional


@dataclass
@@ -32,10 +32,10 @@ class ToolCall:
    * Others: ``None``
    """

-    id: str | None
+    id: Optional[str]
    name: str
    arguments: str  # JSON string
-    provider_data: dict[str, Any] | None = field(default=None, repr=False)
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The agent loop reads tc.function.name / tc.function.arguments
@@ -47,17 +47,17 @@ class ToolCall:
        return "function"

    @property
-    def function(self) -> ToolCall:
+    def function(self) -> "ToolCall":
        """Return self so tc.function.name / tc.function.arguments work."""
        return self

    @property
-    def call_id(self) -> str | None:
+    def call_id(self) -> Optional[str]:
        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
        return (self.provider_data or {}).get("call_id")

    @property
-    def response_item_id(self) -> str | None:
+    def response_item_id(self) -> Optional[str]:
        """Codex response_item_id from provider_data."""
        return (self.provider_data or {}).get("response_item_id")

@@ -101,18 +101,18 @@ class NormalizedResponse:
    * Others: ``None``
    """

-    content: str | None
-    tool_calls: list[ToolCall] | None
+    content: Optional[str]
+    tool_calls: Optional[List[ToolCall]]
    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
-    reasoning: str | None = None
-    usage: Usage | None = None
-    provider_data: dict[str, Any] | None = field(default=None, repr=False)
+    reasoning: Optional[str] = None
+    usage: Optional[Usage] = None
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The shim _nr_to_assistant_message() mapped these from provider_data.
    # These properties let NormalizedResponse pass through directly.
    @property
-    def reasoning_content(self) -> str | None:
+    def reasoning_content(self) -> Optional[str]:
        pd = self.provider_data or {}
        return pd.get("reasoning_content")

@@ -136,9 +136,8 @@ class NormalizedResponse:
 # Factory helpers
 # ---------------------------------------------------------------------------

-
 def build_tool_call(
-    id: str | None,
+    id: Optional[str],
    name: str,
    arguments: Any,
    **provider_fields: Any,
@@ -152,7 +151,7 @@ def build_tool_call(
    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)


-def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
+def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
    """Translate a provider-specific stop reason to the normalised set.

    Falls back to ``"stop"`` for unknown or ``None`` reasons.
@@ -6000,7 +6000,6 @@ class HermesCLI:
            platform_status = {
                Platform.TELEGRAM: ("Telegram", "TELEGRAM_BOT_TOKEN"),
                Platform.DISCORD: ("Discord", "DISCORD_BOT_TOKEN"),
-                Platform.SLACK: ("Slack", "SLACK_BOT_TOKEN"),
                Platform.WHATSAPP: ("WhatsApp", "WHATSAPP_ENABLED"),
            }
            
@@ -36,7 +36,6 @@

      imports = [
        ./nix/packages.nix
-        ./nix/overlays.nix
        ./nix/nixosModules.nix
        ./nix/checks.nix
        ./nix/devShell.nix
@@ -566,8 +566,6 @@ def load_gateway_config() -> GatewayConfig:
                        existing = {}
                    # Deep-merge extra dicts so gateway.json defaults survive
                    merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
-                    if plat_name == Platform.SLACK.value and "enabled" in plat_block:
-                        merged_extra["_enabled_explicit"] = True
                    merged = {**existing, **plat_block}
                    if merged_extra:
                        merged["extra"] = merged_extra
@@ -612,21 +610,16 @@ def load_gateway_config() -> GatewayConfig:
                        bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
                    else:
                        bridged["channel_prompts"] = channel_prompts
-                enabled_was_explicit = "enabled" in platform_cfg
-                if not bridged and not enabled_was_explicit:
+                if not bridged:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
                if not isinstance(plat_data, dict):
                    plat_data = {}
                    platforms_data[plat.value] = plat_data
-                if enabled_was_explicit:
-                    plat_data["enabled"] = platform_cfg["enabled"]
                extra = plat_data.setdefault("extra", {})
                if not isinstance(extra, dict):
                    extra = {}
                    plat_data["extra"] = extra
-                if plat == Platform.SLACK and enabled_was_explicit:
-                    extra["_enabled_explicit"] = True
                extra.update(bridged)

            # Slack settings → env vars (env vars take precedence)
@@ -948,14 +941,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            # No yaml config for Slack — env-only setup, enable it
            config.platforms[Platform.SLACK] = PlatformConfig()
            config.platforms[Platform.SLACK].enabled = True
-        else:
-            slack_config = config.platforms[Platform.SLACK]
-            enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
-            if not slack_config.enabled and not enabled_was_explicit:
-                # Top-level Slack settings such as channel prompts should not
-                # turn an env-token setup into a disabled platform. Only an
-                # explicit slack.enabled/platforms.slack.enabled false should.
-                slack_config.enabled = True
        # If yaml config exists, respect its enabled flag (don't override
        # explicit enabled: false). Token is still stored so skills that
        # send Slack messages can use it without activating the gateway adapter.
@@ -1702,41 +1702,13 @@ class BasePlatformAdapter(ABC):
        the agent is waiting for dangerous-command approval).  This is critical
        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
        the compose box — pausing lets the user type ``/approve`` or ``/deny``.
-
-        Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
-        network round-trip can't stall the refresh cadence.  Telegram- and
-        Discord-side typing expire after ~5s; if any individual send_typing
-        takes longer than the refresh interval, the bubble would die and
-        stay dead until that call returns.  Abandoning the slow call lets
-        the next tick fire a fresh send_typing on schedule — as long as
-        one of them succeeds within the 5s platform-side window, the bubble
-        stays visible across provider stalls / upstream API timeouts.
        """
-        # Bound each send_typing round-trip so the refresh cadence isn't
-        # gated on network health.  Must stay below ``interval`` so a slow
-        # call gets abandoned before the next scheduled tick.
-        _send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
        try:
            while True:
                if stop_event is not None and stop_event.is_set():
                    return
                if chat_id not in self._typing_paused:
-                    try:
-                        await asyncio.wait_for(
-                            self.send_typing(chat_id, metadata=metadata),
-                            timeout=_send_typing_timeout,
-                        )
-                    except asyncio.TimeoutError:
-                        # Slow network — abandon this tick, keep the loop
-                        # on schedule so the next send_typing fires fresh.
-                        pass
-                    except asyncio.CancelledError:
-                        raise
-                    except Exception as typing_err:
-                        logger.debug(
-                            "[%s] send_typing error (non-fatal): %s",
-                            self.name, typing_err,
-                        )
+                    await self.send_typing(chat_id, metadata=metadata)
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
@@ -4800,58 +4800,6 @@ class GatewayRunner:
                                            "compression",
                                            f"{_new_tokens:,}",
                                        )
-
-                                    # If summary generation failed, the
-                                    # compressor inserted a static fallback
-                                    # placeholder and the dropped turns are
-                                    # gone for good.  Surface a visible
-                                    # warning to the gateway user — agent.log
-                                    # alone is invisible on TG/Discord/etc.
-                                    _comp = getattr(_hyg_agent, "context_compressor", None)
-                                    if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False):
-                                        _dropped = getattr(_comp, "_last_summary_dropped_count", 0)
-                                        _err = getattr(_comp, "_last_summary_error", None) or "unknown error"
-                                        _warn_msg = (
-                                            "⚠️ Context compression summary failed "
-                                            f"({_err}). {_dropped} historical message(s) "
-                                            "were removed and replaced with a placeholder. "
-                                            "Earlier context is no longer recoverable. "
-                                            "Consider /reset for a clean session, or check "
-                                            "your auxiliary.compression model configuration."
-                                        )
-                                        try:
-                                            _adapter = self.adapters.get(source.platform)
-                                            if _adapter and source.chat_id:
-                                                await _adapter.send(source.chat_id, _warn_msg, metadata=_hyg_meta)
-                                        except Exception as _werr:
-                                            logger.warning(
-                                                "Failed to deliver compression-failure warning to user: %s",
-                                                _werr,
-                                            )
-                                    # Separately: if the user's CONFIGURED aux
-                                    # model failed and we recovered by falling
-                                    # back to the main model, tell them — a
-                                    # misconfigured auxiliary.compression.model
-                                    # is something only they can fix, and
-                                    # silent recovery would hide it.
-                                    elif _comp is not None and getattr(_comp, "_last_aux_model_failure_model", None):
-                                        _aux_model = getattr(_comp, "_last_aux_model_failure_model", "")
-                                        _aux_err = getattr(_comp, "_last_aux_model_failure_error", None) or "unknown error"
-                                        _aux_msg = (
-                                            f"ℹ️ Configured compression model `{_aux_model}` "
-                                            f"failed ({_aux_err}). Recovered using your main "
-                                            "model — context is intact — but you may want to "
-                                            "check `auxiliary.compression.model` in config.yaml."
-                                        )
-                                        try:
-                                            _adapter = self.adapters.get(source.platform)
-                                            if _adapter and source.chat_id:
-                                                await _adapter.send(source.chat_id, _aux_msg, metadata=_hyg_meta)
-                                        except Exception as _werr:
-                                            logger.warning(
-                                                "Failed to deliver aux-model-fallback notice to user: %s",
-                                                _werr,
-                                            )
                                finally:
                                    self._cleanup_agent_resources(_hyg_agent)

@@ -7395,17 +7343,6 @@ class GatewayRunner:
                    approx_tokens,
                    new_tokens,
                )
-                # Detect summary-generation failure so we can surface a
-                # visible warning to the user even on the manual /compress
-                # path (otherwise the failure is silently logged).
-                _summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False))
-                _dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0)
-                _summary_err = getattr(compressor, "_last_summary_error", None)
-                # Separately: did the user's CONFIGURED aux model fail
-                # and we recovered via main?  Surface that as an info
-                # note so they can fix their config.
-                _aux_fail_model = getattr(compressor, "_last_aux_model_failure_model", None)
-                _aux_fail_err = getattr(compressor, "_last_aux_model_failure_error", None)
            finally:
                self._cleanup_agent_resources(tmp_agent)
            lines = [f"🗜️ {summary['headline']}"]
@@ -7414,20 +7351,6 @@ class GatewayRunner:
            lines.append(summary["token_line"])
            if summary["note"]:
                lines.append(summary["note"])
-            if _summary_failed:
-                lines.append(
-                    f"⚠️ Summary generation failed ({_summary_err or 'unknown error'}). "
-                    f"{_dropped_count} historical message(s) were removed and replaced "
-                    "with a placeholder; earlier context is no longer recoverable. "
-                    "Consider checking your auxiliary.compression model configuration."
-                )
-            elif _aux_fail_model:
-                lines.append(
-                    f"ℹ️ Configured compression model `{_aux_fail_model}` failed "
-                    f"({_aux_fail_err or 'unknown error'}). Recovered using your main "
-                    "model — context is intact — but you may want to check "
-                    "`auxiliary.compression.model` in config.yaml."
-                )
            return "\n".join(lines)
        except Exception as e:
            logger.warning("Manual compress failed: %s", e)
@@ -8560,7 +8483,6 @@ class GatewayRunner:
            The enriched message string with vision descriptions prepended.
        """
        from tools.vision_tools import vision_analyze_tool
-        from agent.memory_manager import sanitize_context

        analysis_prompt = (
            "Describe everything visible in this image in thorough detail. "
@@ -8579,7 +8501,6 @@ class GatewayRunner:
                result = json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
-                    description = sanitize_context(description)
                    enriched_parts.append(
                        f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                        f"[If you need a closer look, use vision_analyze with "
@@ -224,14 +224,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("ARCEEAI_API_KEY",),
        base_url_env_var="ARCEE_BASE_URL",
    ),
-    "gmi": ProviderConfig(
-        id="gmi",
-        name="GMI Cloud",
-        auth_type="api_key",
-        inference_base_url="https://api.gmi-serving.com/v1",
-        api_key_env_vars=("GMI_API_KEY",),
-        base_url_env_var="GMI_BASE_URL",
-    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -374,37 +366,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
    ),
 }

-# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in
-# providers/ that is not already declared above.  New providers only need a
-# providers/*.py file — no edits to this file required.
-try:
-    from providers import list_providers as _list_providers_for_registry
-    for _pp in _list_providers_for_registry():
-        if _pp.name in PROVIDER_REGISTRY:
-            continue
-        if _pp.auth_type != "api_key" or not _pp.env_vars:
-            continue
-        # Skip providers that need custom token resolution (copilot, kimi, zai)
-        # — those are already fully declared above.
-        if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai"}:
-            continue
-        _api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_BASE_URL") and not v.endswith("_URL"))
-        _base_url_var = next((v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), None)
-        PROVIDER_REGISTRY[_pp.name] = ProviderConfig(
-            id=_pp.name,
-            name=_pp.display_name or _pp.name,
-            auth_type="api_key",
-            inference_base_url=_pp.base_url,
-            api_key_env_vars=_api_key_vars or _pp.env_vars,
-            base_url_env_var=_base_url_var or "",
-        )
-        # Also register aliases so resolve_provider() resolves them
-        for _alias in _pp.aliases:
-            if _alias not in PROVIDER_REGISTRY:
-                PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name]
-except Exception:
-    pass
-

 # =============================================================================
 # Anthropic Key Helper
@@ -1159,7 +1120,6 @@ def resolve_provider(
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
-        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
@@ -1181,17 +1141,6 @@ def resolve_provider(
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
    }
-    # Extend with aliases declared in providers/*.py that aren't already mapped.
-    # This keeps providers/ as the single source for new aliases while the
-    # hardcoded dict above remains authoritative for existing ones.
-    try:
-        from providers import list_providers as _lp
-        for _pp in _lp():
-            for _alias in _pp.aliases:
-                if _alias not in _PROVIDER_ALIASES:
-                    _PROVIDER_ALIASES[_alias] = _pp.name
-    except Exception:
-        pass
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

    if normalized == "openrouter":
@@ -1254,22 +1254,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "GMI_API_KEY": {
-        "description": "GMI Cloud API key",
-        "prompt": "GMI Cloud API key",
-        "url": "https://www.gmicloud.ai/",
-        "password": True,
-        "category": "provider",
-        "advanced": True,
-    },
-    "GMI_BASE_URL": {
-        "description": "GMI Cloud base URL override",
-        "prompt": "GMI Cloud base URL (leave empty for default)",
-        "url": None,
-        "password": False,
-        "category": "provider",
-        "advanced": True,
-    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -4252,45 +4236,3 @@ def config_command(args):
        print("  hermes config path      Show config file path")
        print("  hermes config env-path  Show .env file path")
        sys.exit(1)
-
-
-# ── Profile-driven env var injection ─────────────────────────────────────────
-# Any provider registered in providers/ with auth_type="api_key" automatically
-# gets its env_vars exposed in OPTIONAL_ENV_VARS without editing this file.
-# Runs once at import time.
-
-_profile_env_vars_injected = False
-
-
-def _inject_profile_env_vars() -> None:
-    """Populate OPTIONAL_ENV_VARS from provider profiles not already listed.
-
-    Called once at module load time. Idempotent — repeated calls are no-ops.
-    """
-    global _profile_env_vars_injected
-    if _profile_env_vars_injected:
-        return
-    _profile_env_vars_injected = True
-    try:
-        from providers import list_providers
-        for _pp in list_providers():
-            if _pp.auth_type not in ("api_key",):
-                continue
-            for _var in _pp.env_vars:
-                if _var in OPTIONAL_ENV_VARS:
-                    continue
-                _is_key = not _var.endswith("_BASE_URL") and not _var.endswith("_URL")
-                OPTIONAL_ENV_VARS[_var] = {
-                    "description": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL override'}",
-                    "prompt": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL (leave empty for default)'}",
-                    "url": _pp.signup_url or None,
-                    "password": _is_key,
-                    "category": "provider",
-                    "advanced": True,
-                }
-    except Exception:
-        pass
-
-
-# Eagerly inject so that OPTIONAL_ENV_VARS is fully populated at import time.
-_inject_profile_env_vars()
@@ -46,7 +46,6 @@ _PROVIDER_ENV_HINTS = (
    "Z_AI_API_KEY",
    "KIMI_API_KEY",
    "KIMI_CN_API_KEY",
-    "GMI_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
@@ -164,84 +163,6 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
        check_warn("Could not verify systemd linger", f"({linger_detail})")


-_APIKEY_PROVIDERS_CACHE: list | None = None
-
-
-def _build_apikey_providers_list() -> list:
-    """Build the API-key provider health-check list once and cache it.
-
-    Tuple format: (name, env_vars, default_url, base_env, supports_models_endpoint)
-    Base list augmented with any ProviderProfile with auth_type="api_key" not
-    already present — adding providers/*.py is sufficient to get into doctor.
-    """
-    _static = [
-        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
-        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
-        ("StepFun Step Plan", ("STEPFUN_API_KEY",),                          "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
-        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
-        ("Arcee AI",         ("ARCEEAI_API_KEY",),                           "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
-        ("GMI Cloud",        ("GMI_API_KEY",),                               "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
-        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                          "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
-        ("Hugging Face",     ("HF_TOKEN",),                                  "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
-        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                            "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
-        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                        "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
-        # MiniMax: the /anthropic endpoint doesn't support /models; use the /v1 surface.
-        ("MiniMax",          ("MINIMAX_API_KEY",),                           "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
-        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                        "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
-        ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",),                       "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
-        ("Kilo Code",        ("KILOCODE_API_KEY",),                          "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
-        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                      "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
-        # OpenCode Go has no shared /models endpoint; skip the health check.
-        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                       None,                                  "OPENCODE_GO_BASE_URL", False),
-    ]
-    _known_names = {t[0] for t in _static}
-    # Also index by profile canonical name so profiles without display_name
-    # don't create duplicate entries for providers already in the static list.
-    _known_canonical: set[str] = set()
-    _name_to_canonical = {
-        "Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding",
-        "StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn",
-        "Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek",
-        "Hugging Face": "huggingface", "NVIDIA NIM": "nvidia",
-        "Alibaba/DashScope": "alibaba", "MiniMax": "minimax",
-        "MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway",
-        "Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen",
-        "OpenCode Go": "opencode-go",
-    }
-    for _label, _canonical in _name_to_canonical.items():
-        _known_canonical.add(_canonical)
-    try:
-        from providers import list_providers
-        from providers.base import ProviderProfile as _PP
-        for _pp in list_providers():
-            if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars:
-                continue
-            _label = _pp.display_name or _pp.name
-            if _label in _known_names or _pp.name in _known_canonical:
-                continue
-            # Separate API-key vars from base-URL override vars — the health-check
-            # loop sends the first found value as Authorization: Bearer, so a URL
-            # string must never be picked.
-            _key_vars = tuple(
-                v for v in _pp.env_vars
-                if not v.endswith("_BASE_URL") and not v.endswith("_URL")
-            )
-            _base_var = next(
-                (v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")),
-                None,
-            )
-            if not _key_vars:
-                continue
-            _models_url = (
-                (_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
-                if _pp.base_url else None
-            )
-            _static.append((_label, _key_vars, _models_url, _base_var, True))
-    except Exception:
-        pass
-    return _static
-
-
 def run_doctor(args):
    """Run diagnostic checks."""
    should_fix = getattr(args, 'fix', False)
@@ -1009,11 +930,26 @@ def run_doctor(args):

    # -- API-key providers --
    # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
-    # Cached at module level after first build — profiles auto-extend it.
-    global _APIKEY_PROVIDERS_CACHE
-    if _APIKEY_PROVIDERS_CACHE is None:
-        _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
-    _apikey_providers = _APIKEY_PROVIDERS_CACHE
+    # If supports_models_endpoint is False, we skip the health check and just show "configured"
+    _apikey_providers = [
+        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
+        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
+        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
+        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
+        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
+        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
+        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
+        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
+        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
+        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
+        ("Vercel AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
+        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
+        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
+        # OpenCode Go has no shared /models endpoint; skip the health check.
+        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         None,                                  "OPENCODE_GO_BASE_URL", False),
+    ]
    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
        _key = ""
        for _ev in _env_vars:
@@ -829,29 +829,8 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti
    )


-_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"})
-
-
 def _tui_need_npm_install(root: Path) -> bool:
-    """True when @hermes/ink is missing or node_modules is behind package-lock.json.
-
-    Compares ``package-lock.json`` against ``node_modules/.package-lock.json``
-    (npm's hidden lockfile) by **content**, not mtime: git checkouts and npm
-    rewrites can bump the root lockfile's timestamp even when installed deps
-    already match, which used to trigger a spurious "Installing TUI
-    dependencies" on every launch.
-
-    For each entry in the root lock's ``packages`` map:
-      - missing from hidden lock → reinstall (unless the entry is marked
-        ``optional`` or ``peer``, which npm may intentionally skip per platform)
-      - present but with differing fields (excluding npm-written runtime
-        annotations like ``ideallyInert``) → reinstall
-
-    Extra entries that exist only in the hidden lock are ignored — stale
-    transitives left over from a removed dependency don't break runtime and
-    we'd rather not force a reinstall for them. Falls back to mtime
-    comparison if either lockfile is unparseable.
-    """
+    """True when @hermes/ink is missing or node_modules is behind package-lock.json (post-pull)."""
    ink = root / "node_modules" / "@hermes" / "ink" / "package.json"
    if not ink.is_file():
        return True
@@ -861,35 +840,7 @@ def _tui_need_npm_install(root: Path) -> bool:
    marker = root / "node_modules" / ".package-lock.json"
    if not marker.is_file():
        return True
-
-    # Compare lockfile contents, not mtimes: git checkouts and npm rewrites
-    # can bump the root lockfile timestamp even when installed deps already
-    # match. Fall back to mtime when either file is unparseable.
-    try:
-        wanted = json.loads(lock.read_text(encoding="utf-8")).get("packages") or {}
-        installed = json.loads(marker.read_text(encoding="utf-8")).get("packages") or {}
-    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
-        return lock.stat().st_mtime > marker.stat().st_mtime
-
-    def comparable(pkg: dict) -> dict:
-        return {k: v for k, v in pkg.items() if k not in _NPM_LOCK_RUNTIME_KEYS}
-
-    for name, pkg in wanted.items():
-        if not name:
-            continue
-
-        if not isinstance(pkg, dict):
-            continue
-
-        if name not in installed:
-            if pkg.get("optional") or pkg.get("peer"):
-                continue
-            return True
-
-        if isinstance(installed[name], dict) and comparable(pkg) != comparable(installed[name]):
-            return True
-
-    return False
+    return lock.stat().st_mtime > marker.stat().st_mtime


 def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
@@ -1528,21 +1479,6 @@ def cmd_model(args):
    select_provider_and_model(args=args)


-def _is_profile_api_key_provider(provider_id: str) -> bool:
-    """Return True when provider_id maps to a profile with auth_type='api_key'.
-
-    Used as a catch-all in select_provider_and_model() so that new providers
-    declared in providers/*.py automatically dispatch to _model_flow_api_key_provider
-    without requiring an explicit elif branch here.
-    """
-    try:
-        from providers import get_provider_profile
-        _p = get_provider_profile(provider_id)
-        return _p is not None and _p.auth_type == "api_key"
-    except Exception:
-        return False
-
-
 def select_provider_and_model(args=None):
    """Core provider selection + model picking logic.

@@ -1832,10 +1768,9 @@ def select_provider_and_model(args=None):
        "huggingface",
        "xiaomi",
        "arcee",
-        "gmi",
        "nvidia",
        "ollama-cloud",
-    ) or _is_profile_api_key_provider(selected_provider):
+    ):
        _model_flow_api_key_provider(config, selected_provider, current_model)

    # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -7633,22 +7568,6 @@ def cmd_logs(args):
    )


-def _build_provider_choices() -> list[str]:
-    """Build the --provider choices list from CANONICAL_PROVIDERS + 'auto'."""
-    try:
-        from hermes_cli.models import CANONICAL_PROVIDERS as _cp
-        return ["auto"] + [p.slug for p in _cp]
-    except Exception:
-        # Fallback: static list guarantees the CLI always works
-        return [
-            "auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
-            "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
-            "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
-            "stepfun", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee",
-            "nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen", "opencode-go",
-        ]
-
-
 def main():
    """Main entry point for hermes CLI."""
    parser = argparse.ArgumentParser(
@@ -7842,7 +7761,29 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=_build_provider_choices(),
+        choices=[
+            "auto",
+            "openrouter",
+            "nous",
+            "openai-codex",
+            "copilot-acp",
+            "copilot",
+            "anthropic",
+            "gemini",
+            "xai",
+            "ollama-cloud",
+            "huggingface",
+            "zai",
+            "kimi-coding",
+            "kimi-coding-cn",
+            "stepfun",
+            "minimax",
+            "minimax-cn",
+            "kilocode",
+            "xiaomi",
+            "arcee",
+            "nvidia",
+        ],
        default=None,
        help="Inference provider (default: auto)",
    )
@@ -278,14 +278,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "trinity-large-preview",
        "trinity-mini",
    ],
-    "gmi": [
-        "zai-org/GLM-5.1-FP8",
-        "deepseek-ai/DeepSeek-V3.2",
-        "moonshotai/Kimi-K2.5",
-        "google/gemini-3.1-flash-lite-preview",
-        "anthropic/claude-sonnet-4.6",
-        "openai/gpt-5.4",
-    ],
    "opencode-zen": [
        "kimi-k2.5",
        "gpt-5.4-pro",
@@ -717,6 +709,7 @@ class ProviderEntry(NamedTuple):
    label: str
    tui_desc: str   # detailed description for `hermes model` TUI

+
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
@@ -742,7 +735,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
    ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
-    ProviderEntry("gmi",            "GMI Cloud",                "GMI Cloud (multi-model direct API)"),
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
@@ -750,25 +742,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
 ]

-# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
-# that is not already in the list above.  Adding providers/*.py is sufficient
-# to expose a new provider in the model picker, /model, and all downstream
-# consumers — no edits to this file needed.
-_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
-try:
-    from providers import list_providers as _list_providers_for_canonical
-    for _pp in _list_providers_for_canonical():
-        if _pp.name in _canonical_slugs:
-            continue
-        if _pp.auth_type in ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"):
-            continue  # non-api-key flows need bespoke picker UX; skip auto-inject
-        _label = _pp.display_name or _pp.name
-        _desc = _pp.description or f"{_label} (direct API)"
-        CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
-        _canonical_slugs.add(_pp.name)
-except Exception:
-    pass
-
 # Derived dicts — used throughout the codebase
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider
@@ -796,8 +769,6 @@ _PROVIDER_ALIASES = {
    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -1878,19 +1849,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
            except Exception:
                pass
-    if normalized == "gmi":
-        try:
-            from hermes_cli.auth import resolve_api_key_provider_credentials
-
-            creds = resolve_api_key_provider_credentials("gmi")
-            api_key = str(creds.get("api_key") or "").strip()
-            base_url = str(creds.get("base_url") or "").strip()
-            if api_key and base_url:
-                live = fetch_api_models(api_key, base_url)
-                if live:
-                    return live
-        except Exception:
-            pass
    if normalized == "custom":
        base_url = _get_custom_base_url()
        if base_url:
@@ -1903,34 +1861,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
-
-    # ── Profile-based generic live fetch (all simple api-key providers) ──
-    # Handles any provider registered in providers/ with auth_type="api_key".
-    # Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
-    try:
-        from providers import get_provider_profile
-        from hermes_cli.auth import resolve_api_key_provider_credentials
-
-        _p = get_provider_profile(normalized)
-        if _p and _p.auth_type == "api_key" and _p.base_url:
-            try:
-                creds = resolve_api_key_provider_credentials(normalized)
-                api_key = str(creds.get("api_key") or "").strip()
-                base_url = str(creds.get("base_url") or "").strip()
-            except Exception:
-                api_key, base_url = "", _p.base_url
-            if not base_url:
-                base_url = _p.base_url
-            if api_key:
-                live = _p.fetch_models(api_key=api_key)
-                if live:
-                    return live
-            # Use profile's fallback_models if defined
-            if _p.fallback_models:
-                return list(_p.fallback_models)
-    except Exception:
-        pass
-
    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
    if normalized in _MODELS_DEV_PREFERRED:
        return _merge_with_models_dev(normalized, curated_static)
@@ -79,20 +79,6 @@ VALID_HOOKS: Set[str] = {
    #   {"action": "allow"}  /  None             -> normal dispatch
    # Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
    "pre_gateway_dispatch",
-    # Approval lifecycle hooks. Fired by tools/approval.py when a dangerous
-    # command needs user approval -- fires BOTH for CLI-interactive prompts
-    # and for gateway/ACP approvals (Telegram, Discord, Slack, TUI, etc.).
-    # Observers only: return values are ignored. Plugins cannot veto or
-    # pre-answer an approval from these hooks (use pre_tool_call to block
-    # a tool before it reaches approval).
-    #
-    # Kwargs for pre_approval_request:
-    #   command: str, description: str, pattern_key: str, pattern_keys: list[str],
-    #   session_key: str, surface: "cli" | "gateway"
-    # Kwargs for post_approval_response: same as above plus
-    #   choice: "once" | "session" | "always" | "deny" | "timeout"
-    "pre_approval_request",
-    "post_approval_response",
 }

 ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -163,12 +163,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_override="https://api.arcee.ai/api/v1",
        base_url_env_var="ARCEE_BASE_URL",
    ),
-    "gmi": HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("GMI_API_KEY",),
-        base_url_override="https://api.gmi-serving.com/v1",
-        base_url_env_var="GMI_BASE_URL",
-    ),
    "ollama-cloud": HermesOverlay(
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
@@ -303,10 +297,6 @@ ALIASES: Dict[str, str] = {
    "arcee-ai": "arcee",
    "arceeai": "arcee",

-    # gmi
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
-
    # Local server aliases → virtual "local" concept (resolved via user config)
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
@@ -329,7 +319,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
-    "gmi": "GMI Cloud",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -214,6 +214,10 @@ def _resolve_runtime_from_pool_entry(
        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
    elif provider == "openrouter":
        base_url = base_url or OPENROUTER_BASE_URL
+    elif provider == "xai":
+        api_mode = "codex_responses"
+    elif provider == "nous":
+        api_mode = "chat_completions"
    elif provider == "copilot":
        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
        base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
@@ -245,14 +249,6 @@ def _resolve_runtime_from_pool_entry(
            base_url = re.sub(r"/v1/?$", "", base_url)
    else:
        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
-        # Use profile api_mode for all other known providers
-        try:
-            from providers import get_provider_profile
-            _p = get_provider_profile(provider)
-            if _p and _p.api_mode:
-                api_mode = _p.api_mode
-        except Exception:
-            pass
        # Honour model.base_url from config.yaml when the configured provider
        # matches this provider — same pattern as the Anthropic branch above.
        # Only override when the pool entry has no explicit base_url (i.e. it
@@ -270,21 +266,12 @@ def _resolve_runtime_from_pool_entry(
            from hermes_cli.models import opencode_model_api_mode
            api_mode = opencode_model_api_mode(provider, effective_model)
        else:
-            # Try profile api_mode first, then auto-detect from URL
-            try:
-                from providers import get_provider_profile
-                _p = get_provider_profile(provider)
-                if _p and _p.api_mode:
-                    api_mode = _p.api_mode
-            except Exception:
-                pass
-            if api_mode == "chat_completions":
-                # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
-                # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
-                # codex_responses).
-                detected = _detect_api_mode_for_url(base_url)
-                if detected:
-                    api_mode = detected
+            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
+            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
+            # codex_responses).
+            detected = _detect_api_mode_for_url(base_url)
+            if detected:
+                api_mode = detected

    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
    # Anthropic SDK prepends its own /v1/messages to the base_url.  Strip the
@@ -2212,7 +2212,9 @@ async def get_usage_analytics(days: int = 30):
        cutoff = time.time() - (days * 86400)
+        # Total prompt tokens = input_tokens + cache_read_tokens + cache_write_tokens.
+        # Every term is COALESCEd so a single NULL column cannot null out a row's sum.
        cur = db._conn.execute("""
            SELECT date(started_at, 'unixepoch') as day,
-                   SUM(input_tokens) as input_tokens,
+                   SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
                   SUM(output_tokens) as output_tokens,
                   SUM(cache_read_tokens) as cache_read_tokens,
                   SUM(reasoning_tokens) as reasoning_tokens,
@@ -2227,18 +2229,18 @@ async def get_usage_analytics(days: int = 30):

        cur2 = db._conn.execute("""
            SELECT model,
-                   SUM(input_tokens) as input_tokens,
+                   SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
                   SUM(output_tokens) as output_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
                   COUNT(*) as sessions,
                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ? AND model IS NOT NULL
-            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
+            GROUP BY model ORDER BY SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) + SUM(output_tokens) DESC
        """, (cutoff,))
        by_model = [dict(r) for r in cur2.fetchall()]

        cur3 = db._conn.execute("""
-            SELECT SUM(input_tokens) as total_input,
+            SELECT SUM(COALESCE(input_tokens, 0) + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_input,
                   SUM(output_tokens) as total_output,
                   SUM(cache_read_tokens) as total_cache_read,
                   SUM(reasoning_tokens) as total_reasoning,
@@ -22,8 +22,6 @@ import sqlite3
 import threading
 import time
 from pathlib import Path
-
-from agent.memory_manager import sanitize_context
 from hermes_constants import get_hermes_home
 from typing import Any, Callable, Dict, List, Optional, TypeVar

@@ -33,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 10
+SCHEMA_VERSION = 9

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -121,32 +119,6 @@ CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
 END;
 """

-# Trigram FTS5 table for CJK substring search.  The default unicode61
-# tokenizer splits CJK characters into individual tokens, breaking phrase
-# matching.  The trigram tokenizer creates overlapping 3-byte sequences so
-# substring queries work natively for any script (CJK, Thai, etc.).
-FTS_TRIGRAM_SQL = """
-CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
-    content,
-    content=messages,
-    content_rowid=id,
-    tokenize='trigram'
-);
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
-END;
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
-    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
-END;
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
-    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
-END;
-"""
-

 class SessionDB:
    """
@@ -394,18 +366,6 @@ class SessionDB:
                except sqlite3.OperationalError:
                    pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 9")
-            if current_version < 10:
-                # v10: trigram FTS5 table for CJK/substring search.
-                # Created via FTS_TRIGRAM_SQL below; backfill existing messages.
-                try:
-                    cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-                except sqlite3.OperationalError:
-                    cursor.executescript(FTS_TRIGRAM_SQL)
-                    cursor.execute(
-                        "INSERT INTO messages_fts_trigram(rowid, content) "
-                        "SELECT id, content FROM messages WHERE content IS NOT NULL"
-                    )
-                cursor.execute("UPDATE schema_version SET version = 10")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -423,12 +383,6 @@ class SessionDB:
        except sqlite3.OperationalError:
            cursor.executescript(FTS_SQL)

-        # Trigram FTS5 for CJK/substring search
-        try:
-            cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-        except sqlite3.OperationalError:
-            cursor.executescript(FTS_TRIGRAM_SQL)
-
        self._conn.commit()

    # =========================================================================
@@ -1201,10 +1155,7 @@ class SessionDB:

        messages = []
        for row in rows:
-            content = row["content"]
-            if row["role"] in {"user", "assistant"} and isinstance(content, str):
-                content = sanitize_context(content).strip()
-            msg = {"role": row["role"], "content": content}
+            msg = {"role": row["role"], "content": row["content"]}
            if row["tool_call_id"]:
                msg["tool_call_id"] = row["tool_call_id"]
            if row["tool_name"]:
@@ -1340,16 +1291,6 @@ class SessionDB:
        return sanitized.strip()


-    @staticmethod
-    def _is_cjk_codepoint(cp: int) -> bool:
-        return (0x4E00 <= cp <= 0x9FFF or    # CJK Unified Ideographs
-                0x3400 <= cp <= 0x4DBF or    # CJK Extension A
-                0x20000 <= cp <= 0x2A6DF or  # CJK Extension B
-                0x3000 <= cp <= 0x303F or    # CJK Symbols
-                0x3040 <= cp <= 0x309F or    # Hiragana
-                0x30A0 <= cp <= 0x30FF or    # Katakana
-                0xAC00 <= cp <= 0xD7AF)      # Hangul Syllables
-
    @staticmethod
    def _contains_cjk(text: str) -> bool:
        """Check if text contains CJK (Chinese, Japanese, Korean) characters."""
@@ -1365,11 +1306,6 @@ class SessionDB:
                return True
        return False

-    @classmethod
-    def _count_cjk(cls, text: str) -> int:
-        """Count CJK characters in text."""
-        return sum(1 for ch in text if cls._is_cjk_codepoint(ord(ch)))
-
    def search_messages(
        self,
        query: str,
@@ -1440,113 +1376,52 @@ class SessionDB:
            LIMIT ? OFFSET ?
        """

-        # CJK queries bypass the unicode61 FTS5 table.  The default tokenizer
-        # splits CJK characters into individual tokens, so "大别山项目" becomes
-        # "大 AND 别 AND 山 AND 项 AND 目" — producing false positives and
-        # missing exact phrase matches.
-        #
-        # For queries with 3+ CJK characters, we use the trigram FTS5 table
-        # (indexed substring matching with ranking and snippets).  For shorter
-        # CJK queries (1-2 chars), trigram can't match (it needs ≥9 UTF-8
-        # bytes = 3 CJK chars), so we fall back to LIKE.
-        is_cjk = self._contains_cjk(query)
-        if is_cjk:
-            raw_query = query.strip('"').strip()
-            cjk_count = self._count_cjk(raw_query)
-
-            if cjk_count >= 3:
-                # Trigram FTS5 path — quote each non-operator token to handle
-                # FTS5 special chars (%, *, etc.) while preserving boolean
-                # operators (AND, OR, NOT) for multi-term queries.
-                tokens = raw_query.split()
-                parts = []
-                for tok in tokens:
-                    if tok.upper() in ("AND", "OR", "NOT"):
-                        parts.append(tok)
-                    else:
-                        parts.append('"' + tok.replace('"', '""') + '"')
-                trigram_query = " ".join(parts)
-                tri_where = ["messages_fts_trigram MATCH ?"]
-                tri_params: list = [trigram_query]
-                if source_filter is not None:
-                    tri_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
-                    tri_params.extend(source_filter)
-                if exclude_sources is not None:
-                    tri_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
-                    tri_params.extend(exclude_sources)
-                if role_filter:
-                    tri_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
-                    tri_params.extend(role_filter)
-                tri_sql = f"""
-                    SELECT
-                        m.id,
-                        m.session_id,
-                        m.role,
-                        snippet(messages_fts_trigram, 0, '>>>', '<<<', '...', 40) AS snippet,
-                        m.content,
-                        m.timestamp,
-                        m.tool_name,
-                        s.source,
-                        s.model,
-                        s.started_at AS session_started
-                    FROM messages_fts_trigram
-                    JOIN messages m ON m.id = messages_fts_trigram.rowid
-                    JOIN sessions s ON s.id = m.session_id
-                    WHERE {' AND '.join(tri_where)}
-                    ORDER BY rank
-                    LIMIT ? OFFSET ?
-                """
-                tri_params.extend([limit, offset])
-                with self._lock:
-                    try:
-                        tri_cursor = self._conn.execute(tri_sql, tri_params)
-                    except sqlite3.OperationalError:
-                        matches = []
-                    else:
-                        matches = [dict(row) for row in tri_cursor.fetchall()]
-            else:
-                # Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
-                # Fall back to LIKE substring search.
-                escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
-                like_where = ["m.content LIKE ? ESCAPE '\\'"]
-                like_params: list = [f"%{escaped}%"]
-                if source_filter is not None:
-                    like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
-                    like_params.extend(source_filter)
-                if exclude_sources is not None:
-                    like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
-                    like_params.extend(exclude_sources)
-                if role_filter:
-                    like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
-                    like_params.extend(role_filter)
-                like_sql = f"""
-                    SELECT m.id, m.session_id, m.role,
-                           substr(m.content,
-                                  max(1, instr(m.content, ?) - 40),
-                                  120) AS snippet,
-                           m.content, m.timestamp, m.tool_name,
-                           s.source, s.model, s.started_at AS session_started
-                    FROM messages m
-                    JOIN sessions s ON s.id = m.session_id
-                    WHERE {' AND '.join(like_where)}
-                    ORDER BY m.timestamp DESC
-                    LIMIT ? OFFSET ?
-                """
-                like_params.extend([limit, offset])
-                # instr() parameter goes first in the bound list
-                like_params = [raw_query] + like_params
-                with self._lock:
-                    like_cursor = self._conn.execute(like_sql, like_params)
-                    matches = [dict(row) for row in like_cursor.fetchall()]
-        else:
-            with self._lock:
-                try:
-                    cursor = self._conn.execute(sql, params)
-                except sqlite3.OperationalError:
-                    # FTS5 query syntax error despite sanitization — return empty
+        with self._lock:
+            try:
+                cursor = self._conn.execute(sql, params)
+            except sqlite3.OperationalError:
+                # FTS5 query syntax error despite sanitization — return empty
+                # unless query contains CJK (fall back to LIKE below)
+                if not self._contains_cjk(query):
                    return []
-                else:
-                    matches = [dict(row) for row in cursor.fetchall()]
+                matches = []
+            else:
+                matches = [dict(row) for row in cursor.fetchall()]
+
+        # LIKE fallback for CJK queries: the default FTS5 tokenizer splits CJK characters
+        # individually, so multi-character queries fail. LIKE wildcards are escaped below.
+        if not matches and self._contains_cjk(query):
+            raw_query = query.strip('"').strip()
+            escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+            like_where = ["m.content LIKE ? ESCAPE '\\'"]
+            like_params: list = [f"%{escaped}%"]
+            if source_filter is not None:
+                like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+                like_params.extend(source_filter)
+            if exclude_sources is not None:
+                like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+                like_params.extend(exclude_sources)
+            if role_filter:
+                like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+                like_params.extend(role_filter)
+            like_sql = f"""
+                SELECT m.id, m.session_id, m.role,
+                       substr(m.content,
+                              max(1, instr(m.content, ?) - 40),
+                              120) AS snippet,
+                       m.content, m.timestamp, m.tool_name,
+                       s.source, s.model, s.started_at AS session_started
+                FROM messages m
+                JOIN sessions s ON s.id = m.session_id
+                WHERE {' AND '.join(like_where)}
+                ORDER BY m.timestamp DESC
+                LIMIT ? OFFSET ?
+            """
+            like_params.extend([limit, offset])
+            like_params = [raw_query] + like_params  # instr() takes the unescaped query and binds first
+            with self._lock:
+                like_cursor = self._conn.execute(like_sql, like_params)
+                matches = [dict(row) for row in like_cursor.fetchall()]

        # Add surrounding context (1 message before + after each match).
        # Done outside the lock so we don't hold it across N sequential queries.
@@ -7,7 +7,9 @@
  perSystem = { pkgs, system, lib, ... }:
    let
      hermes-agent = inputs.self.packages.${system}.default;
-      hermesVenv = hermes-agent.hermesVenv;
+      hermesVenv = pkgs.callPackage ./python.nix {
+        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };

      configMergeScript = pkgs.callPackage ./configMergeScript.nix { };

@@ -191,35 +193,6 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
          echo "ok" > $out/result
        '';

-        # Verify extraPythonPackages PYTHONPATH injection
-        extra-python-packages = let
-          testPkg = pkgs.python312Packages.pyfiglet;
-          hermesWithExtra = hermes-agent.override {
-            extraPythonPackages = [ testPkg ];
-          };
-        in pkgs.runCommand "hermes-extra-python-packages" { } ''
-          set -e
-          echo "=== Checking extraPythonPackages PYTHONPATH injection ==="
-
-          grep -q "PYTHONPATH" ${hermesWithExtra}/bin/hermes || \
-            (echo "FAIL: PYTHONPATH not in wrapper"; exit 1)
-          echo "PASS: PYTHONPATH present in wrapper"
-
-          grep -q "${testPkg}" ${hermesWithExtra}/bin/hermes || \
-            (echo "FAIL: test package path not in PYTHONPATH"; exit 1)
-          echo "PASS: test package path found in wrapper"
-
-          echo "=== Checking base package has no PYTHONPATH ==="
-          if grep -q "PYTHONPATH" ${hermes-agent}/bin/hermes; then
-            echo "FAIL: base package should not have PYTHONPATH"; exit 1
-          fi
-          echo "PASS: base package clean"
-
-          echo "=== All extraPythonPackages checks passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
        # ── Config merge + round-trip test ────────────────────────────────
        # Tests the merge script (Nix activation behavior) across 7
        # scenarios, then verifies Python's load_config() reads correctly.
@@ -1,186 +0,0 @@
-# nix/hermes-agent.nix — Overridable Hermes Agent package
-#
-# callPackage auto-wires nixpkgs args; flake inputs are passed explicitly.
-# Users override via: pkgs.hermes-agent.override { extraPythonPackages = [...]; }
-{
-  lib,
-  stdenv,
-  makeWrapper,
-  callPackage,
-  python312,
-  nodejs_22,
-  ripgrep,
-  git,
-  openssh,
-  ffmpeg,
-  tirith,
-  # Flake inputs — passed explicitly by packages.nix and overlays.nix
-  uv2nix,
-  pyproject-nix,
-  pyproject-build-systems,
-  npm-lockfile-fix,
-  # Overridable parameters
-  extraPythonPackages ? [ ],
-}:
-let
-  hermesVenv = callPackage ./python.nix {
-    inherit uv2nix pyproject-nix pyproject-build-systems;
-  };
-
-  hermesNpmLib = callPackage ./lib.nix {
-    inherit npm-lockfile-fix;
-  };
-
-  hermesTui = callPackage ./tui.nix {
-    inherit hermesNpmLib;
-  };
-
-  hermesWeb = callPackage ./web.nix {
-    inherit hermesNpmLib;
-  };
-
-  bundledSkills = lib.cleanSourceWith {
-    src = ../skills;
-    filter = path: _type: !(lib.hasInfix "/index-cache/" path);
-  };
-
-  runtimeDeps = [
-    nodejs_22
-    ripgrep
-    git
-    openssh
-    ffmpeg
-    tirith
-  ];
-
-  runtimePath = lib.makeBinPath runtimeDeps;
-
-  sitePackagesPath = python312.sitePackages;
-
-  # Walk propagatedBuildInputs to include transitive Python deps in PYTHONPATH.
-  # Without this, a plugin listing e.g. requests as a dep would fail at runtime
-  # if requests isn't already in the sealed uv2nix venv.
-  allExtraPythonPackages = python312.pkgs.requiredPythonModules extraPythonPackages;
-
-  pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
-
-  pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
-  uvLockHash =
-    if builtins.pathExists ../uv.lock then
-      builtins.hashString "sha256" (builtins.readFile ../uv.lock)
-    else
-      "none";
-in
-stdenv.mkDerivation {
-  pname = "hermes-agent";
-  version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;
-
-  dontUnpack = true;
-  dontBuild = true;
-  nativeBuildInputs = [ makeWrapper ];
-
-  installPhase = ''
-    runHook preInstall
-
-    mkdir -p $out/share/hermes-agent $out/bin
-    cp -r ${bundledSkills} $out/share/hermes-agent/skills
-    cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
-
-    mkdir -p $out/ui-tui
-    cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
-
-    ${lib.concatMapStringsSep "\n"
-      (name: ''
-        makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
-          --suffix PATH : "${runtimePath}" \
-          --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
-          --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
-          --set HERMES_TUI_DIR $out/ui-tui \
-          --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
-          --set HERMES_NODE ${nodejs_22}/bin/node \
-          ${lib.optionalString (extraPythonPackages != [ ]) ''--suffix PYTHONPATH : "${pythonPath}"''}
-      '')
-      [
-        "hermes"
-        "hermes-agent"
-        "hermes-acp"
-      ]
-    }
-
-    ${lib.optionalString (extraPythonPackages != [ ]) ''
-      echo "=== Checking for plugin/core package collisions ==="
-      ${hermesVenv}/bin/python3 -c "
-import pathlib, sys, re
-
-def canonical(name):
-    return re.sub(r'[-_.]+', '-', name).lower()
-
-# Collect core venv package names
-core = set()
-venv_sp = pathlib.Path('${hermesVenv}/${sitePackagesPath}')
-for di in venv_sp.glob('*.dist-info'):
-    meta = di / 'METADATA'
-    if meta.exists():
-        for line in meta.read_text().splitlines():
-            if line.startswith('Name:'):
-                core.add(canonical(line.split(':', 1)[1].strip()))
-                break
-
-# Check each extra package for collisions
-extras_dirs = [${lib.concatMapStringsSep ", " (p: "'${toString p}'") allExtraPythonPackages}]
-for edir in extras_dirs:
-    sp = pathlib.Path(edir) / '${sitePackagesPath}'
-    if not sp.exists():
-        continue
-    for di in sp.glob('*.dist-info'):
-        meta = di / 'METADATA'
-        if not meta.exists():
-            continue
-        for line in meta.read_text().splitlines():
-            if line.startswith('Name:'):
-                pkg = canonical(line.split(':', 1)[1].strip())
-                if pkg in core:
-                    print(f'ERROR: plugin package \"{pkg}\" collides with a package in hermes sealed venv', file=sys.stderr)
-                    print(f'  from: {di}', file=sys.stderr)
-                    print(f'  Remove this dependency from extraPythonPackages.', file=sys.stderr)
-                    sys.exit(1)
-                break
-
-print('No collisions found.')
-      "
-      echo "=== No collisions ==="
-    ''}
-
-    runHook postInstall
-  '';
-
-  passthru = {
-    inherit hermesTui hermesWeb hermesNpmLib hermesVenv;
-
-    devShellHook = ''
-      STAMP=".nix-stamps/hermes-agent"
-      STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
-      if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-        echo "hermes-agent: installing Python dependencies..."
-        uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
-        source .venv/bin/activate
-        uv pip install -e ".[all]"
-        [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
-        [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
-        mkdir -p .nix-stamps
-        echo "$STAMP_VALUE" > "$STAMP"
-      else
-        source .venv/bin/activate
-        export HERMES_PYTHON=${hermesVenv}/bin/python3
-      fi
-    '';
-  };
-
-  meta = with lib; {
-    description = "AI agent with advanced tool-calling capabilities";
-    homepage = "https://github.com/NousResearch/hermes-agent";
-    mainProgram = "hermes";
-    license = licenses.mit;
-    platforms = platforms.unix;
-  };
-}
@@ -28,8 +28,6 @@

  let
    cfg = config.services.hermes-agent;
-    effectivePackage = if cfg.extraPythonPackages == [ ] then cfg.package
-      else cfg.package.override { inherit (cfg) extraPythonPackages; };
    hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;

    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
@@ -458,52 +456,6 @@
        description = "Extra packages available on PATH.";
      };

-      extraPlugins = mkOption {
-        type = types.listOf types.package;
-        default = [ ];
-        description = ''
-          Directory-based plugin packages to symlink into the hermes plugins
-          directory. Each package should contain a plugin.yaml and __init__.py
-          at its root. Hermes discovers these automatically on startup.
-        '';
-        example = literalExpression ''
-          [
-            (pkgs.fetchFromGitHub {
-              owner = "stephenschoettler";
-              repo = "hermes-lcm";
-              name = "hermes-lcm";
-              rev = "v0.7.0";
-              hash = "sha256-...";
-            })
-          ]
-        '';
-      };
-
-      extraPythonPackages = mkOption {
-        type = types.listOf types.package;
-        default = [ ];
-        description = ''
-          Python packages to add to PYTHONPATH for entry-point plugin discovery.
-          These are pip-packaged plugins that register via the
-          hermes_agent.plugins entry-point group. Each package must be built
-          with the same Python interpreter as hermes (python312).
-        '';
-        example = literalExpression ''
-          [
-            (pkgs.python312Packages.buildPythonPackage {
-              pname = "rtk-hermes";
-              version = "1.0.0";
-              src = pkgs.fetchFromGitHub {
-                owner = "ogallotti";
-                repo = "rtk-hermes";
-                rev = "main";
-                hash = "sha256-...";
-              };
-            })
-          ]
-        '';
-      };
-
      restart = mkOption {
        type = types.str;
        default = "always";
@@ -618,7 +570,7 @@
      # so interactive shells share state (sessions, skills, cron) with the
      # gateway service instead of creating a separate ~/.hermes/.
      (lib.mkIf cfg.addToSystemPackages {
-        environment.systemPackages = [ effectivePackage ];
+        environment.systemPackages = [ cfg.package ];
        environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
      })

@@ -629,16 +581,6 @@
        });
      })

-      # ── Assertions ─────────────────────────────────────────────────────
-      {
-        assertions = let
-          names = map lib.getName cfg.extraPlugins;
-        in [{
-          assertion = (lib.length names) == (lib.length (lib.unique names));
-          message = "services.hermes-agent.extraPlugins: duplicate plugin names detected: ${toString names}. If using fetchFromGitHub, set name = \"plugin-name\" to disambiguate.";
-        }];
-      }
-
      # ── Warnings ──────────────────────────────────────────────────────
      (lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
        warnings = [
@@ -660,7 +602,6 @@
          "d ${cfg.stateDir}/.hermes/sessions 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/logs   2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/memories 2770 ${cfg.user} ${cfg.group} - -"
-          "d ${cfg.stateDir}/.hermes/plugins 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/home           0750 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.workingDirectory}         2770 ${cfg.user} ${cfg.group} - -"
        ];
@@ -682,7 +623,7 @@
          find ${cfg.stateDir}/.hermes -maxdepth 1 \
            \( -name "*.db" -o -name "*.db-wal" -o -name "*.db-shm" -o -name "SOUL.md" \) \
            -exec chmod g+rw {} + 2>/dev/null || true
-          for _subdir in cron sessions logs memories plugins; do
+          for _subdir in cron sessions logs memories; do
            mkdir -p "${cfg.stateDir}/.hermes/$_subdir"
            chown ${cfg.user}:${cfg.group} "${cfg.stateDir}/.hermes/$_subdir"
            chmod 2770 "${cfg.stateDir}/.hermes/$_subdir"
@@ -791,22 +732,6 @@ HERMES_NIX_ENV_EOF
          ${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
            install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
          '') cfg.documents)}
-
-        # ── Declarative plugins ─────────────────────────────────────────
-        # Remove stale managed symlinks (plugins removed from config)
-        find ${cfg.stateDir}/.hermes/plugins -maxdepth 1 -type l -name 'nix-managed-*' -delete 2>/dev/null || true
-
-        ${lib.concatStringsSep "\n" (map (plugin:
-          let
-            name = lib.getName plugin;
-          in ''
-            if [ ! -f "${plugin}/plugin.yaml" ]; then
-              echo "ERROR: extraPlugins entry '${plugin}' has no plugin.yaml" >&2
-              exit 1
-            fi
-            ln -sfn ${plugin} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
-            chown -h ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
-          '') cfg.extraPlugins)}
        '';
      }

@@ -837,7 +762,7 @@ HERMES_NIX_ENV_EOF
            # reads them at Python startup — no systemd EnvironmentFile needed.

            ExecStart = lib.concatStringsSep " " ([
-              "${effectivePackage}/bin/hermes"
+              "${cfg.package}/bin/hermes"
              "gateway"
            ] ++ cfg.extraArgs);

@@ -860,7 +785,7 @@ HERMES_NIX_ENV_EOF
          };

          path = [
-            effectivePackage
+            cfg.package
            pkgs.bash
            pkgs.coreutils
            pkgs.git
@@ -885,11 +810,11 @@ HERMES_NIX_ENV_EOF

          preStart = ''
            # Stable symlinks — container references these, not store paths directly
-            ln -sfn ${effectivePackage} ${cfg.stateDir}/current-package
+            ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
            ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint

            # GC roots so nix-collect-garbage doesn't remove store paths in use
-            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${effectivePackage} 2>/dev/null || true
+            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true

            # Check if container needs (re)creation
@@ -1,10 +0,0 @@
-# nix/overlays.nix — Expose pkgs.hermes-agent for external NixOS configs
-{ inputs, ... }:
-{
-  flake.overlays.default = final: _: {
-    hermes-agent = final.callPackage ./hermes-agent.nix {
-      inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
-      npm-lockfile-fix = inputs.npm-lockfile-fix.packages.${final.stdenv.hostPlatform.system}.default;
-    };
-  };
-}
@@ -4,19 +4,120 @@
  perSystem =
    { pkgs, inputs', ... }:
    let
-      hermesAgent = pkgs.callPackage ./hermes-agent.nix {
+      hermesVenv = pkgs.callPackage ./python.nix {
        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };
+
+      hermesNpmLib = pkgs.callPackage ./lib.nix {
        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
      };
+
+      hermesTui = pkgs.callPackage ./tui.nix {
+        inherit hermesNpmLib;
+      };
+
+      # Import bundled skills, excluding runtime caches
+      bundledSkills = pkgs.lib.cleanSourceWith {
+        src = ../skills;
+        filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
+      };
+
+      hermesWeb = pkgs.callPackage ./web.nix {
+        inherit hermesNpmLib;
+      };
+
+      runtimeDeps = with pkgs; [
+        nodejs_22
+        ripgrep
+        git
+        openssh
+        ffmpeg
+        tirith
+      ];
+
+      runtimePath = pkgs.lib.makeBinPath runtimeDeps;
+
+      # Lockfile hashes for dev shell stamps
+      pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
+      uvLockHash =
+        if builtins.pathExists ../uv.lock then
+          builtins.hashString "sha256" (builtins.readFile ../uv.lock)
+        else
+          "none";
    in
    {
      packages = {
-        default = hermesAgent;
-        tui = hermesAgent.hermesTui;
-        web = hermesAgent.hermesWeb;
+        default = pkgs.stdenv.mkDerivation {
+          pname = "hermes-agent";
+          version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version;

-        fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles {
-          packages = [ hermesAgent.hermesTui hermesAgent.hermesWeb ];
+          dontUnpack = true;
+          dontBuild = true;
+          nativeBuildInputs = [ pkgs.makeWrapper ];
+
+          installPhase = ''
+            runHook preInstall
+
+            mkdir -p $out/share/hermes-agent $out/bin
+            cp -r ${bundledSkills} $out/share/hermes-agent/skills
+            cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
+
+            # copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
+            mkdir -p $out/ui-tui
+            cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
+
+            ${pkgs.lib.concatMapStringsSep "\n"
+              (name: ''
+                makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
+                  --suffix PATH : "${runtimePath}" \
+                  --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
+                  --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
+                  --set HERMES_TUI_DIR $out/ui-tui \
+                  --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
+                  --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
+              '')
+              [
+                "hermes"
+                "hermes-agent"
+                "hermes-acp"
+              ]
+            }
+
+            runHook postInstall
+          '';
+
+          passthru.devShellHook = ''
+            STAMP=".nix-stamps/hermes-agent"
+            STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
+            if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+              echo "hermes-agent: installing Python dependencies..."
+              uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
+              source .venv/bin/activate
+              uv pip install -e ".[all]"
+              [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
+              [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
+              mkdir -p .nix-stamps
+              echo "$STAMP_VALUE" > "$STAMP"
+            else
+              source .venv/bin/activate
+              export HERMES_PYTHON=${hermesVenv}/bin/python3
+            fi
+          '';
+
+          meta = with pkgs.lib; {
+            description = "AI agent with advanced tool-calling capabilities";
+            homepage = "https://github.com/NousResearch/hermes-agent";
+            mainProgram = "hermes";
+            license = licenses.mit;
+            platforms = platforms.unix;
+          };
+        };
+
+        tui = hermesTui;
+        web = hermesWeb;
+
+        fix-lockfiles = hermesNpmLib.mkFixLockfiles {
+          packages = [ hermesTui hermesWeb ];
        };
      };
    };
@@ -7,7 +7,6 @@
  pyproject-nix,
  pyproject-build-systems,
  stdenv,
-  dependency-groups ? [ "all" ],
 }:
 let
  workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
@@ -97,5 +96,5 @@ let
      ]);
 in
 pythonSet.mkVirtualEnv "hermes-agent-env" {
-  hermes-agent = dependency-groups;
+  hermes-agent = [ "all" ];
 }
@@ -204,9 +204,8 @@ win.par.winopen.pulse()
 | `td_input_clear` | Stop input automation |
 | `td_op_screen_rect` | Get screen coords of a node |
 | `td_click_screen_point` | Click a point in a screenshot |
-| `td_screen_point_to_global` | Convert screenshot pixel to absolute screen coords |

-The table above covers the 32 tools used in typical creative workflows. The remaining 4 tools (`td_project_quit`, `td_test_session`, `td_dev_log`, `td_clear_dev_log`) are admin/dev-mode utilities — see `references/mcp-tools.md` for the full 36-tool reference with complete parameter schemas.
+See `references/mcp-tools.md` for full parameter schemas.

 ## Key Implementation Rules

@@ -339,15 +338,6 @@ See `references/network-patterns.md` for complete build scripts + shader code.
 | `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
 | `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
 | `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
-| `references/animation.md` | LFOs, timers, keyframes, easing, expression-driven motion |
-| `references/midi-osc.md` | MIDI/OSC controllers, TouchOSC, multi-machine sync |
-| `references/particles.md` | POPs and legacy particleSOP — emission, forces, collisions |
-| `references/projection-mapping.md` | Multi-window output, corner pin, mesh warp, edge blending |
-| `references/external-data.md` | HTTP, WebSocket, MQTT, Serial, TCP, webserverDAT |
-| `references/panel-ui.md` | Custom params, panel COMPs, button/slider/field, panelExecuteDAT |
-| `references/replicator.md` | replicatorCOMP — data-driven cloning, layouts, callbacks |
-| `references/dat-scripting.md` | Execute DAT family — chop/dat/parameter/panel/op/executeDAT |
-| `references/3d-scene.md` | Lighting rigs, shadows, IBL/cubemaps, multi-camera, PBR |
 | `scripts/setup.sh` | Automated setup script |

 ---
@@ -22,7 +22,6 @@ import threading
 import time
 from typing import Any, Dict, List, Optional

-from agent.memory_manager import sanitize_context
 from agent.memory_provider import MemoryProvider
 from tools.registry import tool_error

@@ -38,10 +37,7 @@ PROFILE_SCHEMA = {
    "description": (
        "Retrieve or update a peer card from Honcho — a curated list of key facts "
        "about that peer (name, role, preferences, communication style, patterns). "
-        "Pass `card` to update; omit `card` to read.  If the card is empty, the "
-        "result includes a `hint` field explaining why (observation disabled, "
-        "fresh peer, dialectic layer still warming up, etc.) — this is NOT an "
-        "error.  Peer cards accumulate over time from observed conversation."
+        "Pass `card` to update; omit `card` to read."
    ),
    "parameters": {
        "type": "object",
@@ -1060,63 +1056,6 @@ class HonchoMemoryProvider(MemoryProvider):

        return chunks

-    def _empty_profile_hint(self, peer: str) -> Dict[str, Any]:
-        """Build a diagnostic hint when honcho_profile returns an empty card.
-
-        A literal "No profile facts available yet." tells the model nothing
-        about WHY.  The model then often surfaces it to the user as a cryptic
-        error.  This hint enumerates the likely causes so the model can
-        explain the situation (or retry with a different peer).
-
-        Ordered by likelihood for a typical deployment:
-          1. Observation is disabled for this peer
-          2. Card hasn't accumulated yet (fresh peer, not enough dialectic
-             cycles — dialectic cadence runs every N turns)
-          3. Self-hosted Honcho backend doesn't support peer cards
-             (honcho-ai server < 3.x)
-        """
-        cfg = self._config
-        reasons: List[str] = []
-
-        if cfg is not None:
-            if peer == "user":
-                observe_me = bool(getattr(cfg, "user_observe_me", True))
-                observe_others = bool(getattr(cfg, "user_observe_others", True))
-            else:
-                observe_me = bool(getattr(cfg, "ai_observe_me", True))
-                observe_others = bool(getattr(cfg, "ai_observe_others", True))
-            if not (observe_me or observe_others):
-                reasons.append(
-                    f"observation is disabled for peer '{peer}' "
-                    f"(user_observe_me/ai_observe_me in config)"
-                )
-
-        cadence = getattr(self, "_dialectic_cadence", 1)
-        turn = getattr(self, "_turn_count", 0)
-        if turn < max(2, cadence):
-            reasons.append(
-                f"this session has only {turn} turn(s); peer cards accumulate "
-                f"as the dialectic layer reasons over conversation history "
-                f"(cadence every {cadence} turn(s))"
-            )
-
-        if not reasons:
-            reasons.append(
-                "peer card has no facts yet — Honcho's dialectic layer builds "
-                "this over time from observed turns; self-hosted Honcho < 3.x "
-                "does not support peer cards at all"
-            )
-
-        return {
-            "result": "No profile facts available yet.",
-            "hint": (
-                "This is not an error.  "
-                + "; ".join(reasons)
-                + ".  Try honcho_reasoning for a synthesized answer, or "
-                "honcho_search to query raw conversation excerpts."
-            ),
-        }
-
    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Record the conversation turn in Honcho (non-blocking).

@@ -1129,15 +1068,13 @@ class HonchoMemoryProvider(MemoryProvider):
            return

        msg_limit = self._config.message_max_chars if self._config else 25000
-        clean_user_content = sanitize_context(user_content or "").strip()
-        clean_assistant_content = sanitize_context(assistant_content or "").strip()

        def _sync():
            try:
                session = self._manager.get_or_create(self._session_key)
-                for chunk in self._chunk_message(clean_user_content, msg_limit):
+                for chunk in self._chunk_message(user_content, msg_limit):
                    session.add_message("user", chunk)
-                for chunk in self._chunk_message(clean_assistant_content, msg_limit):
+                for chunk in self._chunk_message(assistant_content, msg_limit):
                    session.add_message("assistant", chunk)
                self._manager._flush_session(session)
            except Exception as e:
@@ -1150,20 +1087,8 @@ class HonchoMemoryProvider(MemoryProvider):
        )
        self._sync_thread.start()

-    def on_memory_write(
-        self,
-        action: str,
-        target: str,
-        content: str,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> None:
-        """Mirror built-in user profile writes as Honcho conclusions.
-
-        ``metadata`` is accepted for compatibility with the write-origin
-        work landed in main (commit 6a957a74); it's not yet threaded into
-        the Honcho conclusion payload.  Left as a follow-up so this PR
-        stays focused on the 7-PR consolidation and its review follow-ups.
-        """
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Mirror built-in user profile writes as Honcho conclusions."""
        if action != "add" or target != "user" or not content:
            return
        if self._cron_skipped:
@@ -1229,7 +1154,7 @@ class HonchoMemoryProvider(MemoryProvider):
                    return json.dumps({"result": f"Peer card updated ({len(result)} facts).", "card": result})
                card = self._manager.get_peer_card(self._session_key, peer=peer)
                if not card:
-                    return json.dumps(self._empty_profile_hint(peer))
+                    return json.dumps({"result": "No profile facts available yet."})
                return json.dumps({"result": card})

            elif tool_name == "honcho_search":
@@ -273,38 +273,9 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:


 def _resolve_api_key(cfg: dict) -> str:
-    """Resolve API key with host -> root -> env fallback.
-
-    For self-hosted instances configured with ``baseUrl`` instead of an API
-    key, returns ``"local"`` so that credential guards throughout the CLI
-    don't reject a valid configuration.  The ``baseUrl`` is scheme-validated
-    (http/https only) so that a typo like ``baseUrl: true`` can't silently
-    pass the guard.  Schemeless strings that look like host:port (legacy
-    config shapes, e.g. ``localhost:8000``) still pass — the Honcho SDK
-    will reject them itself with a clearer error than ours.
-    """
+    """Resolve API key with host -> root -> env fallback."""
    host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
-    key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
-    if not key:
-        base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
-        base_url = (base_url or "").strip()
-        if base_url:
-            from urllib.parse import urlparse
-            try:
-                parsed = urlparse(base_url)
-            except (TypeError, ValueError):
-                parsed = None
-            if parsed and parsed.scheme in ("http", "https") and parsed.netloc:
-                return "local"
-            # Schemeless but looks like a host (contains '.' or ':' and isn't
-            # a boolean literal): let it through so legacy configs don't
-            # regress into "no API key configured" when they previously worked.
-            lowered = base_url.lower()
-            if lowered not in ("true", "false", "none", "null") and any(
-                c in base_url for c in ".:"
-            ) and not base_url.isdigit():
-                return "local"
-    return key
+    return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")


 def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:
@@ -16,7 +16,6 @@ from __future__ import annotations
 import json
 import os
 import logging
-import hashlib
 from dataclasses import dataclass, field
 from pathlib import Path

@@ -28,6 +27,7 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

+GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
 HOST = "hermes"


@@ -53,11 +53,6 @@ def resolve_active_host() -> str:
    return HOST


-def resolve_global_config_path() -> Path:
-    """Return the shared Honcho config path for the current HOME."""
-    return Path.home() / ".honcho" / "config.json"
-
-
 def resolve_config_path() -> Path:
    """Return the active Honcho config path.

@@ -77,7 +72,7 @@ def resolve_config_path() -> Path:
    if default_path != local_path and default_path.exists():
        return default_path

-    return resolve_global_config_path()
+    return GLOBAL_CONFIG_PATH


 _RECALL_MODE_ALIASES = {"auto": "hybrid"}
@@ -143,15 +138,6 @@ def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] |
    return None


-# Default HTTP timeout (seconds) applied when no explicit timeout is
-# configured via HonchoClientConfig.timeout, honcho.timeout / requestTimeout,
-# or HONCHO_TIMEOUT. Honcho calls happen on the post-response path of
-# run_conversation; without a cap the agent can block indefinitely when
-# the Honcho backend is unreachable, preventing the gateway from
-# delivering the already-generated response.
-_DEFAULT_HTTP_TIMEOUT = 30.0
-
-
 def _resolve_optional_float(*values: Any) -> float | None:
    """Return the first non-empty value coerced to a positive float."""
    for value in values:
@@ -240,13 +226,6 @@ class HonchoClientConfig:
    # Identity
    peer_name: str | None = None
    ai_peer: str = "hermes"
-    # When True, ``peer_name`` wins over any gateway-supplied runtime
-    # identity (Telegram UID, Discord ID, …) when resolving the user peer.
-    # This keeps memory unified across platforms for single-user deployments
-    # where Honcho's one peer-name is an unambiguous identity — otherwise
-    # each platform would fork memory into its own peer (#14984).  Default
-    # ``False`` preserves existing multi-user behaviour.
-    pin_peer_name: bool = False
    # Toggles
    enabled: bool = False
    save_messages: bool = True
@@ -441,11 +420,6 @@ class HonchoClientConfig:
            timeout=timeout,
            peer_name=host_block.get("peerName") or raw.get("peerName"),
            ai_peer=ai_peer,
-            pin_peer_name=_resolve_bool(
-                host_block.get("pinPeerName"),
-                raw.get("pinPeerName"),
-                default=False,
-            ),
            enabled=enabled,
            save_messages=save_messages,
            write_frequency=write_frequency,
@@ -548,39 +522,6 @@ class HonchoClientConfig:
            pass
        return None

-    # Honcho enforces a 100-char limit on session IDs. Long gateway session keys
-    # (Matrix "!room:server" + thread event IDs, Telegram supergroup reply
-    # chains, Slack thread IDs with long workspace prefixes) can overflow this
-    # limit after sanitization; the Honcho API then rejects every call for that
-    # session with "session_id too long". See issue #13868.
-    _HONCHO_SESSION_ID_MAX_LEN = 100
-    _HONCHO_SESSION_ID_HASH_LEN = 8
-
-    @classmethod
-    def _enforce_session_id_limit(cls, sanitized: str, original: str) -> str:
-        """Truncate a sanitized session ID to Honcho's 100-char limit.
-
-        The common case (short keys) short-circuits with no modification.
-        For over-limit keys, keep a prefix of the sanitized ID and append a
-        deterministic ``-<sha256 prefix>`` suffix so two distinct long keys
-        that share a leading segment don't collide onto the same truncated ID.
-        The hash is taken over the *original* pre-sanitization key, so two
-        inputs that sanitize to the same string still collide intentionally
-        (same logical session), but two inputs that only share a prefix do not.
-        """
-        max_len = cls._HONCHO_SESSION_ID_MAX_LEN
-        if len(sanitized) <= max_len:
-            return sanitized
-
-        hash_len = cls._HONCHO_SESSION_ID_HASH_LEN
-        digest = hashlib.sha256(original.encode("utf-8")).hexdigest()[:hash_len]
-        # max_len - hash_len - 1 (for the '-' separator) chars of the sanitized
-        # prefix, then '-<hash>'. Strip any trailing hyphen from the prefix so
-        # the result doesn't double up on separators.
-        prefix_len = max_len - hash_len - 1
-        prefix = sanitized[:prefix_len].rstrip("-")
-        return f"{prefix}-{digest}"
-
    def resolve_session_name(
        self,
        cwd: str | None = None,
@@ -625,7 +566,7 @@ class HonchoClientConfig:
        if gateway_session_key:
            sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
            if sanitized:
-                return self._enforce_session_id_limit(sanitized, gateway_session_key)
+                return sanitized

        # per-session: inherit Hermes session_id (new Honcho session each run)
        if self.session_strategy == "per-session" and session_id:
@@ -705,11 +646,6 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
        except Exception:
            pass

-    # Fall back to the default so an unconfigured install cannot hang
-    # indefinitely on a stalled Honcho request.
-    if resolved_timeout is None:
-        resolved_timeout = _DEFAULT_HTTP_TIMEOUT
-
    if resolved_base_url:
        logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
    else:
@@ -95,7 +95,6 @@ class HonchoSessionManager:
        self._config = config
        self._runtime_user_peer_name = runtime_user_peer_name
        self._cache: dict[str, HonchoSession] = {}
-        self._cache_lock = threading.RLock()
        self._peers_cache: dict[str, Any] = {}
        self._sessions_cache: dict[str, Any] = {}

@@ -274,35 +273,17 @@ class HonchoSessionManager:
        Returns:
            The session.
        """
-        with self._cache_lock:
-            if key in self._cache:
-                logger.debug("Local session cache hit: %s", key)
-                return self._cache[key]
+        if key in self._cache:
+            logger.debug("Local session cache hit: %s", key)
+            return self._cache[key]

-        # Determine peer IDs — no lock needed (read-only, no shared state mutation).
-        # Gateway sessions normally use the runtime user identity (the
-        # platform-native ID: Telegram UID, Discord snowflake, Slack user,
-        # etc.) so multi-user bots scope memory per user.  For a single-user
-        # deployment the config-supplied ``peer_name`` is an unambiguous
-        # identity and we should keep it unified across platforms — see
-        # #14984.  Opt into that with ``hosts.<host>.pinPeerName: true`` in
-        # ``honcho.json`` (or root-level ``pinPeerName: true``).
-        # `is True` (not `bool(...)`) is deliberate: several multi-user tests
-        # pass a ``MagicMock`` for ``config`` where ``mock.pin_peer_name``
-        # silently returns another MagicMock — truthy by default.  Requiring
-        # strict ``True`` keeps pinning as opt-in even for callers that
-        # haven't updated their mocks yet; real configs built via
-        # ``from_global_config`` always produce a proper boolean.
-        pin_peer_name = (
-            self._config is not None
-            and bool(getattr(self._config, "peer_name", None))
-            and getattr(self._config, "pin_peer_name", False) is True
-        )
-        if self._runtime_user_peer_name and not pin_peer_name:
+        # Gateway sessions should use the runtime user identity when available.
+        if self._runtime_user_peer_name:
            user_peer_id = self._sanitize_id(self._runtime_user_peer_name)
        elif self._config and self._config.peer_name:
            user_peer_id = self._sanitize_id(self._config.peer_name)
        else:
+            # Fallback: derive from session key
            parts = key.split(":", 1)
            channel = parts[0] if len(parts) > 1 else "default"
            chat_id = parts[1] if len(parts) > 1 else key
@@ -312,14 +293,19 @@ class HonchoSessionManager:
            self._config.ai_peer if self._config else "hermes-assistant"
        )

-        # All expensive I/O outside the lock — Honcho's persistence is source of truth
+        # Sanitize session ID for Honcho
        honcho_session_id = self._sanitize_id(key)
+
+        # Get or create peers
        user_peer = self._get_or_create_peer(user_peer_id)
        assistant_peer = self._get_or_create_peer(assistant_peer_id)
+
+        # Get or create Honcho session
        honcho_session, existing_messages = self._get_or_create_honcho_session(
            honcho_session_id, user_peer, assistant_peer
        )

+        # Convert Honcho messages to local format
        local_messages = []
        for msg in existing_messages:
            role = "assistant" if msg.peer_id == assistant_peer_id else "user"
@@ -327,9 +313,10 @@ class HonchoSessionManager:
                "role": role,
                "content": msg.content,
                "timestamp": msg.created_at.isoformat() if msg.created_at else "",
-                "_synced": True,
+                "_synced": True,  # Already in Honcho
            })

+        # Create local session wrapper with existing messages
        session = HonchoSession(
            key=key,
            user_peer_id=user_peer_id,
@@ -338,9 +325,7 @@ class HonchoSessionManager:
            messages=local_messages,
        )

-        # Write to cache under lock — only one writer wins
-        with self._cache_lock:
-            self._cache[key] = session
+        self._cache[key] = session
        return session

    def _flush_session(self, session: HonchoSession) -> bool:
@@ -371,15 +356,13 @@ class HonchoSessionManager:
            for msg in new_messages:
                msg["_synced"] = True
            logger.debug("Synced %d messages to Honcho for %s", len(honcho_messages), session.key)
-            with self._cache_lock:
-                self._cache[session.key] = session
+            self._cache[session.key] = session
            return True
        except Exception as e:
            for msg in new_messages:
                msg["_synced"] = False
            logger.error("Failed to sync messages to Honcho: %s", e)
-            with self._cache_lock:
-                self._cache[session.key] = session
+            self._cache[session.key] = session
            return False

    def _async_writer_loop(self) -> None:
@@ -451,9 +434,7 @@ class HonchoSessionManager:
        Called at session end for "session" write_frequency, or to force
        a sync before process exit regardless of mode.
        """
-        with self._cache_lock:
-            sessions = list(self._cache.values())
-        for session in sessions:
+        for session in list(self._cache.values()):
            try:
                self._flush_session(session)
            except Exception as e:
@@ -478,10 +459,9 @@ class HonchoSessionManager:

    def delete(self, key: str) -> bool:
        """Delete a session from local cache."""
-        with self._cache_lock:
-            if key in self._cache:
-                del self._cache[key]
-                return True
+        if key in self._cache:
+            del self._cache[key]
+            return True
        return False

    def new_session(self, key: str) -> HonchoSession:
@@ -493,25 +473,20 @@ class HonchoSessionManager:
        """
        import time

-        # Hold the reentrant lock across get_or_create so a concurrent caller
-        # can't observe the (old-popped, new-not-yet-inserted) gap and create
-        # its own session under the raw key.  `_cache_lock` is an RLock so
-        # nested reacquisition inside get_or_create is safe.
-        with self._cache_lock:
-            # Remove old session from caches (but don't delete from Honcho)
-            old_session = self._cache.pop(key, None)
-            if old_session:
-                self._sessions_cache.pop(old_session.honcho_session_id, None)
+        # Remove old session from caches (but don't delete from Honcho)
+        old_session = self._cache.pop(key, None)
+        if old_session:
+            self._sessions_cache.pop(old_session.honcho_session_id, None)

-            # Create new session with timestamp suffix
-            timestamp = int(time.time())
-            new_key = f"{key}:{timestamp}"
+        # Create new session with timestamp suffix
+        timestamp = int(time.time())
+        new_key = f"{key}:{timestamp}"

-            # get_or_create will create a fresh session
-            session = self.get_or_create(new_key)
+        # get_or_create will create a fresh session
+        session = self.get_or_create(new_key)

-            # Cache under the original key so callers find it by the expected name
-            self._cache[key] = session
+        # Cache under the original key so callers find it by the expected name
+        self._cache[key] = session

        logger.info("Created new session for %s (honcho: %s)", key, session.honcho_session_id)
        return session
@@ -1,307 +0,0 @@
-# providers/
-
-Single source of truth for every inference provider Hermes knows about.
-
-Each provider is declared once here as a `ProviderProfile`. Every other layer —
-auth resolution, transport kwargs, model listing, runtime routing — reads from
-these profiles instead of maintaining its own parallel data.
-
---
-
-## Directory layout
-
-```
-providers/
-├── base.py           ProviderProfile dataclass + OMIT_TEMPERATURE sentinel
-├── __init__.py       Registry: register_provider(), get_provider_profile()
-├── README.md         This file
-│
-├── # Simple providers — just identity + auth + endpoint
-├── alibaba.py        Alibaba Cloud DashScope
-├── arcee.py          Arcee AI
-├── bedrock.py        AWS Bedrock  (api_mode=bedrock_converse)
-├── deepseek.py       DeepSeek
-├── huggingface.py    Hugging Face Inference API
-├── kilocode.py       Kilo Code
-├── minimax.py        MiniMax (international + CN)
-├── nvidia.py         NVIDIA NIM  (default_max_tokens=16384)
-├── ollama_cloud.py   Ollama Cloud
-├── stepfun.py        StepFun
-├── xiaomi.py         Xiaomi MiMo
-├── xai.py            xAI Grok  (api_mode=codex_responses)
-├── zai.py            Z.AI / GLM
-│
-├── # Medium — one or two quirks
-├── anthropic.py      Native Anthropic  (x-api-key header, api_mode=anthropic_messages)
-├── copilot.py        GitHub Copilot  (auth_type=copilot, reasoning per model)
-├── copilot_acp.py    Copilot ACP subprocess  (api_mode=copilot_acp)
-├── custom.py         Custom/Ollama local  (think=false, num_ctx)
-├── gemini.py         Google Gemini AI Studio + Cloud Code OAuth
-├── kimi.py           Kimi Coding  (OMIT_TEMPERATURE, thinking, dual endpoint)
-├── openai_codex.py   OpenAI Codex OAuth  (api_mode=codex_responses)
-├── opencode.py       OpenCode Zen + Go  (per-model api_mode routing)
-│
-├── # Complex — subclasses with multiple overrides
-├── nous.py           Nous Portal  (tags, attribution, reasoning omit-when-disabled)
-├── openrouter.py     OpenRouter  (provider preferences, public model fetch)
-├── qwen.py           Qwen OAuth  (message normalization, cache_control, vl_hires)
-└── vercel.py         Vercel AI Gateway  (attribution headers, reasoning passthrough)
-```
-
---
-
-## ProviderProfile fields
-
-```python
-@dataclass
-class ProviderProfile:
-    # Identity
-    name: str                    # canonical ID — auto-registered as PROVIDER_REGISTRY key for new api-key providers
-    api_mode: str                # "chat_completions" | "anthropic_messages" |
-                                 # "codex_responses" | "bedrock_converse" | "copilot_acp"
-    aliases: tuple               # alternate names resolved by get_provider_profile()
-
-    # Auth & endpoints
-    env_vars: tuple              # env var names holding the API key, in priority order
-    base_url: str                # default inference endpoint
-    models_url: str              # explicit models endpoint; falls back to {base_url}/models
-                                 # set when the models catalog lives at a different URL
-                                 # (e.g. OpenRouter: public /api/v1/models vs /api/v1 inference)
-    auth_type: str               # "api_key" | "oauth_device_code" | "oauth_external" |
-                                 # "copilot" | "aws" | "external_process"
-
-    # Client-level quirks
-    default_headers: dict        # extra HTTP headers sent on every request
-
-    # Request-level quirks
-    fixed_temperature: Any       # None = use caller's default; OMIT_TEMPERATURE = don't send
-    default_max_tokens: int|None # inject max_tokens when caller omits it
-    default_aux_model: str       # cheap model for auxiliary tasks (compression, vision, etc.)
-                                 # empty string = use main model (default)
-```
-
---
-
-## Hooks (override in a subclass)
-
-| Method | When to override |
-|--------|-----------------|
-| `prepare_messages(messages)` | Provider needs message pre-processing (Qwen: string → list-of-parts, cache_control) |
-| `build_extra_body(*, session_id, **ctx)` | Provider-specific `extra_body` fields (Nous: tags, OpenRouter: provider preferences) |
-| `build_api_kwargs_extras(*, reasoning_config, **ctx)` | Returns `(extra_body_additions, top_level_kwargs)` — use when some fields go to `extra_body` and some go top-level (Kimi: `reasoning_effort` top-level; OpenRouter: `reasoning` in extra_body) |
-| `fetch_models(*, api_key, timeout)` | Custom model listing (Anthropic: x-api-key header; OpenRouter: public endpoint, no auth; Bedrock/copilot-acp: return None) |
-
-All hooks have safe defaults — only override what differs from the base.
-
---
-
-## How to add a new provider
-
-### 1. Simple (standard OpenAI-compatible endpoint)
-
-```python
-# providers/myprovider.py
-from providers import register_provider
-from providers.base import ProviderProfile
-
-myprovider = ProviderProfile(
-    name="myprovider",           # must match id in hermes_cli/auth.py PROVIDER_REGISTRY
-    aliases=("my-provider", "myp"),
-    api_mode="chat_completions",
-    env_vars=("MYPROVIDER_API_KEY",),
-    base_url="https://api.myprovider.com/v1",
-    auth_type="api_key",
-)
-
-register_provider(myprovider)
-```
-
-The default `fetch_models()` will call `GET https://api.myprovider.com/v1/models`
-with Bearer auth automatically. No override needed for standard `/v1/models`.
-
-### 2. With quirks (subclass)
-
-```python
-# providers/myprovider.py
-from typing import Any
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class MyProviderProfile(ProviderProfile):
-    """My provider — custom reasoning header."""
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        **ctx: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        extra_body: dict[str, Any] = {}
-        if reasoning_config:
-            extra_body["my_reasoning"] = reasoning_config.get("effort", "medium")
-        return extra_body, {}
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        # Override only if your endpoint differs from standard /v1/models
-        return super().fetch_models(api_key=api_key, timeout=timeout)
-
-
-myprovider = MyProviderProfile(
-    name="myprovider",
-    aliases=("myp",),
-    env_vars=("MYPROVIDER_API_KEY",),
-    base_url="https://api.myprovider.com/v1",
-)
-
-register_provider(myprovider)
-```
-
-### 3. Wire it up
-
-After creating the file, add `name` to the `_PROFILE_ACTIVE_PROVIDERS` set in
-`run_agent.py` once you've verified parity against the legacy flag path. Start
-with a simple provider (no message prep, no reasoning quirks) and work up.
-
---
-
-## fetch_models contract
-
-```python
-def fetch_models(
-    self,
-    *,
-    api_key: str | None = None,
-    timeout: float = 8.0,
-) -> list[str] | None:
-    ...
-```
-
- Returns `list[str]`: model IDs from the provider's live endpoint.
- Returns `None`: provider doesn't support REST model listing (Bedrock, copilot-acp),
-  or the request failed. Callers **must** fall back to `_PROVIDER_MODELS` on `None`.
- Never raises — swallow exceptions and return `None`.
- Default implementation: `GET {base_url}/models` with Bearer auth. Works for any
-  standard OpenAI-compatible provider.
-
-**Override when:**
- Auth header is not `Bearer` (Anthropic: `x-api-key`)
- Endpoint path differs from `/models` AND you can't just set `models_url` (OpenRouter: public endpoint, pass `api_key=None` explicitly)
- Response format differs (extra wrapping, non-standard `id` field)
- Provider has no REST endpoint (Bedrock, copilot-acp → return `None`)
- Filtering needed post-fetch (only tool-capable models, etc.)
-
-Use `models_url` instead of overriding when the only difference is the URL:
-
-```python
-# No subclass needed — just set models_url
-myprovider = ProviderProfile(
-    name="myprovider",
-    base_url="https://api.myprovider.com/v1",
-    models_url="https://catalog.myprovider.com/models",  # different host
-)
-```
-
---
-
-## Debugging
-
-### Check if a provider resolves
-
-```python
-from providers import get_provider_profile
-
-p = get_provider_profile("myprovider")
-print(p)           # ProviderProfile(name='myprovider', ...)
-print(p.base_url)
-print(p.api_mode)
-```
-
-### Check all registered providers
-
-```python
-from providers import _REGISTRY
-print(list(_REGISTRY.keys()))
-```
-
-### Test live model fetch
-
-```python
-import os
-from providers import get_provider_profile
-
-p = get_provider_profile("myprovider")
-key = os.getenv("MYPROVIDER_API_KEY")
-models = p.fetch_models(api_key=key, timeout=5.0)
-print(models)      # list of model IDs, or None on failure
-```
-
-### Test alias resolution
-
-```python
-from providers import get_provider_profile
-
-# All of these should return the same profile
-assert get_provider_profile("openrouter").name == "openrouter"
-assert get_provider_profile("or").name == "openrouter"
-```
-
-### Run the provider test suite
-
-```bash
-# From the repo root
-source venv/bin/activate
-python -m pytest tests/providers/ -v
-```
-
-### Check ruff + ty compliance
-
-```bash
-source venv/bin/activate
-ruff format providers/*.py
-ruff check providers/*.py --select UP,E,F,I,W
-ty check providers/*.py
-```
-
---
-
-## Common mistakes
-
-**Wrong `name`** — must be the same string that appears as the key in
-`hermes_cli/auth.py` `PROVIDER_REGISTRY`. New api-key providers auto-register
-into `PROVIDER_REGISTRY` from the profile, so the name IS the key. For providers
-with a pre-existing `PROVIDER_REGISTRY` entry, use the exact `id` field value.
-
-**Wrong `env_vars`** — separate API-key vars from base-URL override vars in the
-tuple. Env vars that end with `_BASE_URL` or `_URL` are treated as URL overrides;
-everything else is treated as an API key. Getting this wrong causes the doctor
-health check to send a URL string as a Bearer token.
-
-**Wrong `base_url`** — several providers have non-obvious paths:
-`stepfun: /step_plan/v1`, `opencode-go: /zen/go/v1`. The profile's `base_url`
-is also used as the `inference_base_url` when auto-registering into `PROVIDER_REGISTRY`
-for new providers, so it must be correct for auth resolution to work.
-
-**Skipping `api_mode`** — defaults to `chat_completions`. Providers that use
-`anthropic_messages`, `codex_responses`, `bedrock_converse`, or `copilot_acp`
-must set it explicitly.
-
-**Forgetting `register_provider()`** — auto-discovery runs `pkgutil.iter_modules`
-over the package and imports each module, but only if `register_provider()` is
-called at module level. Without it the profile is never in `_REGISTRY`.
-
-**`fetch_models` returning the wrong shape** — must return `list[str]` (plain
-model IDs), not `list[tuple]` or `list[dict]`. Callers expect plain strings.
-
-**Wrong `build_api_kwargs_extras` return shape** — must return a 2-tuple
-`(extra_body_dict, top_level_dict)`. Returning a single dict causes a
-`ValueError: not enough values to unpack` in the transport.
-
-**`build_api_kwargs_extras` wrong tuple** — must return `(extra_body_dict,
-top_level_dict)`. Returning a flat dict or swapping the order silently sends
-fields to the wrong place.
@@ -1,76 +0,0 @@
-"""Provider module registry.
-
-Auto-discovers ProviderProfile instances from providers/*.py modules.
-Each module should define a module-level PROVIDER or PROVIDERS list.
-
-Usage:
-    from providers import get_provider_profile
-    profile = get_provider_profile("nvidia")  # returns ProviderProfile or None
-    profile = get_provider_profile("kimi")    # checks name + aliases
-"""
-
-from __future__ import annotations
-
-from providers.base import OMIT_TEMPERATURE, ProviderProfile  # noqa: F401
-
-_REGISTRY: dict[str, ProviderProfile] = {}
-_ALIASES: dict[str, str] = {}
-_discovered = False
-
-
-def register_provider(profile: ProviderProfile) -> None:
-    """Register a provider profile by name and aliases."""
-    _REGISTRY[profile.name] = profile
-    for alias in profile.aliases:
-        _ALIASES[alias] = profile.name
-
-
-def get_provider_profile(name: str) -> ProviderProfile | None:
-    """Look up a provider profile by name or alias.
-
-    Returns None if the provider has no profile (falls back to generic).
-    """
-    if not _discovered:
-        _discover_providers()
-    canonical = _ALIASES.get(name, name)
-    return _REGISTRY.get(canonical)
-
-
-def list_providers() -> list[ProviderProfile]:
-    """Return all registered provider profiles (one per canonical name)."""
-    if not _discovered:
-        _discover_providers()
-    # Deduplicate: _REGISTRY has canonical names; _ALIASES points to same objects
-    seen: set[int] = set()
-    result: list[ProviderProfile] = []
-    for profile in _REGISTRY.values():
-        pid = id(profile)
-        if pid not in seen:
-            seen.add(pid)
-            result.append(profile)
-    return result
-
-
-def _discover_providers() -> None:
-    """Import all provider modules to trigger registration."""
-    global _discovered
-    if _discovered:
-        return
-    _discovered = True
-
-    import importlib
-    import pkgutil
-
-    import providers as _pkg
-
-    for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
-        if modname.startswith("_") or modname == "base":
-            continue
-        try:
-            importlib.import_module(f"providers.{modname}")
-        except ImportError as e:
-            import logging
-
-            logging.getLogger(__name__).warning(
-                "Failed to import provider module %s: %s", modname, e
-            )
@@ -1,13 +0,0 @@
-"""Alibaba Cloud DashScope provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-alibaba = ProviderProfile(
-    name="alibaba",
-    aliases=("dashscope", "alibaba-cloud", "qwen-dashscope"),
-    env_vars=("DASHSCOPE_API_KEY",),
-    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
-)
-
-register_provider(alibaba)
@@ -1,52 +0,0 @@
-"""Native Anthropic provider profile."""
-
-import json
-import logging
-import urllib.request
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-logger = logging.getLogger(__name__)
-
-
-class AnthropicProfile(ProviderProfile):
-    """Native Anthropic — uses x-api-key header, not Bearer."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Anthropic uses x-api-key header and anthropic-version."""
-        if not api_key:
-            return None
-        try:
-            req = urllib.request.Request("https://api.anthropic.com/v1/models")
-            req.add_header("x-api-key", api_key)
-            req.add_header("anthropic-version", "2023-06-01")
-            req.add_header("Accept", "application/json")
-            with urllib.request.urlopen(req, timeout=timeout) as resp:
-                data = json.loads(resp.read().decode())
-            return [
-                m["id"]
-                for m in data.get("data", [])
-                if isinstance(m, dict) and "id" in m
-            ]
-        except Exception as exc:
-            logger.debug("fetch_models(anthropic): %s", exc)
-            return None
-
-
-anthropic = AnthropicProfile(
-    name="anthropic",
-    aliases=("claude", "claude-oauth", "claude-code"),
-    api_mode="anthropic_messages",
-    env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
-    base_url="https://api.anthropic.com",
-    auth_type="api_key",
-    default_aux_model="claude-haiku-4-5-20251001",
-)
-
-register_provider(anthropic)
@@ -1,13 +0,0 @@
-"""Arcee AI provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-arcee = ProviderProfile(
-    name="arcee",
-    aliases=("arcee-ai", "arceeai"),
-    env_vars=("ARCEEAI_API_KEY",),
-    base_url="https://api.arcee.ai/api/v1",
-)
-
-register_provider(arcee)
@@ -1,165 +0,0 @@
-"""Provider profile base class.
-
-A ProviderProfile declares everything about an inference provider in one place:
-auth, endpoints, client quirks, request-time quirks. The transport reads this
-instead of receiving 20+ boolean flags.
-
-Provider profiles are DECLARATIVE — they describe the provider's behavior.
-They do NOT own client construction, credential rotation, or streaming.
-Those stay on AIAgent.
-"""
-
-from __future__ import annotations
-
-import logging
-from dataclasses import dataclass, field
-from typing import Any
-
-logger = logging.getLogger(__name__)
-
-# Sentinel for "omit temperature entirely" (Kimi: server manages it)
-OMIT_TEMPERATURE = object()
-
-
-@dataclass
-class ProviderProfile:
-    """Base provider profile — subclass or instantiate with overrides."""
-
-    # ── Identity ─────────────────────────────────────────────
-    name: str
-    api_mode: str = "chat_completions"
-    aliases: tuple = ()
-
-    # ── Human-readable metadata ───────────────────────────────
-    display_name: str = ""       # e.g. "GMI Cloud" — shown in picker/labels
-    description: str = ""        # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle
-    signup_url: str = ""         # e.g. "https://www.gmicloud.ai/" — shown during setup
-
-    # ── Auth & endpoints ─────────────────────────────────────
-    env_vars: tuple = ()
-    base_url: str = ""
-    models_url: str = ""  # explicit models endpoint; falls back to {base_url}/models
-    auth_type: str = "api_key"   # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
-
-    # ── Model catalog ─────────────────────────────────────────
-    # fallback_models: curated list shown in /model picker when live fetch fails.
-    # Only agentic models that support tool calling should appear here.
-    fallback_models: tuple = ()
-
-    # hostname: base hostname for URL→provider reverse-mapping in model_metadata.py
-    # e.g. "api.gmi-serving.com". Derived from base_url when empty.
-    hostname: str = ""
-
-    # ── Client-level quirks (set once at client construction) ─
-    default_headers: dict[str, str] = field(default_factory=dict)
-
-    # ── Request-level quirks ─────────────────────────────────
-    # Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
-    fixed_temperature: Any = None
-    default_max_tokens: int | None = None
-    default_aux_model: str = (
-        ""  # cheap model for auxiliary tasks (compression, vision, etc.)
-    )
-    # empty = use main model
-
-    # ── Hooks (override in subclass for complex providers) ───
-
-    def get_hostname(self) -> str:
-        """Return the provider's base hostname for URL-based detection.
-
-        Uses self.hostname if set explicitly, otherwise derives it from base_url.
-        e.g. 'https://api.gmi-serving.com/v1' → 'api.gmi-serving.com'
-        """
-        if self.hostname:
-            return self.hostname
-        if self.base_url:
-            from urllib.parse import urlparse
-            return urlparse(self.base_url).hostname or ""
-        return ""
-
-    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        """Provider-specific message preprocessing.
-
-        Called AFTER codex field sanitization, BEFORE developer role swap.
-        Default: pass-through.
-        """
-        return messages
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context: Any
-    ) -> dict[str, Any]:
-        """Provider-specific extra_body fields.
-
-        Merged into the API kwargs extra_body. Default: empty dict.
-        """
-        return {}
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        **context: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Provider-specific kwargs split between extra_body and top-level api_kwargs.
-
-        Returns (extra_body_additions, top_level_kwargs).
-        The transport merges extra_body_additions into extra_body, and
-        top_level_kwargs directly into api_kwargs.
-
-        This split exists because some providers put reasoning config in
-        extra_body (OpenRouter: extra_body.reasoning) while others put it
-        as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).
-
-        Default: ({}, {}).
-        """
-        return {}, {}
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Fetch the live model list from the provider's models endpoint.
-
-        Returns a list of model ID strings, or None if the fetch failed or
-        the provider does not support live model listing.
-
-        Resolution order for the endpoint URL:
-          1. self.models_url  (explicit override — use when the models
-             endpoint differs from the inference base URL, e.g. OpenRouter
-             exposes a public catalog at /api/v1/models while inference is
-             at /api/v1)
-          2. self.base_url + "/models"  (standard OpenAI-compat fallback)
-
-        The default implementation sends Bearer auth when api_key is given
-        and forwards self.default_headers. Override to customise auth, path,
-        response shape, or to return None for providers with no REST catalog.
-
-        Callers must always fall back to the static _PROVIDER_MODELS list
-        when this returns None.
-        """
-        url = (self.models_url or "").strip()
-        if not url:
-            if not self.base_url:
-                return None
-            url = self.base_url.rstrip("/") + "/models"
-
-        import json
-        import urllib.request
-
-        req = urllib.request.Request(url)
-        if api_key:
-            req.add_header("Authorization", f"Bearer {api_key}")
-        req.add_header("Accept", "application/json")
-        for k, v in self.default_headers.items():
-            req.add_header(k, v)
-
-        try:
-            with urllib.request.urlopen(req, timeout=timeout) as resp:
-                data = json.loads(resp.read().decode())
-            items = data if isinstance(data, list) else data.get("data", [])
-            return [m["id"] for m in items if isinstance(m, dict) and "id" in m]
-        except Exception as exc:
-            logger.debug("fetch_models(%s): %s", self.name, exc)
-            return None
@@ -1,29 +0,0 @@
-"""AWS Bedrock provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class BedrockProfile(ProviderProfile):
-    """AWS Bedrock — no REST /v1/models endpoint; uses AWS SDK."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Bedrock model listing requires AWS SDK, not a REST call."""
-        return None
-
-
-bedrock = BedrockProfile(
-    name="bedrock",
-    aliases=("aws", "aws-bedrock", "amazon-bedrock", "amazon"),
-    api_mode="bedrock_converse",
-    env_vars=(),  # AWS SDK credentials — not env vars
-    base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
-    auth_type="aws_sdk",
-)
-
-register_provider(bedrock)
@@ -1,58 +0,0 @@
-"""Copilot / GitHub Models provider profile.
-
-Copilot uses per-model api_mode routing:
-  - GPT-5+ / Codex models → codex_responses
-  - Claude models → anthropic_messages
-  - Everything else → chat_completions (this profile covers that subset)
-
-Key quirks for the chat_completions subset:
-  - Editor attribution headers (via copilot_default_headers())
-  - GitHub Models reasoning extra_body (model-catalog gated)
-"""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class CopilotProfile(ProviderProfile):
-    """GitHub Copilot / GitHub Models — editor headers + reasoning."""
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        model: str | None = None,
-        reasoning_config: dict | None = None,
-        supports_reasoning: bool = False,
-        **ctx,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        extra_body: dict[str, Any] = {}
-        if supports_reasoning and model:
-            try:
-                from hermes_cli.models import github_model_reasoning_efforts
-
-                supported_efforts = github_model_reasoning_efforts(model)
-                if supported_efforts and reasoning_config:
-                    effort = reasoning_config.get("effort", "medium")
-                    # Normalize non-standard effort levels to the nearest supported
-                    if effort == "xhigh":
-                        effort = "high"
-                    if effort in supported_efforts:
-                        extra_body["reasoning"] = {"effort": effort}
-                elif supported_efforts:
-                    extra_body["reasoning"] = {"effort": "medium"}
-            except Exception:
-                pass
-        return extra_body, {}
-
-
-copilot = CopilotProfile(
-    name="copilot",
-    aliases=("github-copilot", "github-models", "github-model", "github"),
-    env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
-    base_url="https://api.githubcopilot.com",
-    auth_type="copilot",
-)
-
-register_provider(copilot)
@@ -1,34 +0,0 @@
-"""GitHub Copilot ACP provider profile.
-
-copilot-acp uses an external ACP subprocess — NOT the standard
-transport. api_mode="copilot_acp" is handled separately in run_agent.py.
-The profile captures auth + endpoint metadata for registry migration.
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class CopilotACPProfile(ProviderProfile):
-    """GitHub Copilot ACP — external process, no REST models endpoint."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Model listing is handled by the ACP subprocess."""
-        return None
-
-
-copilot_acp = CopilotACPProfile(
-    name="copilot-acp",
-    aliases=("github-copilot-acp", "copilot-acp-agent"),
-    api_mode="chat_completions",  # ACP subprocess uses chat_completions routing
-    env_vars=(),  # Managed by ACP subprocess
-    base_url="acp://copilot",  # ACP internal scheme
-    auth_type="external_process",
-)
-
-register_provider(copilot_acp)
@@ -1,71 +0,0 @@
-"""Custom / Ollama (local) provider profile.
-
-Covers any endpoint registered as provider="custom", including local
-Ollama instances. Key quirks:
-  - ollama_num_ctx → extra_body.options.num_ctx (local context window)
-  - reasoning_config disabled → extra_body.think = False
-"""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class CustomProfile(ProviderProfile):
-    """Custom/Ollama local provider — think=false and num_ctx support."""
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        ollama_num_ctx: int | None = None,
-        **ctx: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        extra_body: dict[str, Any] = {}
-
-        # Ollama context window
-        if ollama_num_ctx:
-            options = extra_body.get("options", {})
-            options["num_ctx"] = ollama_num_ctx
-            extra_body["options"] = options
-
-        # Disable thinking when reasoning is turned off
-        if reasoning_config and isinstance(reasoning_config, dict):
-            _effort = (reasoning_config.get("effort") or "").strip().lower()
-            _enabled = reasoning_config.get("enabled", True)
-            if _effort == "none" or _enabled is False:
-                extra_body["think"] = False
-
-        return extra_body, {}
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Custom/Ollama: base_url is user-configured; fetch if set."""
-        if not self.base_url:
-            return None
-        return super().fetch_models(api_key=api_key, timeout=timeout)
-
-
-custom = CustomProfile(
-    name="custom",
-    aliases=(
-        "ollama",
-        "local",
-        "lmstudio",
-        "lm-studio",
-        "lm_studio",
-        "vllm",
-        "llamacpp",
-        "llama.cpp",
-        "llama-cpp",
-    ),
-    env_vars=(),  # No fixed key — custom endpoint
-    base_url="",  # User-configured
-)
-
-register_provider(custom)
@@ -1,20 +0,0 @@
-"""DeepSeek provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-deepseek = ProviderProfile(
-    name="deepseek",
-    aliases=("deepseek-chat",),
-    env_vars=("DEEPSEEK_API_KEY",),
-    display_name="DeepSeek",
-    description="DeepSeek — native DeepSeek API",
-    signup_url="https://platform.deepseek.com/",
-    fallback_models=(
-        "deepseek-chat",
-        "deepseek-reasoner",
-    ),
-    base_url="https://api.deepseek.com/v1",
-)
-
-register_provider(deepseek)
@@ -1,34 +0,0 @@
-"""Google Gemini provider profiles.
-
-gemini:            Google AI Studio (API key) — uses GeminiNativeClient
-google-gemini-cli: Google Cloud Code Assist (OAuth) — uses GeminiCloudCodeClient
-
-Both report api_mode="chat_completions" but use custom native clients
-that bypass the standard OpenAI transport. The profile captures auth
-and endpoint metadata for auth.py / runtime_provider.py migration.
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-gemini = ProviderProfile(
-    name="gemini",
-    aliases=("google", "google-gemini", "google-ai-studio"),
-    api_mode="chat_completions",
-    env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
-    base_url="https://generativelanguage.googleapis.com/v1beta",
-    auth_type="api_key",
-    default_aux_model="gemini-3-flash-preview",
-)
-
-google_gemini_cli = ProviderProfile(
-    name="google-gemini-cli",
-    aliases=("gemini-cli", "gemini-oauth"),
-    api_mode="chat_completions",
-    env_vars=(),  # OAuth — no API key
-    base_url="cloudcode-pa://google",  # Cloud Code Assist internal scheme
-    auth_type="oauth_external",
-)
-
-register_provider(gemini)
-register_provider(google_gemini_cli)
@@ -1,26 +0,0 @@
-"""GMI Cloud provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-gmi = ProviderProfile(
-    name="gmi",
-    aliases=("gmi-cloud", "gmicloud"),
-    display_name="GMI Cloud",
-    description="GMI Cloud — multi-model direct API (slash-form model IDs)",
-    signup_url="https://www.gmicloud.ai/",
-    env_vars=("GMI_API_KEY", "GMI_BASE_URL"),
-    base_url="https://api.gmi-serving.com/v1",
-    auth_type="api_key",
-    default_aux_model="google/gemini-3.1-flash-lite-preview",
-    fallback_models=(
-        "zai-org/GLM-5.1-FP8",
-        "deepseek-ai/DeepSeek-V3.2",
-        "moonshotai/Kimi-K2.5",
-        "google/gemini-3.1-flash-lite-preview",
-        "anthropic/claude-sonnet-4.6",
-        "openai/gpt-5.4",
-    ),
-)
-
-register_provider(gmi)
@@ -1,20 +0,0 @@
-"""Hugging Face provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-huggingface = ProviderProfile(
-    name="huggingface",
-    aliases=("hf", "hugging-face", "huggingface-hub"),
-    env_vars=("HF_TOKEN",),
-    display_name="HuggingFace",
-    description="HuggingFace Inference API",
-    signup_url="https://huggingface.co/settings/tokens",
-    fallback_models=(
-        "Qwen/Qwen3.5-72B-Instruct",
-        "deepseek-ai/DeepSeek-V3.2",
-    ),
-    base_url="https://router.huggingface.co/v1",
-)
-
-register_provider(huggingface)
@@ -1,14 +0,0 @@
-"""Kilo Code provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-kilocode = ProviderProfile(
-    name="kilocode",
-    aliases=("kilo-code", "kilo", "kilo-gateway"),
-    env_vars=("KILOCODE_API_KEY",),
-    base_url="https://api.kilo.ai/api/gateway",
-    default_aux_model="google/gemini-3-flash-preview",
-)
-
-register_provider(kilocode)
@@ -1,71 +0,0 @@
-"""Kimi / Moonshot provider profiles.
-
-Kimi has dual endpoints:
-  - sk-kimi-* keys → api.kimi.com/coding (Anthropic Messages API)
-  - legacy keys → api.moonshot.ai/v1 (OpenAI chat completions)
-
-This module covers the chat_completions path (/v1 endpoint).
-"""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import OMIT_TEMPERATURE, ProviderProfile
-
-
-class KimiProfile(ProviderProfile):
-    """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
-
-    def build_api_kwargs_extras(
-        self, *, reasoning_config: dict | None = None, **context
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Kimi uses extra_body.thinking + top-level reasoning_effort."""
-        extra_body = {}
-        top_level = {}
-
-        if not reasoning_config or not isinstance(reasoning_config, dict):
-            # No config → thinking enabled, default effort
-            extra_body["thinking"] = {"type": "enabled"}
-            top_level["reasoning_effort"] = "medium"
-            return extra_body, top_level
-
-        enabled = reasoning_config.get("enabled", True)
-        if enabled is False:
-            extra_body["thinking"] = {"type": "disabled"}
-            return extra_body, top_level
-
-        # Enabled
-        extra_body["thinking"] = {"type": "enabled"}
-        effort = (reasoning_config.get("effort") or "").strip().lower()
-        if effort in ("low", "medium", "high"):
-            top_level["reasoning_effort"] = effort
-        else:
-            top_level["reasoning_effort"] = "medium"
-
-        return extra_body, top_level
-
-
-kimi = KimiProfile(
-    name="kimi-coding",
-    aliases=("kimi", "moonshot", "kimi-for-coding"),
-    env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
-    base_url="https://api.moonshot.ai/v1",
-    fixed_temperature=OMIT_TEMPERATURE,
-    default_max_tokens=32000,
-    default_headers={"User-Agent": "hermes-agent/1.0"},
-    default_aux_model="kimi-k2-turbo-preview",
-)
-
-kimi_cn = KimiProfile(
-    name="kimi-coding-cn",
-    aliases=("kimi-cn", "moonshot-cn"),
-    env_vars=("KIMI_CN_API_KEY",),
-    base_url="https://api.moonshot.cn/v1",
-    fixed_temperature=OMIT_TEMPERATURE,
-    default_max_tokens=32000,
-    default_headers={"User-Agent": "hermes-agent/1.0"},
-    default_aux_model="kimi-k2-turbo-preview",
-)
-
-register_provider(kimi)
-register_provider(kimi_cn)
@@ -1,31 +0,0 @@
-"""MiniMax provider profiles (international + China).
-
-Both use anthropic_messages api_mode — their inference_base_url
-ends with /anthropic which triggers auto-detection to anthropic_messages.
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-minimax = ProviderProfile(
-    name="minimax",
-    aliases=("mini-max",),
-    api_mode="anthropic_messages",
-    env_vars=("MINIMAX_API_KEY",),
-    base_url="https://api.minimax.io/anthropic",
-    auth_type="api_key",
-    default_aux_model="MiniMax-M2.7",
-)
-
-minimax_cn = ProviderProfile(
-    name="minimax-cn",
-    aliases=("minimax-china", "minimax_cn"),
-    api_mode="anthropic_messages",
-    env_vars=("MINIMAX_CN_API_KEY",),
-    base_url="https://api.minimaxi.com/anthropic",
-    auth_type="api_key",
-    default_aux_model="MiniMax-M2.7",
-)
-
-register_provider(minimax)
-register_provider(minimax_cn)
@@ -1,53 +0,0 @@
-"""Nous Portal provider profile."""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class NousProfile(ProviderProfile):
-    """Nous Portal — product tags, reasoning with Nous-specific omission."""
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context
-    ) -> dict[str, Any]:
-        return {"tags": ["product=hermes-agent"]}
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        supports_reasoning: bool = False,
-        **context,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Nous: passes full reasoning_config, but OMITS when disabled."""
-        extra_body = {}
-        if supports_reasoning:
-            if reasoning_config is not None:
-                rc = dict(reasoning_config)
-                if rc.get("enabled") is False:
-                    pass  # Nous omits reasoning when disabled
-                else:
-                    extra_body["reasoning"] = rc
-            else:
-                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-        return extra_body, {}
-
-
-nous = NousProfile(
-    name="nous",
-    aliases=("nous-portal", "nousresearch"),
-    env_vars=("NOUS_API_KEY",),
-    display_name="Nous Research",
-    description="Nous Research — Hermes model family",
-    signup_url="https://nousresearch.com/",
-    fallback_models=(
-        "hermes-3-405b",
-        "hermes-3-70b",
-    ),
-    base_url="https://inference.nousresearch.com/v1",
-    auth_type="oauth_device_code",
-)
-
-register_provider(nous)
@@ -1,21 +0,0 @@
-"""NVIDIA NIM provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-nvidia = ProviderProfile(
-    name="nvidia",
-    aliases=("nvidia-nim",),
-    env_vars=("NVIDIA_API_KEY",),
-    display_name="NVIDIA NIM",
-    description="NVIDIA NIM — accelerated inference",
-    signup_url="https://build.nvidia.com/",
-    fallback_models=(
-        "nvidia/llama-3.1-nemotron-70b-instruct",
-        "nvidia/llama-3.3-70b-instruct",
-    ),
-    base_url="https://integrate.api.nvidia.com/v1",
-    default_max_tokens=16384,
-)
-
-register_provider(nvidia)
@@ -1,14 +0,0 @@
-"""Ollama Cloud provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-ollama_cloud = ProviderProfile(
-    name="ollama-cloud",
-    aliases=("ollama_cloud",),
-    default_aux_model="nemotron-3-nano:30b",
-    env_vars=("OLLAMA_API_KEY",),
-    base_url="https://ollama.com/v1",
-)
-
-register_provider(ollama_cloud)
@@ -1,15 +0,0 @@
-"""OpenAI Codex (Responses API) provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-openai_codex = ProviderProfile(
-    name="openai-codex",
-    aliases=("codex", "openai_codex"),
-    api_mode="codex_responses",
-    env_vars=(),  # OAuth external — no API key
-    base_url="https://chatgpt.com/backend-api/codex",
-    auth_type="oauth_external",
-)
-
-register_provider(openai_codex)
@@ -1,30 +0,0 @@
-"""OpenCode provider profiles (Zen + Go).
-
-Both use per-model api_mode routing:
-  - OpenCode Zen: Claude → anthropic_messages, GPT-5/Codex → codex_responses,
-    everything else → chat_completions (this profile)
-  - OpenCode Go: MiniMax → anthropic_messages, GLM/Kimi → chat_completions
-    (this profile)
-"""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-opencode_zen = ProviderProfile(
-    name="opencode-zen",
-    aliases=("opencode", "opencode_zen", "zen"),
-    env_vars=("OPENCODE_ZEN_API_KEY",),
-    base_url="https://opencode.ai/zen/v1",
-    default_aux_model="gemini-3-flash",
-)
-
-opencode_go = ProviderProfile(
-    name="opencode-go",
-    aliases=("opencode_go", "go", "opencode-go-sub"),
-    env_vars=("OPENCODE_GO_API_KEY",),
-    base_url="https://opencode.ai/zen/go/v1",
-    default_aux_model="glm-5",
-)
-
-register_provider(opencode_zen)
-register_provider(opencode_go)
@@ -1,86 +0,0 @@
-"""OpenRouter provider profile."""
-
-import logging
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-logger = logging.getLogger(__name__)
-
-_CACHE: list[str] | None = None
-
-
-class OpenRouterProfile(ProviderProfile):
-    """OpenRouter aggregator — provider preferences, reasoning config passthrough."""
-
-    def fetch_models(
-        self,
-        *,
-        api_key: str | None = None,
-        timeout: float = 8.0,
-    ) -> list[str] | None:
-        """Fetch from public OpenRouter catalog — no auth required.
-
-        Note: Tool-call capability filtering is applied by hermes_cli/models.py
-        via fetch_openrouter_models() → _openrouter_model_supports_tools(), not
-        here. The picker early-returns via the dedicated openrouter path before
-        reaching this method, so filtering here would be unreachable.
-        """
-        global _CACHE  # noqa: PLW0603
-        if _CACHE is not None:
-            return _CACHE
-        try:
-            result = super().fetch_models(api_key=None, timeout=timeout)
-            if result is not None:
-                _CACHE = result
-            return result
-        except Exception as exc:
-            logger.debug("fetch_models(openrouter): %s", exc)
-            return None
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context: Any
-    ) -> dict[str, Any]:
-        body: dict[str, Any] = {}
-        prefs = context.get("provider_preferences")
-        if prefs:
-            body["provider"] = prefs
-        return body
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        supports_reasoning: bool = False,
-        **context: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
-        extra_body: dict[str, Any] = {}
-        if supports_reasoning:
-            if reasoning_config is not None:
-                extra_body["reasoning"] = dict(reasoning_config)
-            else:
-                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-        return extra_body, {}
-
-
-openrouter = OpenRouterProfile(
-    name="openrouter",
-    aliases=("or",),
-    env_vars=("OPENROUTER_API_KEY",),
-    display_name="OpenRouter",
-    description="OpenRouter — unified API for 200+ models",
-    signup_url="https://openrouter.ai/keys",
-    base_url="https://openrouter.ai/api/v1",
-    models_url="https://openrouter.ai/api/v1/models",
-    fallback_models=(
-        "anthropic/claude-sonnet-4.6",
-        "openai/gpt-5.4",
-        "deepseek/deepseek-chat",
-        "google/gemini-3-flash-preview",
-        "qwen/qwen3-plus",
-    ),
-)
-
-register_provider(openrouter)
@@ -1,82 +0,0 @@
-"""Qwen Portal provider profile."""
-
-import copy
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class QwenProfile(ProviderProfile):
-    """Qwen Portal — message normalization, vl_high_resolution, metadata top-level."""
-
-    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
-        """Normalize content to list-of-dicts format.
-
-        Inject cache_control on system message.
-
-        Matches the behavior of run_agent.py:_qwen_prepare_chat_messages().
-        """
-        prepared = copy.deepcopy(messages)
-        if not prepared:
-            return prepared
-
-        for msg in prepared:
-            if not isinstance(msg, dict):
-                continue
-            content = msg.get("content")
-            if isinstance(content, str):
-                msg["content"] = [{"type": "text", "text": content}]
-            elif isinstance(content, list):
-                normalized_parts = []
-                for part in content:
-                    if isinstance(part, str):
-                        normalized_parts.append({"type": "text", "text": part})
-                    elif isinstance(part, dict):
-                        normalized_parts.append(part)
-                if normalized_parts:
-                    msg["content"] = normalized_parts
-
-        # Inject cache_control on the last part of the system message.
-        for msg in prepared:
-            if isinstance(msg, dict) and msg.get("role") == "system":
-                content = msg.get("content")
-                if (
-                    isinstance(content, list)
-                    and content
-                    and isinstance(content[-1], dict)
-                ):
-                    content[-1]["cache_control"] = {"type": "ephemeral"}
-                break
-
-        return prepared
-
-    def build_extra_body(
-        self, *, session_id: str | None = None, **context
-    ) -> dict[str, Any]:
-        return {"vl_high_resolution_images": True}
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        qwen_session_metadata: dict | None = None,
-        **context,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        """Qwen metadata goes to top-level api_kwargs, not extra_body."""
-        top_level = {}
-        if qwen_session_metadata:
-            top_level["metadata"] = qwen_session_metadata
-        return {}, top_level
-
-
-qwen = QwenProfile(
-    name="qwen-oauth",
-    aliases=("qwen", "qwen-portal", "qwen-cli"),
-    env_vars=("QWEN_API_KEY",),
-    base_url="https://portal.qwen.ai/v1",
-    auth_type="oauth_external",
-    default_max_tokens=65536,
-)
-
-register_provider(qwen)
@@ -1,14 +0,0 @@
-"""StepFun provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-stepfun = ProviderProfile(
-    name="stepfun",
-    aliases=("step", "stepfun-coding-plan"),
-    default_aux_model="step-3.5-flash",
-    env_vars=("STEPFUN_API_KEY",),
-    base_url="https://api.stepfun.ai/step_plan/v1",
-)
-
-register_provider(stepfun)
@@ -1,43 +0,0 @@
-"""Vercel AI Gateway provider profile.
-
-AI Gateway routes to multiple backends. Hermes sends attribution
-headers and full reasoning config passthrough.
-"""
-
-from typing import Any
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-
-class VercelAIGatewayProfile(ProviderProfile):
-    """Vercel AI Gateway — attribution headers + reasoning passthrough."""
-
-    def build_api_kwargs_extras(
-        self,
-        *,
-        reasoning_config: dict | None = None,
-        supports_reasoning: bool = True,
-        **ctx: Any,
-    ) -> tuple[dict[str, Any], dict[str, Any]]:
-        extra_body: dict[str, Any] = {}
-        if supports_reasoning and reasoning_config is not None:
-            extra_body["reasoning"] = dict(reasoning_config)
-        elif supports_reasoning:
-            extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-        return extra_body, {}
-
-
-vercel = VercelAIGatewayProfile(
-    name="ai-gateway",
-    aliases=("vercel", "vercel-ai-gateway", "ai_gateway", "aigateway"),
-    env_vars=("AI_GATEWAY_API_KEY",),
-    base_url="https://ai-gateway.vercel.sh/v1",
-    default_headers={
-        "HTTP-Referer": "https://hermes-agent.nousresearch.com",
-        "X-Title": "Hermes Agent",
-    },
-    default_aux_model="google/gemini-3-flash",
-)
-
-register_provider(vercel)
@@ -1,15 +0,0 @@
-"""xAI (Grok) provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-xai = ProviderProfile(
-    name="xai",
-    aliases=("grok", "x-ai", "x.ai"),
-    api_mode="codex_responses",
-    env_vars=("XAI_API_KEY",),
-    base_url="https://api.x.ai/v1",
-    auth_type="api_key",
-)
-
-register_provider(xai)
@@ -1,13 +0,0 @@
-"""Xiaomi MiMo provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-xiaomi = ProviderProfile(
-    name="xiaomi",
-    aliases=("mimo", "xiaomi-mimo"),
-    env_vars=("XIAOMI_API_KEY",),
-    base_url="https://api.xiaomimimo.com/v1",
-)
-
-register_provider(xiaomi)
@@ -1,21 +0,0 @@
-"""ZAI / GLM provider profile."""
-
-from providers import register_provider
-from providers.base import ProviderProfile
-
-zai = ProviderProfile(
-    name="zai",
-    aliases=("glm", "z-ai", "z.ai", "zhipu"),
-    env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
-    display_name="Z.AI (GLM)",
-    description="Z.AI / GLM — Zhipu AI models",
-    signup_url="https://z.ai/",
-    fallback_models=(
-        "glm-5",
-        "glm-4-9b",
-    ),
-    base_url="https://api.z.ai/api/paas/v4",
-    default_aux_model="glm-4.5-flash",
-)
-
-register_provider(zai)
@@ -137,7 +137,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
 hermes_cli = ["web_dist/**/*"]

 [tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -86,7 +86,7 @@ from tools.browser_tool import cleanup_browser


 # Agent internals extracted to agent/ package for modularity
-from agent.memory_manager import StreamingContextScrubber, build_memory_context_block, sanitize_context
+from agent.memory_manager import build_memory_context_block, sanitize_context
 from agent.retry_utils import jittered_backoff
 from agent.error_classifier import classify_api_error, FailoverReason
 from agent.prompt_builder import (
@@ -1218,10 +1218,6 @@ class AIAgent:
        # Deferred paragraph break flag — set after tool iterations so a
        # single "\n\n" is prepended to the next real text delta.
        self._stream_needs_break = False
-        # Stateful scrubber for <memory-context> spans split across stream
-        # deltas (#5719).  sanitize_context() alone can't survive chunk
-        # boundaries because the block regex needs both tags in one string.
-        self._stream_context_scrubber = StreamingContextScrubber()
        # Visible assistant text already delivered through live token callbacks
        # during the current model response. Used to avoid re-sending the same
        # commentary when the provider later returns it as a completed interim
@@ -1371,17 +1367,6 @@ class AIAgent:
                elif base_url_host_matches(effective_base, "chatgpt.com"):
                    from agent.auxiliary_client import _codex_cloudflare_headers
                    client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key)
-                elif "default_headers" not in client_kwargs:
-                    # Fall back to profile.default_headers for providers that
-                    # declare custom headers (e.g. Vercel AI Gateway attribution,
-                    # Kimi User-Agent on non-kimi.com endpoints).
-                    try:
-                        from providers import get_provider_profile as _gpf
-                        _ph = _gpf(self.provider)
-                        if _ph and _ph.default_headers:
-                            client_kwargs["default_headers"] = dict(_ph.default_headers)
-                    except Exception:
-                        pass
            else:
                # No explicit creds — use the centralized provider router
                from agent.auxiliary_client import resolve_provider_client
@@ -5048,7 +5033,7 @@ class AIAgent:
        _validate_proxy_env_urls()
        _validate_base_url(client_kwargs.get("base_url"))
        if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
-            from acp_adapter.copilot_client import CopilotACPClient
+            from agent.copilot_acp_client import CopilotACPClient

            client = CopilotACPClient(**client_kwargs)
            logger.info(
@@ -5737,19 +5722,7 @@ class AIAgent:
                self._client_kwargs.get("api_key", "")
            )
        else:
-            # No URL-specific headers — check profile.default_headers before clearing.
-            _ph_headers = None
-            try:
-                from providers import get_provider_profile as _gpf2
-                _ph2 = _gpf2(self.provider)
-                if _ph2 and _ph2.default_headers:
-                    _ph_headers = dict(_ph2.default_headers)
-            except Exception:
-                pass
-            if _ph_headers:
-                self._client_kwargs["default_headers"] = _ph_headers
-            else:
-                self._client_kwargs.pop("default_headers", None)
+            self._client_kwargs.pop("default_headers", None)

    def _swap_credential(self, entry) -> None:
        runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
@@ -6046,20 +6019,6 @@ class AIAgent:

    def _reset_stream_delivery_tracking(self) -> None:
        """Reset tracking for text delivered during the current model response."""
-        # Flush any benign partial-tag tail held by the context scrubber so it
-        # reaches the UI before we clear state for the next model call.  If
-        # the scrubber is mid-span, flush() drops the orphaned content.
-        scrubber = getattr(self, "_stream_context_scrubber", None)
-        if scrubber is not None:
-            tail = scrubber.flush()
-            if tail:
-                callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
-                for cb in callbacks:
-                    try:
-                        cb(tail)
-                    except Exception:
-                        pass
-                self._record_streamed_assistant_text(tail)
        self._current_streamed_assistant_text = ""

    def _record_streamed_assistant_text(self, text: str) -> None:
@@ -6110,28 +6069,6 @@ class AIAgent:
        if getattr(self, "_stream_needs_break", False) and text and text.strip():
            self._stream_needs_break = False
            text = "\n\n" + text
-            prepended_break = True
-        else:
-            prepended_break = False
-        if isinstance(text, str):
-            # Strip <think> blocks first (per-delta is safe for closed pairs; the
-            # unterminated-tag path is handled downstream by stream_consumer).
-            # Then feed through the stateful context scrubber so memory-context
-            # spans split across chunks cannot leak to the UI (#5719).
-            text = self._strip_think_blocks(text or "")
-            scrubber = getattr(self, "_stream_context_scrubber", None)
-            if scrubber is not None:
-                text = scrubber.feed(text)
-            else:
-                # Defensive: legacy callers without the scrubber attribute.
-                text = sanitize_context(text)
-            # Only strip leading newlines on the first delta — mid-stream "\n" is legitimate markdown.
-            if not prepended_break and not getattr(
-                self, "_current_streamed_assistant_text", ""
-            ):
-                text = text.lstrip("\n")
-        if not text:
-            return
        callbacks = [cb for cb in (self.stream_delta_callback, self._stream_callback) if cb is not None]
        delivered = False
        for cb in callbacks:
@@ -7880,79 +7817,66 @@ class AIAgent:
        # ── chat_completions (default) ─────────────────────────────────────
        _ct = self._get_transport()

-        # ── Provider profile path (all chat_completions providers) ─────────
-        # Profiles handle per-provider quirks via hooks. We compute the shared
-        # per-call context here and pass it through so hooks can use it.
+        # Provider detection flags
+        _is_qwen = self._is_qwen_portal()
+        _is_or = self._is_openrouter_url()
+        _is_gh = (
+            base_url_host_matches(self._base_url_lower, "models.github.ai")
+            or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com")
+        )
+        _is_nous = "nousresearch" in self._base_url_lower
+        _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower
+        _is_kimi = (
+            base_url_host_matches(self.base_url, "api.kimi.com")
+            or base_url_host_matches(self.base_url, "moonshot.ai")
+            or base_url_host_matches(self.base_url, "moonshot.cn")
+        )
+
+        # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
+        # sentinel (temperature omitted entirely), a numeric override, or None.
        try:
-            from providers import get_provider_profile
-            _profile = get_provider_profile(self.provider)
+            from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
+            _ft = _fixed_temperature_for_model(self.model, self.base_url)
+            _omit_temp = _ft is OMIT_TEMPERATURE
+            _fixed_temp = _ft if not _omit_temp else None
        except Exception:
-            _profile = None
+            _omit_temp = False
+            _fixed_temp = None

-        if _profile:
-            _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
-            if _ephemeral_out is not None:
-                self._ephemeral_max_output_tokens = None
+        # Provider preferences (OpenRouter-specific)
+        _prefs: Dict[str, Any] = {}
+        if self.providers_allowed:
+            _prefs["only"] = self.providers_allowed
+        if self.providers_ignored:
+            _prefs["ignore"] = self.providers_ignored
+        if self.providers_order:
+            _prefs["order"] = self.providers_order
+        if self.provider_sort:
+            _prefs["sort"] = self.provider_sort
+        if self.provider_require_parameters:
+            _prefs["require_parameters"] = True
+        if self.provider_data_collection:
+            _prefs["data_collection"] = self.provider_data_collection

-            # Per-call context for profile hooks — mirrors the legacy flag block.
-            # Computed here so profiles receive live per-call values (not stale).
-            _prefs: Dict[str, Any] = {}
-            if self.providers_allowed:
-                _prefs["only"] = self.providers_allowed
-            if self.providers_ignored:
-                _prefs["ignore"] = self.providers_ignored
-            if self.providers_order:
-                _prefs["order"] = self.providers_order
-            if self.provider_sort:
-                _prefs["sort"] = self.provider_sort
-            if self.provider_require_parameters:
-                _prefs["require_parameters"] = True
-            if self.provider_data_collection:
-                _prefs["data_collection"] = self.provider_data_collection
+        # Anthropic max output for Claude on OpenRouter/Nous
+        _ant_max = None
+        if (_is_or or _is_nous) and "claude" in (self.model or "").lower():
+            try:
+                from agent.anthropic_adapter import _get_anthropic_max_output
+                _ant_max = _get_anthropic_max_output(self.model)
+            except Exception:
+                pass  # fail open — let the proxy pick its default

-            _is_or = self._is_openrouter_url()
-            _is_nous = "nousresearch" in self._base_url_lower
-            _ant_max = None
-            if (_is_or or _is_nous) and "claude" in (self.model or "").lower():
-                try:
-                    from agent.anthropic_adapter import _get_anthropic_max_output
-                    _ant_max = _get_anthropic_max_output(self.model)
-                except Exception:
-                    pass
+        # Qwen session metadata precomputed here (promptId is per-call random)
+        _qwen_meta = None
+        if _is_qwen:
+            _qwen_meta = {
+                "sessionId": self.session_id or "hermes",
+                "promptId": str(uuid.uuid4()),
+            }

-            _is_qwen = self._is_qwen_portal()
-            _qwen_meta = None
-            if _is_qwen:
-                _qwen_meta = {
-                    "sessionId": self.session_id or "hermes",
-                    "promptId": str(uuid.uuid4()),
-                }
-
-            return _ct.build_kwargs(
-                model=self.model,
-                messages=api_messages,
-                tools=self.tools,
-                timeout=self._resolved_api_call_timeout(),
-                max_tokens=self.max_tokens,
-                ephemeral_max_output_tokens=_ephemeral_out,
-                max_tokens_param_fn=self._max_tokens_param,
-                reasoning_config=self.reasoning_config,
-                request_overrides=self.request_overrides,
-                session_id=getattr(self, "session_id", None),
-                provider_profile=_profile,
-                ollama_num_ctx=self._ollama_num_ctx,
-                # Context forwarded to profile hooks:
-                provider_preferences=_prefs or None,
-                anthropic_max_output=_ant_max,
-                supports_reasoning=self._supports_reasoning_extra_body(),
-                qwen_session_metadata=_qwen_meta,
-            )
-
-        # ── Legacy flag path ────────────────────────────────────────────
-        # Reached only when get_provider_profile() returns None — i.e. a
-        # completely unknown provider not in providers/ registry.
-        # Best-effort: send a clean chat_completions request with no
-        # provider-specific quirks.
+        # Ephemeral max output override — consume immediately so the next
+        # turn doesn't inherit it.
        _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
        if _ephemeral_out is not None:
            self._ephemeral_max_output_tokens = None
@@ -7971,7 +7895,24 @@ class AIAgent:
            reasoning_config=self.reasoning_config,
            request_overrides=self.request_overrides,
            session_id=getattr(self, "session_id", None),
+            model_lower=(self.model or "").lower(),
+            is_openrouter=_is_or,
+            is_nous=_is_nous,
+            is_qwen_portal=_is_qwen,
+            is_github_models=_is_gh,
+            is_nvidia_nim=_is_nvidia,
+            is_kimi=_is_kimi,
+            is_custom_provider=self.provider == "custom",
            ollama_num_ctx=self._ollama_num_ctx,
+            provider_preferences=_prefs or None,
+            qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None,
+            qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None,
+            qwen_session_metadata=_qwen_meta,
+            fixed_temperature=_fixed_temp,
+            omit_temperature=_omit_temp,
+            supports_reasoning=self._supports_reasoning_extra_body(),
+            github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
+            anthropic_max_output=_ant_max,
        )

    def _supports_reasoning_extra_body(self) -> bool:
@@ -8479,23 +8420,6 @@ class AIAgent:
                    f"⚠ Compression summary failed: {summary_error}. "
                    "Inserted a fallback context marker."
                )
-        else:
-            # No hard failure — but did the configured aux model error out
-            # and get recovered by retrying on main?  Surface that so users
-            # know their auxiliary.compression.model setting is broken even
-            # though compression succeeded.
-            _aux_fail_model = getattr(self.context_compressor, "_last_aux_model_failure_model", None)
-            _aux_fail_err = getattr(self.context_compressor, "_last_aux_model_failure_error", None)
-            if _aux_fail_model:
-                # Dedup on (model, error) so we don't spam on every compaction
-                _aux_key = (_aux_fail_model, _aux_fail_err)
-                if getattr(self, "_last_aux_fallback_warning_key", None) != _aux_key:
-                    self._last_aux_fallback_warning_key = _aux_key
-                    self._emit_warning(
-                        f"ℹ Configured compression model '{_aux_fail_model}' failed "
-                        f"({_aux_fail_err or 'unknown error'}). Recovered using main model — "
-                        "check auxiliary.compression.model in config.yaml."
-                    )

        todo_snapshot = self._todo_store.format_for_injection()
        if todo_snapshot:
@@ -9668,6 +9592,16 @@ class AIAgent:
        if isinstance(persist_user_message, str):
            persist_user_message = _sanitize_surrogates(persist_user_message)

+        # Strip leaked <memory-context> blocks from user input.  When Honcho's
+        # saveMessages persists a turn that included injected context, the block
+        # can reappear in the next turn's user message via message history.
+        # Stripping here prevents stale memory tags from leaking into the
+        # conversation and being visible to the user or the model as user text.
+        if isinstance(user_message, str):
+            user_message = sanitize_context(user_message)
+        if isinstance(persist_user_message, str):
+            persist_user_message = sanitize_context(persist_user_message)
+
        # Store stream callback for _interruptible_api_call to pick up
        self._stream_callback = stream_callback
        self._persist_user_message_idx = None
@@ -9746,13 +9680,6 @@ class AIAgent:
        # Track user turns for memory flush and periodic nudge logic
        self._user_turn_count += 1

-        # Reset the streaming context scrubber at the top of each turn so a
-        # hung span from a prior interrupted stream can't taint this turn's
-        # output.
-        scrubber = getattr(self, "_stream_context_scrubber", None)
-        if scrubber is not None:
-            scrubber.reset()
-
        # Preserve the original user message (no nudge injection).
        original_user_message = persist_user_message if persist_user_message is not None else user_message

@@ -12784,6 +12711,7 @@ class AIAgent:
                        truncated_response_prefix = ""
                        length_continue_retries = 0
                    
+                    # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
                    final_response = self._strip_think_blocks(final_response).strip()
                    
                    final_msg = self._build_assistant_message(assistant_message, finish_reason)
@@ -557,12 +557,6 @@ AUTHOR_MAP = {
    "mor.aleksandr@yahoo.com": "MorAlekss",
    "ash@users.noreply.github.com": "ash",
    "andrewho.sf@gmail.com": "andrewhosf",
-    # April 2026 Honcho bug-fix consolidation (#15381)
-    "HiddenPuppy@users.noreply.github.com": "HiddenPuppy",
-    "code@sasha.id": "sasha-id",
-    "dontcallmejames@users.noreply.github.com": "dontcallmejames",
-    "hekaru.agent@gmail.com": "hekaru-agent",
-    "jas9000@gmail.com": "twozle",
 }


@@ -1,275 +0,0 @@
-# 3D Scene Reference
-
-Lighting rigs, shadows, IBL/cubemaps, multi-camera, and PBR materials. For wireframe rendering and feedback TOPs see `operator-tips.md`. For instancing geometry see `geometry-comp.md`. For shader code see `glsl.md`.
-
---
-
-## Anatomy of a 3D Scene
-
-```
-[Geometry COMP]    ← contains SOPs (the shapes)
-[Material]         ← Phong/PBR/GLSL/Constant MAT
-[Light COMPs]      ← point/directional/spot/area/environment
-[Camera COMP]      ← view position, FOV
-        │
-        ▼
-   [Render TOP]    ← combines geo + lights + camera into a 2D image
-        │
-        ▼
-   [post-FX chain] ← bloomTOP, glsl shaders, etc.
-        │
-        ▼
-   [windowCOMP]    ← actual display
-```
-
-Render TOP is the heart. It takes an explicit `geometry` path, an explicit `camera` path, and lights via the lights table or an envlight reference.
-
---
-
-## Minimal Scene
-
-```python
-# Geometry
-geo = root.create(geometryCOMP, 'scene_geo')
-sphere = geo.create(sphereSOP, 'shape')
-sphere.par.rad = 1.0; sphere.par.rows = 64; sphere.par.cols = 64
-
-# Material — start with PBR
-mat = root.create(pbrMAT, 'mat')
-mat.par.basecolorr = 0.7; mat.par.basecolorg = 0.7; mat.par.basecolorb = 0.7
-mat.par.metallic = 0.0
-mat.par.roughness = 0.4
-
-geo.par.material = mat.path
-
-# Camera
-cam = root.create(cameraCOMP, 'cam1')
-cam.par.tx = 0; cam.par.ty = 0; cam.par.tz = 4
-cam.par.fov = 45
-cam.par.near = 0.1; cam.par.far = 100
-
-# Key light
-key = root.create(lightCOMP, 'key_light')
-key.par.lighttype = 'point'
-key.par.tx = 3; key.par.ty = 3; key.par.tz = 3
-key.par.dimmer = 1.5
-
-# Render
-render = root.create(renderTOP, 'render1')
-render.par.outputresolution = 'custom'
-render.par.resolutionw = 1920; render.par.resolutionh = 1080
-render.par.camera = cam.path
-render.par.geometry = geo.path
-render.par.lights = key.path                 # single light path; for multi, see below
-render.par.bgcolorr = 0; render.par.bgcolorg = 0; render.par.bgcolorb = 0
-```
-
-For multiple lights, leave `par.lights` blank — Render TOP scans the network for all `lightCOMP` and `envlightCOMP` ops by default. To restrict to specific lights, set `par.lights = '/project1/key_light /project1/fill_light'` (space-separated paths).
-
---
-
-## Light Types
-
-| Type | What | Common params |
-|---|---|---|
-| `point` | Omnidirectional, falls off with distance | `dimmer`, `coneangle` (n/a), `attenuation` |
-| `directional` | Parallel rays, infinite distance (sun) | `dimmer`, light's rotation only matters |
-| `spot` | Cone, falls off with distance + angle | `coneangle`, `conedelta`, `dimmer` |
-| `cone` | Like spot but harder edge | same |
-| `area` | Rectangular soft light source | `sizex`, `sizey` |
-
-For all: `colorr`, `colorg`, `colorb`, `tx/ty/tz`, `rx/ry/rz`, `dimmer`.
-
-### Three-Point Lighting (Studio Setup)
-
-```python
-# Key — main light, ~45° front
-key = root.create(lightCOMP, 'key')
-key.par.lighttype = 'point'
-key.par.tx = 4; key.par.ty = 3; key.par.tz = 4
-key.par.dimmer = 1.5
-key.par.colorr = 1.0; key.par.colorg = 0.95; key.par.colorb = 0.85
-
-# Fill — softer, opposite side
-fill = root.create(lightCOMP, 'fill')
-fill.par.lighttype = 'area'
-fill.par.tx = -4; fill.par.ty = 2; fill.par.tz = 3
-fill.par.dimmer = 0.5
-fill.par.colorr = 0.7; fill.par.colorg = 0.8; fill.par.colorb = 1.0
-fill.par.sizex = 4; fill.par.sizey = 4
-
-# Rim/back — outline from behind
-rim = root.create(lightCOMP, 'rim')
-rim.par.lighttype = 'spot'
-rim.par.tx = 0; rim.par.ty = 4; rim.par.tz = -4
-rim.par.coneangle = 30
-rim.par.dimmer = 1.0
-
-# Optional: ambient lift to prevent pure-black shadows
-amb = root.create(ambientlightCOMP, 'ambient')
-amb.par.dimmer = 0.15
-```
-
---
-
-## Shadows
-
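-Spot and directional lights cast shadows when `par.shadowtype != 'none'`. The `key` light in the earlier examples is a point light, so switch its `lighttype` to `spot` or `directional` before enabling shadows on it.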
-
-```python
-key.par.shadowtype = 'softshadow'        # 'none' | 'hardshadow' | 'softshadow'
-key.par.shadowsize = 1024                # shadow map resolution
-key.par.shadowsoftness = 0.02            # softshadow only
-```
-
-**Tips:**
- Soft shadows are GPU-expensive. Start with `shadowsize = 1024` and only go higher (2048/4096) if shadow edges look pixelated at your resolution.
- Set the spot light's `near`/`far` to JUST contain the scene. Wider range = wasted shadow map precision.
- Multiple shadow-casting lights compound cost. Limit to 1-2 in real-time work; pre-bake the rest into the materials.
-
---
-
-## Image-Based Lighting (IBL) / Environment Light
-
-For realistic PBR materials you need a cubemap for reflections.
-
-```python
-# Environment light from an HDR
-env = root.create(envlightCOMP, 'env')
-env.par.envmap = '/project1/cube_in'         # path to a TOP that produces a cubemap
-env.par.envlightmap = ...                    # diffuse irradiance map (often same as envmap)
-env.par.dimmer = 1.0
-
-# Cubemap source — option A: built-in cubeTOP from 6 faces
-cube = root.create(cubeTOP, 'cube_in')
-# (assign 6 face TOPs)
-
-# Option B: HDR equirectangular → cubemap conversion
-# Use a moviefileinTOP loading .hdr or .exr, then projectTOP type='cubemapfromequirect'
-hdr = root.create(moviefileinTOP, 'hdr_src')
-hdr.par.file = '/path/to/environment.hdr'
-
-proj = root.create(projectTOP, 'cube_proj')
-proj.par.projecttype = 'cubemapfromequirect'
-proj.inputConnectors[0].connect(hdr)
-```
-
-PBR materials sample the environment automatically when `envlightCOMP` is in the scene. Verify param names with `td_get_par_info(op_type='envlightCOMP')` — TD versions vary.
-
---
-
-## PBR Material Setup
-
-```python
-mat = root.create(pbrMAT, 'pbr_metal')
-mat.par.basecolorr = 0.95; mat.par.basecolorg = 0.65; mat.par.basecolorb = 0.4
-mat.par.metallic = 1.0
-mat.par.roughness = 0.25
-mat.par.specularlevel = 0.5
-mat.par.emitcolorr = 0; mat.par.emitcolorg = 0; mat.par.emitcolorb = 0
-
-# Texture maps
-mat.par.basecolormap = '/project1/textures/albedo'         # TOP path
-mat.par.metallicroughnessmap = '/project1/textures/mr'      # G=roughness, B=metallic (glTF convention)
-mat.par.normalmap = '/project1/textures/normal'
-mat.par.emitmap = '/project1/textures/emit'
-mat.par.occlusionmap = '/project1/textures/ao'
-```
-
-**Material idioms:**
-
-| Look | metallic | roughness | basecolor |
-|---|---|---|---|
-| Brushed steel | 1.0 | 0.4 | (0.7, 0.7, 0.7) |
-| Polished gold | 1.0 | 0.1 | (1.0, 0.85, 0.4) |
-| Plastic | 0.0 | 0.5 | mid-saturated |
-| Rubber | 0.0 | 0.9 | dark |
-| Glass | 0.0 | 0.05 | (1, 1, 1), low alpha + transmission |
-| Glowing emitter | 0.0 | 1.0 | dark, high `emitcolor` |
-
-For glass/transmission, recent TD versions support `transmission` in PBR; older versions need glslMAT.
-
---
-
-## Multi-Camera Setups
-
-For comparison views, instant replay, multi-screen mapping, etc.
-
-```python
-# Camera A — main scene
-cam_a = root.create(cameraCOMP, 'cam_main')
-cam_a.par.tz = 5
-
-# Camera B — orbiting top-down
-cam_b = root.create(cameraCOMP, 'cam_top')
-cam_b.par.ty = 6; cam_b.par.rx = -90
-
-# Render each via separate Render TOPs
-render_a = root.create(renderTOP, 'render_main')
-render_a.par.camera = cam_a.path
-render_a.par.geometry = geo.path
-
-render_b = root.create(renderTOP, 'render_top')
-render_b.par.camera = cam_b.path
-render_b.par.geometry = geo.path
-```
-
-Composite both with an `overTOP`/`compositeTOP` for picture-in-picture, or route to separate `windowCOMP`s for multi-display.
-
-### Camera animation
-
-Drive camera params via expressions (orbit), animationCOMP (waypoint), or LFO (oscillation):
-
-```python
-# Orbiting camera
-cam_a.par.tx.mode = ParMode.EXPRESSION
-cam_a.par.tx.expr = "cos(absTime.seconds * 0.3) * 6"
-cam_a.par.tz.mode = ParMode.EXPRESSION
-cam_a.par.tz.expr = "sin(absTime.seconds * 0.3) * 6"
-cam_a.par.lookat = '/project1/scene_geo'        # auto-aim at target
-```
-
-`par.lookat` is the simplest "always look at target" mechanism.
-
-### Depth of field
-
-PBR + Render TOP supports DOF when `par.dof = 'on'`.
-
-```python
-render.par.dof = 'on'
-render.par.focusdistance = 5.0
-render.par.aperture = 0.05         # blur strength
-render.par.bokehshape = 'hexagon'
-```
-
-DOF is GPU-heavy. Render at lower res then upscale for performance.
-
---
-
-## Common Pitfalls
-
-1. **Render TOP shows black** — most common cause: no light. Even with PBR you need at least one `lightCOMP` or `envlightCOMP`. Add an `ambientlightCOMP` at low dimmer as a safety net.
-2. **Material doesn't appear** — `geo.par.material` must be a string PATH, not the material op itself. Use `mat.path`, not `mat`.
-3. **Lights ignored** — by default Render TOP picks up ALL `lightCOMP`s in the network. If you have leftover lights from another scene, they leak in. Set `par.lights` explicitly.
-4. **PBR looks flat** — without an `envlightCOMP` providing reflections, PBR materials look like Phong. Add one even if you don't have an HDR (use a `constantTOP` cubemap as fallback).
-5. **Shadow acne / striping** — increase `par.shadowbias` slightly. Tune per-light.
-6. **Camera inside geometry** — if `cam.par.tz` is INSIDE a sphere, you see the inside (or nothing if backface culled). Move the camera further out.
-7. **Light range too small** — point lights have implicit attenuation. Far-away geometry receives little light. Increase `par.dimmer` or move lights closer.
-8. **Multiple cameras conflict** — one render TOP = one camera. Don't try to share. Use multiple render TOPs.
-9. **Wrong handedness** — TD is right-handed Y-up. Imported assets from Z-up apps (Blender, Maya in Z-up) need a 90° X rotation on the geo COMP.
-10. **Cooking budget** — PBR + IBL + shadows + DOF at 1080p60 is fine on modern GPUs but 4K + 4 lights + soft shadows + DOF will tank. Profile via `td_get_perf` and downgrade settings before adding more.
-
---
-
-## Quick Recipes
-
-| Goal | Recipe |
-|---|---|
-| Studio portrait | 3-point rig (key + fill + rim) + ambient + PBR mat + DOF |
-| Outdoor daylight | One directional `lightCOMP` (sun) + envlight (sky HDR) + soft shadows |
-| Dramatic / film noir | Single spot light from upper side, hard shadows, deep ambient = 0.05 |
-| Abstract / dreamy | Multiple area lights at low dimmer, no shadows, `bloomTOP` post |
-| Product render | Three-point + IBL + neutral PBR + `bgcolorr=g=b=1` (white seamless) |
-| Game-style | Phong MAT + 1-2 lights + no IBL + flat ambient (cheap, stylized) |
-| Wireframe + solid | Two render TOPs (one with wireframeMAT, one with PBR), composite via `addTOP` |
-| Orbiting camera | `par.lookat` + expressions on tx/tz using sin/cos |
@@ -1,221 +0,0 @@
-# Animation Reference
-
-Patterns for time-based motion — keyframes, LFOs, timers, easing, expression-driven animation.
-
-Always call `td_get_par_info` for the op type before setting params. Param names below reflect TD 2025.32 but verify if errors fire.
-
---
-
-## Time Sources
-
-TD has four time references — two absolute (`absTime.*`) and two component-local (`me.time.*`) — pick the right one.
-
-| Expression | Behavior | Use for |
-|---|---|---|
-| `absTime.seconds` | Wall-clock seconds since TD started. Never resets. | Continuous motion, GLSL `uTime`, infinite loops |
-| `absTime.frame` | Wall-clock frame count. | Frame-accurate triggers |
-| `me.time.frame` | Local component frame count (resets on play/stop). | Per-COMP animation timeline |
-| `me.time.seconds` | Local component seconds. | Same, in seconds |
-
-**Rule:** for shaders and continuous motion use `absTime.seconds`. For triggered/looping animations inside a COMP use `me.time.*`.
-
---
-
-## LFO CHOP — Cyclic Motion
-
-The simplest periodic driver. Fast, GPU-cheap, expression-friendly.
-
-```python
-lfo = root.create(lfoCHOP, 'rot_driver')
-lfo.par.type = 'sin'        # 'sin' | 'cos' | 'ramp' | 'square' | 'triangle' | 'pulse'
-lfo.par.frequency = 0.25    # cycles per second
-lfo.par.amplitude = 1.0
-lfo.par.offset = 0.0
-lfo.par.phase = 0.0         # 0-1, useful for offsetting parallel LFOs
-```
-
-**Drive a parameter via expression:**
-
-```python
-op('/project1/geo1').par.rx.mode = ParMode.EXPRESSION
-op('/project1/geo1').par.rx.expr = "op('rot_driver')['chan1'] * 360"
-```
-
-**Multiple synced LFOs (X/Y/Z rotation with phase offsets):**
-Create one LFO with three channels and phase-offset each, or use three LFOs and offset their `phase` params (0.0, 0.33, 0.66).
-
---
-
-## Timer CHOP — Triggered Sequences
-
-For run-once animations, beat-locked sequences, or stage-based logic.
-
-```python
-timer = root.create(timerCHOP, 'fade_timer')
-timer.par.length = 4.0       # cycle length in seconds
-timer.par.cycle = False      # run once vs. loop
-timer.par.outputseconds = True
-```
-
-Output channels: `timer_fraction` (0→1 across the cycle), `running`, `done`, `cycles`.
-
-**Start the timer:**
-```python
-timer.par.start.pulse()
-```
-
-**Drive a fade:**
-```python
-op('/project1/level1').par.opacity.mode = ParMode.EXPRESSION
-op('/project1/level1').par.opacity.expr = "op('fade_timer')['timer_fraction']"
-```
-
-**Easing on the timer fraction** — apply in the expression itself:
-
-```python
-# Smoothstep: ease in/out
-expr = "smoothstep(0, 1, op('fade_timer')['timer_fraction'])"
-# Cubic ease-out: 1 - (1-t)^3
-expr = "1 - pow(1 - op('fade_timer')['timer_fraction'], 3)"
-```
-
---
-
-## Pattern CHOP — Custom Curves
-
-For arbitrary waveforms (saw ramps, easing curves, custom envelopes).
-
-```python
-pat = root.create(patternCHOP, 'envelope')
-pat.par.type = 'gaussian'    # 'gaussian' | 'ramp' | 'square' | 'sin' | etc.
-pat.par.length = 60          # samples
-pat.par.cyclelength = 1.0    # seconds at TD framerate
-```
-
-Combine with `lookupCHOP` to remap a 0-1 driver through a custom curve.
-
---
-
-## Animation COMP — Keyframe-Based
-
-For multi-keyframe motion graphics. Each animationCOMP holds channels with keyframes editable in the Animation Editor.
-
-```python
-anim = root.create(animationCOMP, 'intro_anim')
-# By default has channels chan1..chanN; access via:
-# op('intro_anim').par.length, .par.play, .par.cue, etc.
-
-# Drive a parameter from a channel
-op('/project1/text1').par.tx.mode = ParMode.EXPRESSION
-op('/project1/text1').par.tx.expr = "op('intro_anim/out1')['chan1']"
-```
-
-**Keyframes are typically edited in the UI** (Animation Editor), but can be set via `keyframes` table internally. For programmatic keyframe creation, use `td_execute_python`:
-
-```python
-# Get the channel CHOP inside an animationCOMP
-ch = op('/project1/intro_anim/chans')
-# Insert a key (advanced API — verify with td_get_par_info(op_type='animationCOMP'))
-ch.appendKey('chan1', frame=0, value=0.0, expression=None)
-ch.appendKey('chan1', frame=120, value=1.0)
-```
-
-For most use cases, drive params with LFO/Timer/Pattern CHOPs instead — simpler and scriptable.
-
---
-
-## Easing in Expressions
-
-TD's expression evaluator supports Python math. Common easing forms:
-
-```python
-# Linear
-"t"
-
-# Smoothstep (classic ease-in-out)
-"smoothstep(0, 1, t)"
-
-# Ease-out cubic
-"1 - pow(1 - t, 3)"
-
-# Ease-in cubic
-"pow(t, 3)"
-
-# Ease-in-out cubic
-"3*t*t - 2*t*t*t"
-
-# Bounce (manual, simplified)
-"abs(sin(t * 6.28 * 3) * (1 - t))"
-```
-
-Where `t` is `op('fade_timer')['timer_fraction']` or any 0-1 driver.
-
---
-
-## Filter CHOP — Smoothing Existing Channels
-
-Smooth out jittery values (e.g., audio analysis, sensor data) before driving visuals.
-
-```python
-filt = root.create(filterCHOP, 'smooth')
-filt.par.filter = 'gaussian'   # or 'lowpass'
-filt.par.width = 0.5            # smoothing window in seconds
-filt.inputConnectors[0].connect(op('raw_signal'))
-```
-
-**WARNING:** Do NOT use Filter CHOP on AudioSpectrum output in timeslice mode — it expands the sample count and averages bins to near-zero. See `audio-reactive.md`.
-
---
-
-## Lag CHOP — Asymmetric Attack/Release
-
-Different speeds for rising vs. falling values. Standard for visualizing audio envelopes.
-
-```python
-lag = root.create(lagCHOP, 'env_smooth')
-lag.par.lag1 = 0.02   # attack (rise time, seconds)
-lag.par.lag2 = 0.30   # release (fall time, seconds)
-lag.inputConnectors[0].connect(op('raw_envelope'))
-```
-
-Fast attack, slow release = classic VU-meter feel.
-
---
-
-## Per-Frame Driving via Script DAT
-
-For complex per-frame logic that doesn't fit expressions, use an `executeDAT` (`onFrameStart` callback) or a `chopExecuteDAT`.
-
-```python
-# In an executeDAT (frameStart):
-def onFrameStart(frame):
-    t = absTime.seconds
-    op('/project1/circle').par.tx = math.sin(t * 2.0) * 3.0
-    op('/project1/circle').par.ty = math.cos(t * 2.0) * 3.0
-    return
-```
-
-Heavy logic should still be in CHOPs (CPU-cheap, deterministic). Reserve scripts for one-shots or non-realtime branching.
-
---
-
-## Pitfalls
-
-1. **Frame rate dependency** — `me.time.frame` is in TD project frames (default 60). If your project rate changes, motion speed changes. Use `seconds` for rate-independent timing.
-2. **Cooking budget** — every CHOP that drives a parameter cooks every frame. Consolidate drivers (one big mathCHOP > many small ones).
-3. **Expression mode** — params default to `CONSTANT`. `par.X.expr = ...` is ignored unless `par.X.mode = ParMode.EXPRESSION`.
-4. **Animation editor edits** — keyframes set via UI live in the animationCOMP's internal keyframe table. They survive save/reopen. Programmatic keys via `appendKey()` work but verify the API with `td_get_docs(topic='animation')` first.
-5. **Looping animations** — for seamless loops, `length` must equal `cyclelength` and the start/end values must match. Otherwise expect a visible jump.
-
---
-
-## Quick Recipes
-
-| Goal | Simplest path |
-|---|---|
-| Continuous rotation | LFO CHOP `type='ramp'`, expr → `geo.par.rx` |
-| Fade in over 2s | Timer CHOP `length=2`, smoothstep expr → `level.par.opacity` |
-| Pulse on every beat | `triggerCHOP` from audio → drive scale via expression |
-| 3D Lissajous orbit | Two LFOs with different freq, drive `tx`/`ty`/`tz` |
-| Random jitter | `noiseCHOP` (low-freq) added to position |
-| Timed scene switch | Timer CHOP → switchTOP/CHOP `index` |
@@ -1,352 +0,0 @@
-# DAT-Based Scripting Reference
-
-TD's event/callback model — Python that runs in response to network events. The full set of "Execute DATs" plus their idiomatic patterns.
-
-For arbitrary Python execution (not callback-based), see `python-api.md`. For the MCP's `td_execute_python` tool, see `mcp-tools.md`.
-
---
-
-## The Execute DAT Family
-
-Every type watches one kind of event source and fires Python on changes.
-
-| DAT | Watches | Use for |
-|---|---|---|
-| `chopExecuteDAT` | A CHOP's channel values | Audio triggers, threshold callbacks, state machines on numeric input |
-| `datExecuteDAT` | A DAT's content (table cells, text) | Reacting to data updates from APIs, parsing webDAT responses |
-| `parameterExecuteDAT` | A parameter's value or pulse | Reacting to user-changed params, custom pulse buttons |
-| `panelExecuteDAT` | A panel COMP's interaction | Button clicks, slider drags, field commits |
-| `opExecuteDAT` | Operator lifecycle | New operator created, deleted, name changed |
-| `executeDAT` | Project lifecycle, frame events | Run-once setup, per-frame logic, save/load hooks |
-
-All have a docked DAT with predefined callback functions. You only fill in the bodies of the ones you care about.
-
---
-
-## chopExecuteDAT — Numeric Triggers
-
-```python
-ce = root.create(chopExecuteDAT, 'kick_handler')
-ce.par.chop = '/project1/audio/out_kick'      # source CHOP
-ce.par.offtoon = True                          # fire when channel rises above 0
-ce.par.ontooff = False
-ce.par.whileon = False
-ce.par.valuechange = False
-```
-
-In the docked callback DAT:
-
-```python
-def offToOn(channel, sampleIndex, val, prev):
-    """Channel went from 0 to non-zero. Classic beat trigger."""
-    op('/project1/strobe').par.flash.pulse()
-    op('/project1/scene').par.index = (op('/project1/scene').par.index + 1) % 8
-    return
-
-def onToOff(channel, sampleIndex, val, prev):
-    """Channel went from non-zero to 0."""
-    return
-
-def whileOn(channel, sampleIndex, val, prev):
-    """Fires every frame while channel is non-zero. Use sparingly."""
-    return
-
-def valueChange(channel, sampleIndex, val, prev):
-    """Fires every frame the value changes (continuous). Heavy."""
-    return
-```
-
-`channel` is a `Channel` object — `.name`, `.owner`, `.vals[]`. Use `channel.name == 'chan1'` to filter.
-
-**Threshold-based custom triggers:** wire the source CHOP through a `triggerCHOP` first to get clean 0/1 pulses, then watch with `offtoon`.
-
---
-
-## datExecuteDAT — Table/Text Changes
-
-```python
-de = root.create(datExecuteDAT, 'api_response')
-de.par.dat = '/project1/api/web1'              # source DAT
-de.par.tablechange = True                      # any cell change
-de.par.cellchange = False
-de.par.rowchange = False
-de.par.colchange = False
-```
-
-```python
-def onTableChange(dat):
-    """Whole table changed (including text DAT content updates)."""
-    if dat.numRows == 0:
-        return
-    # If it's a webDAT response, parse JSON
-    import json
-    try:
-        data = json.loads(dat.text)
-    except json.JSONDecodeError:
-        debug(f'Bad JSON: {dat.text[:100]}')
-        return
-    # Write to a CHOP
-    op('/project1/api_value').par.value0 = float(data.get('count', 0))
-    return
-
-def onCellChange(dat, cells, prev):
-    """Specific cells changed."""
-    for cell in cells:
-        # cell.row, cell.col, cell.val
-        pass
-    return
-```
-
-`debug()` prints to the textport — readable via `td_read_textport`.
-
---
-
-## parameterExecuteDAT — Param Changes & Pulse
-
-```python
-pe = root.create(parameterExecuteDAT, 'comp_params')
-pe.par.op = '/project1/my_component'           # COMP whose params to watch
-pe.par.parameters = '*'                         # or specific names like 'Intensity Reset'
-pe.par.valuechange = True
-pe.par.pulse = True
-```
-
-```python
-def onValueChange(par, prev):
-    """par is a Par object. par.name, par.eval(), par.owner."""
-    if par.name == 'Intensity':
-        op('/project1/bloom').par.threshold = par.eval()
-    return
-
-def onPulse(par):
-    """Pulse param was triggered."""
-    if par.name == 'Reset':
-        op('/project1/scene').par.index = 0
-        op('/project1/audio_player').par.cuepoint = 0
-        op('/project1/audio_player').par.cuepulse.pulse()
-    return
-
-def onExpressionChange(par, val, prev):
-    """User changed the expression on a param."""
-    return
-
-def onExportChange(par, val, prev):
-    """Export source changed."""
-    return
-
-def onModeChange(par, val, prev):
-    """Param mode changed (CONSTANT / EXPRESSION / EXPORT / etc)."""
-    return
-```
-
---
-
-## panelExecuteDAT — UI Events
-
-For interactive control surfaces. See `panel-ui.md` for the full panel COMP context.
-
-```python
-pe = root.create(panelExecuteDAT, 'btn_handler')
-pe.par.panel = '/project1/play_btn'
-pe.par.click = True              # mouse click events
-pe.par.value = True              # state changes (toggle)
-pe.par.lockedchange = False
-```
-
-```python
-def onOffToOn(panelValue):
-    """Panel value rose to 1 (button pressed, slider crossed threshold)."""
-    op('/project1/scene_timer').par.start.pulse()
-    return
-
-def onOnToOff(panelValue):
-    """Panel value dropped to 0."""
-    return
-
-def onValueChange(panelValue):
-    """Continuous: every frame the value changes."""
-    val = panelValue.eval()
-    op('/project1/master').par.opacity = val
-    return
-
-def onClick(panelValue):
-    """Discrete click event, fires once per click."""
-    return
-```
-
-`panelValue` is a `Par` object on the panel COMP.
-
---
-
-## opExecuteDAT — Operator Lifecycle
-
-Watches creation/deletion/renaming of operators in a parent COMP.
-
-```python
-oe = root.create(opExecuteDAT, 'lifecycle')
-oe.par.op = '/project1'
-oe.par.create = True
-oe.par.destroy = True
-oe.par.namechange = True
-oe.par.flagchange = False
-```
-
-```python
-def onCreate(opCreated):
-    """A new operator was created. Useful for auto-applying conventions."""
-    if opCreated.OPType == 'glslTOP':
-        # Always wrap with a null
-        n = opCreated.parent().create(nullTOP, opCreated.name + '_out')
-        n.inputConnectors[0].connect(opCreated)
-    return
-
-def onDestroy(opDestroyed):
-    """Operator was deleted. opDestroyed.path is still valid for one frame."""
-    return
-
-def onNameChange(opChanged):
-    """Operator was renamed."""
-    return
-```
-
-Useful for dev-time scaffolding (auto-create downstream nullTOPs, auto-name conventions). Disable in production projects to avoid surprise side effects.
-
---
-
-## executeDAT — Project Lifecycle & Per-Frame
-
-The catch-all. Gets you hooks into project start, save, load, frame-start, frame-end.
-
-```python
-exec_dat = root.create(executeDAT, 'lifecycle')
-exec_dat.par.start = True
-exec_dat.par.create = True
-exec_dat.par.framestart = True
-exec_dat.par.frameend = False
-```
-
-```python
-def onStart():
-    """Project just started cooking. Run once."""
-    op('/project1/scene').par.index = 0
-    debug('Project started')
-    return
-
-def onCreate():
-    """Component was just created (only fires for component executeDATs, not project root)."""
-    return
-
-def onFrameStart(frame):
-    """Per-frame, BEFORE network cooks. Heavy logic here = bottleneck."""
-    return
-
-def onFrameEnd(frame):
-    """Per-frame, AFTER network cooks. Use for capture, recording, post-network logic."""
-    return
-
-def onPlayStateChange(playing):
-    """Project play/pause toggled."""
-    return
-
-def onProjectPreSave():
-    """Right before saving the .toe file."""
-    return
-
-def onProjectPostSave():
-    return
-```
-
-Heavy per-frame logic in `onFrameStart` is one of the top performance regressions in TD projects. Use CHOPs for per-frame computation, scripts for events.
-
---
-
-## Pattern: Triggering an Animation Sequence on Beat
-
-```python
-# Source: a kick trigger CHOP
-# Goal: on each kick, run a 1.5s scale pulse + color flash
-
-# Setup (create once)
-animator = root.create(timerCHOP, 'pulse_anim')
-animator.par.length = 1.5
-animator.par.cycle = False
-
-# Param expressions on visual targets:
-op('logo').par.sx.expr = "1.0 + (1 - op('pulse_anim')['timer_fraction']) * 0.3"
-op('logo').par.sx.mode = ParMode.EXPRESSION
-op('logo').par.sy.expr = "1.0 + (1 - op('pulse_anim')['timer_fraction']) * 0.3"
-op('logo').par.sy.mode = ParMode.EXPRESSION
-
-# In a chopExecuteDAT watching the kick CHOP:
-def offToOn(channel, sampleIndex, val, prev):
-    op('pulse_anim').par.start.pulse()
-    return
-```
-
---
-
-## Pattern: Live Editing a CHOP from API Data
-
-```python
-# webDAT polls an API every 5 seconds
-# datExecuteDAT parses the response and writes to a constantCHOP
-
-def onTableChange(dat):
-    import json
-    try:
-        data = json.loads(dat.text)
-    except:
-        return
-    target = op('/project1/external_state')
-    target.par.name0 = 'temperature'
-    target.par.value0 = float(data['temp_c'])
-    target.par.name1 = 'humidity'
-    target.par.value1 = float(data['humidity'])
-    return
-```
-
-Visuals just reference `op('external_state')['temperature']` — they update live.
-
---
-
-## Pattern: Self-Cleaning Network
-
-```python
-# An opExecuteDAT watching for orphaned helper ops, deleting them after their parent disappears
-
-def onDestroy(opDestroyed):
-    parent_name = opDestroyed.name
-    helper = op(f'/project1/{parent_name}_helper')
-    if helper:
-        helper.destroy()
-    return
-```
-
---
-
-## Pitfalls
-
-1. **Callbacks crash silently** — exceptions print to the textport but don't show up in the UI. Always `td_clear_textport` before debugging, then `td_read_textport` after.
-2. **`debug()` vs `print()`** — both write to textport, but `debug()` includes the file/line of the calling DAT. Prefer `debug()` for scripts.
-3. **`val` is the new value, `prev` is old** — easy to swap. Always: `def offToOn(channel, sampleIndex, val, prev)`. Check parameter order in TD docs if confused.
-4. **`whileOn` and `valueChange` are per-frame** — heavy. Avoid unless absolutely needed. Drive via expressions instead.
-5. **Callbacks don't run during cooking-paused state** — if the parent COMP has `allowCooking=False`, callbacks freeze. Useful for "disable me" toggles.
-6. **`par` vs `panelValue`** — parameterExecuteDAT gives `par` (a Par object), panelExecuteDAT gives `panelValue` (also a Par-like object). Both have `.name` and `.eval()` but their context differs.
-7. **`opExecuteDAT` fires for itself** — when you create an opExecuteDAT, it can fire `onCreate` for itself if `par.create=True` and parent matches. Filter by `if opCreated == me: return`.
-8. **Reload behavior** — when reloading an extension (`td_reinit_extension`), all callback DATs reset their internal state. Module-level vars are lost. Persist state in tableDATs or the docked DAT itself, not in module globals.
-9. **Cooking dependencies** — if a callback writes to an op that's upstream of the callback's source, you get a cooking loop. TD warns about it but doesn't always block. Keep dataflow one-directional.
-10. **Active flag** — every Execute DAT has `par.active`. False = silent. Easy to toggle for testing without deleting wiring.
-
---
-
-## Quick Recipes
-
-| Goal | Setup |
-|---|---|
-| Beat trigger | `chopExecuteDAT.par.offtoon=True` watching a `triggerCHOP` |
-| API response handler | `datExecuteDAT.par.tablechange=True` watching a `webDAT` |
-| Custom button → action | `parameterExecuteDAT.par.pulse=True` watching a custom pulse param |
-| Slider → continuous param | `panelExecuteDAT.par.value=True` watching a `sliderCOMP` |
-| Run-once setup | `executeDAT.par.start=True` with logic in `onStart()` |
-| Per-frame metrics | `executeDAT.par.frameend=True` recording values to a CHOP |
-| Auto-name new ops | `opExecuteDAT.par.create=True` enforcing naming conventions |
@@ -1,322 +0,0 @@
-# External Data Reference
-
-Network and device I/O — HTTP requests, WebSockets, MQTT, Serial, TCP, UDP. For MIDI/OSC specifically see `midi-osc.md`.
-
-Common production needs:
- API polling / webhook ingestion
- Real-time data streams (sensors, market data, chat)
- IoT device control (Arduino, ESP32, smart lights)
- Inter-application messaging
- Hosting a tiny TD-side HTTP server for remote control
-
---
-
-## Web DAT — HTTP Requests
-
-```python
-web = root.create(webDAT, 'api_call')
-web.par.url = 'https://api.example.com/v1/status'
-web.par.fetchmethod = 'get'           # 'get' | 'post' | 'put' | 'delete'
-web.par.format = 'auto'                # 'auto' | 'text' | 'json'
-web.par.timeout = 5.0
-```
-
-**Triggering a request:**
-
-`webDAT` does NOT auto-fetch on cook. Trigger explicitly:
-
-```python
-web.par.fetch.pulse()
-```
-
-Or pulse it from a `chopExecuteDAT` callback when a CHOP value changes (see `dat-scripting.md`).
-
-**Authentication headers:**
-
-Use `webclientDAT` (more flexible) or set `webDAT` headers via the headers DAT:
-
-```python
-web_headers = root.create(tableDAT, 'headers')
-web_headers.appendRow(['Authorization', 'Bearer YOUR_TOKEN'])
-web_headers.appendRow(['Accept', 'application/json'])
-web.par.headers = web_headers.path
-```
-
-**Parsing JSON response:**
-
-```python
-import json
-
-def onTableChange(dat):
-    response = dat.text          # raw response body
-    data = json.loads(response)
-    # Update a tableDAT or store in a constantCHOP for downstream use
-    op('/project1/api_status').par.value0 = data['count']
-    return
-```
-
-Wire this in a `datExecuteDAT` watching the webDAT.
-
-**Polling pattern:**
-
-```python
-# timerCHOP fires every N seconds
-timer = root.create(timerCHOP, 'poll_timer')
-timer.par.length = 5.0
-timer.par.cycle = True
-
-# chopExecuteDAT on the timer's 'cycles' channel pulses the webDAT
-def offToOn(channel, sampleIndex, val, prev):
-    op('/project1/api_call').par.fetch.pulse()
-    return
-```
-
---
-
-## Web Client DAT — More Robust HTTP
-
-`webclientDAT` is the modern replacement for `webDAT` — supports streaming responses, chunked transfer, custom auth.
-
-```python
-client = root.create(webclientDAT, 'api')
-client.par.method = 'POST'
-client.par.url = 'https://api.example.com/events'
-client.par.uploadtype = 'json'
-client.par.uploaddata = '{"event": "scene_change", "scene": 3}'
-client.par.request.pulse()
-```
-
-Output goes to its child `webclient1_response` DAT. Use a `datExecuteDAT` to react.
-
---
-
-## Web Server DAT — TD as HTTP Server
-
-Hosts a tiny HTTP server inside TD. Useful for:
- Status/health endpoints
- Remote control from a phone or another machine
- Webhook receivers from external services
-
-```python
-server = root.create(webserverDAT, 'control_server')
-server.par.port = 8080
-server.par.active = True
-
-# Define handler in the docked callback DAT
-```
-
-In the auto-created `webserver1_callbacks` DAT:
-
-```python
-def onHTTPRequest(webServerDAT, request, response):
-    path = request['uri']
-    if path == '/status':
-        response['statusCode'] = 200
-        response['data'] = '{"fps": 60, "scene": "active"}'
-    elif path == '/scene':
-        idx = int(request['args'].get('index', 0))
-        op('/project1/scene_switch').par.index = idx
-        response['statusCode'] = 200
-        response['data'] = 'OK'
-    else:
-        response['statusCode'] = 404
-        response['data'] = 'Not Found'
-    return response
-```
-
-Test from terminal: `curl http://localhost:8080/status`.
-
-**Security:** No auth by default. Bind to localhost only or add a token check in the callback. Never expose to the public internet without auth.
-
---
-
-## WebSocket DAT — Bidirectional Real-Time
-
-For low-latency bidirectional streams (chat, live data feeds, controllers).
-
-### Client
-
-```python
-ws = root.create(websocketDAT, 'ws_client')
-ws.par.netaddress = 'wss://api.example.com/socket'
-ws.par.active = True
-```
-
-In the docked callbacks DAT:
-
-```python
-def onConnect(dat):
-    dat.sendText('{"action": "subscribe", "channel": "ticks"}')
-    return
-
-def onReceiveText(dat, rowIndex, message):
-    # message is a string; parse JSON, dispatch to ops
-    import json
-    data = json.loads(message)
-    op('/project1/price_chop').par.value0 = data['price']
-    return
-
-def onDisconnect(dat):
-    # Optionally schedule a reconnect
-    return
-```
-
-### Server
-
-```python
-ws = root.create(websocketDAT, 'ws_server')
-ws.par.mode = 'server'
-ws.par.port = 9001
-ws.par.active = True
-```
-
-Same callback structure with an additional `clientID` arg.
-
---
-
-## MQTT — Pub/Sub for IoT
-
-```python
-mqtt = root.create(mqttClientDAT, 'iot')
-mqtt.par.brokeraddress = 'broker.hivemq.com'
-mqtt.par.brokerport = 1883
-mqtt.par.clientid = 'td_install_01'
-mqtt.par.connect.pulse()
-
-# Subscribe in callbacks DAT:
-def onConnect(dat):
-    dat.subscribe('home/lights/+', qos=1)
-    return
-
-def onReceive(dat, topic, payload, qos, retained, dup):
-    # payload is bytes — decode if JSON
-    msg = payload.decode('utf-8')
-    # Dispatch by topic
-    return
-
-# Publish from anywhere:
-op('iot').publish('show/scene', 'sunset', qos=0, retain=False)
-```
-
-For Mosquitto / HiveMQ self-hosted brokers use the same setup with `tcp://192.168.x.x` and your local port.
-
---
-
-## Serial DAT — Arduino, USB Devices
-
-```python
-serial = root.create(serialDAT, 'arduino')
-serial.par.port = '/dev/cu.usbmodem14101'   # macOS — check Arduino IDE
-# Windows: 'COM3', 'COM4', etc.
-serial.par.baudrate = 115200
-serial.par.active = True
-```
-
-In callbacks:
-
-```python
-def onReceive(dat, rowIndex, line):
-    # Each newline-terminated line from Arduino arrives here
-    parts = line.split(',')
-    op('/project1/sensors').par.value0 = float(parts[0])
-    op('/project1/sensors').par.value1 = float(parts[1])
-    return
-```
-
-Send to Arduino:
-```python
-op('arduino').send('LED_ON\n')
-```
-
---
-
-## TCP/IP DAT — Custom Protocols
-
-For talking to non-HTTP servers (game servers, custom protocols, legacy systems).
-
-```python
-tcp = root.create(tcpipDAT, 'show_control')
-tcp.par.netaddress = '192.168.1.50'
-tcp.par.port = 7000
-tcp.par.protocol = 'tcp'        # 'tcp' | 'udp'
-tcp.par.active = True
-```
-
-Send / receive via callbacks similar to websocketDAT.
-
-For UDP-only (fire-and-forget, no connection), use `udpoutDAT` + `udpinDAT` — simpler but unreliable across networks.
-
---
-
-## Common Patterns
-
-### REST API → Visual
-
-```
-timerCHOP (5s loop)
-   → chopExecuteDAT (pulse webDAT.par.fetch on cycle)
-   → webDAT (returns JSON)
-   → datExecuteDAT (parse, write to constantCHOP)
-   → CHOP drives glsl uniform → visuals
-```
-
-### Webhook receiver
-
-```
-webserverDAT (port 8080, /webhook endpoint)
-   → callback writes to a tableDAT log + triggers a scene change
-```
-
-### Real-time stock/crypto ticker
-
-```
-websocketDAT (subscribe to feed)
-   → onReceiveText callback parses JSON
-   → writes to constantCHOP
-   → drives bar chart / typography animation
-```
-
-### IoT-controlled installation
-
-```
-MQTT → callback dispatches by topic
-   → /lights/main → constantCHOP drives lighting render
-   → /audio/volume → mathCHOP for master fader
-```
-
-### Two-way phone control
-
-```
-WebSocket server in TD
-   → simple HTML page on phone connects, sends slider values
-   → callback writes to ops
-   → TD pushes status back via dat.sendText() to phone UI
-```
-
---
-
-## Pitfalls
-
-1. **`webDAT` doesn't auto-fetch** — must explicitly pulse `par.fetch`. Easy to forget.
-2. **Blocking on slow APIs** — `webDAT` runs on the cook thread. A 30s API call freezes TD for 30s. Use `webclientDAT` (async) for anything potentially slow.
-3. **WebSocket reconnection** — TD does NOT auto-reconnect on disconnect. Implement backoff in `onDisconnect`.
-4. **Serial port permissions on macOS** — TD needs Full Disk Access OR the port needs to be unlocked via `sudo chmod 666 /dev/cu.usbmodem...` per session.
-5. **MQTT broker connection state** — `mqttClientDAT` may show `connected=true` but messages don't flow if QoS is wrong or topic ACL blocks. Check broker logs.
-6. **JSON parse errors crash callbacks silently** — wrap parses in try/except and log to textport. Otherwise the callback just stops firing.
-7. **Firewall on Windows** — first time `webserverDAT` binds, Windows pops a firewall dialog. Approve it or the server is unreachable.
-8. **CORS** — `webserverDAT` doesn't add CORS headers by default. If serving a webapp from a different origin, add `Access-Control-Allow-Origin: *` in the response.
-9. **Polling vs push** — polling burns API quota. Always prefer WebSocket / webhook / MQTT for high-frequency data.
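-10. **Floating-point parsing** — sensor data over Serial often comes as strings. `float()` raises `ValueError` on empty or non-numeric input such as `'\n'`, and silently accepts `'NaN'` (returning `nan`, which then propagates into downstream params). Validate before converting.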
-
---
-
-## Quick Recipes
-
-| Goal | Op chain |
-|---|---|
-| Periodic API fetch | `timerCHOP` → `chopExecuteDAT` pulses → `webDAT` → `datExecuteDAT` parses |
-| Webhook receiver | `webserverDAT` (port + path), callback writes to ops |
-| Real-time stream | `websocketDAT` client → onReceiveText → CHOP/DAT |
-| Arduino sensor → visual | `serialDAT` → callback → `constantCHOP` → expression on visual op |
-| TD ↔ phone control | `websocketDAT` server + simple HTML page on phone |
-| MQTT IoT integration | `mqttClientDAT` subscribe → callback dispatches by topic |
@@ -1,211 +0,0 @@
-# MIDI / OSC Reference
-
-External controller input and output — MIDI hardware, TouchOSC mobile UIs, OSC routing across the network.
-
-For audio-driven MIDI patterns (track triggers from spectrum analysis), see also `audio-reactive.md`.
-
---
-
-## MIDI Input — Hardware Controllers
-
-### Discovery
-
-List connected MIDI devices first. Use a `midiinDAT` to enumerate:
-
-```python
-mdat = root.create(midiinDAT, 'mid_devices')
-# Read available device names from the DAT after one cook
-```
-
-Or via Python directly:
-
-```python
-# In td_execute_python
-import td
-devices = [d for d in op.MIDI.devices]   # verify with td_get_docs('midi')
-```
-
-Verify the API with `td_get_docs(topic='midi')` since this varies between TD versions.
-
-### MIDI In CHOP
-
-Standard pattern:
-
-```python
-midi_in = root.create(midiinCHOP, 'midi_in')
-midi_in.par.device = 0               # device index from discovery
-midi_in.par.activechan = True
-```
-
-Output channels are named `ch<C>c<N>` (MIDI channel C, controller number N) and `ch<C>n<N>` (MIDI channel C, note number N):
- `ch1c74` — channel 1, CC 74
- `ch1n60` — channel 1, note 60 (middle C) — value is velocity 0-127
-
-**Map a CC to a parameter:**
-
-```python
-op('/project1/bloom1').par.threshold.mode = ParMode.EXPRESSION
-op('/project1/bloom1').par.threshold.expr = "op('midi_in')['ch1c74'][0] / 127.0"
-```
-
-**Map a note as a trigger:**
-
-Notes in `midiinCHOP` output velocity while held, 0 when released. Use a `triggerCHOP` to convert a held note into pulses:
-
-```python
-trig = root.create(triggerCHOP, 'note_trig')
-trig.par.threshold = 1
-trig.par.triggeron = 'increase'
-trig.inputConnectors[0].connect(op('midi_in'))
-# Filter to a single channel via a selectCHOP if desired
-```
-
-### MIDI Learn Pattern
-
-Build a reusable learn pattern when you don't know the controller's CC layout in advance:
-
-1. Drop a `midiinCHOP` and `selectCHOP` after it.
-2. User wiggles the controller knob.
-3. Use `td_read_chop` on the midiinCHOP to identify which channel is non-zero — that's the active CC.
-4. Set the `selectCHOP.par.channames` to that channel name.
-5. Save the mapping to a `tableDAT` so it persists across sessions.
-
---
-
-## MIDI Output
-
-```python
-midi_out = root.create(midioutCHOP, 'midi_out')
-midi_out.par.device = 0
-midi_out.par.outputformat = 'continuous'    # 'continuous' | 'event'
-
-# Drive an output: send out a CC mapped from any 0-1 source
-src = root.create(constantCHOP, 'cc_src')
-src.par.name0 = 'ch1c20'
-src.par.value0 = 0.5
-midi_out.inputConnectors[0].connect(src)
-```
-
-For note events specifically, use `event` mode and pulse the value with a `pulseCHOP` or `triggerCHOP`.
-
---
-
-## OSC Input — Network Control
-
-OSC is the more flexible cousin of MIDI. Used heavily for:
- TouchOSC / Lemur mobile control surfaces
- Show control systems (QLab, Watchout)
- Inter-application sync (Ableton via Max for Live, Resolume, etc.)
-
-### OSC In CHOP
-
-```python
-osc_in = root.create(oscinCHOP, 'osc_in')
-osc_in.par.port = 7000             # listen on UDP 7000
-osc_in.par.localaddress = ''       # empty = all interfaces
-osc_in.par.queued = False          # immediate vs. queued processing
-```
-
-Each incoming OSC address becomes a channel. `/scene/1/intensity` becomes a channel named `scene_1_intensity` (TD sanitizes slashes to underscores).
-
-**Common gotcha:** TD only creates the channel after the FIRST message arrives at that address. Send a "hello" message from the controller during setup, or pre-declare channel names manually.
-
-### OSC In DAT (for raw events)
-
-Use an `oscinDAT` when you need full message access (multiple typed args, addresses with brackets/regex).
-
-```python
-osc_dat = root.create(oscinDAT, 'osc_events')
-osc_dat.par.port = 7001
-# Each row: timestamp, address, type tags, args...
-```
-
-Drive logic via a `datExecuteDAT` watching the `oscinDAT`:
-
-```python
-def onTableChange(dat):
-    last = dat[dat.numRows - 1, 'message']
-    parsed = last.val.split()
-    addr = parsed[0]
-    args = parsed[1:]
-    if addr == '/scene/trigger':
-        op('/project1/scene_switcher').par.index = int(args[0])
-    return
-```
-
---
-
-## OSC Output — Sending to External Apps
-
-```python
-osc_out = root.create(oscoutCHOP, 'osc_out')
-osc_out.par.netaddress = '127.0.0.1'    # destination IP
-osc_out.par.port = 9000
-
-# Channel names become OSC addresses
-src = root.create(constantCHOP, 'send')
-src.par.name0 = 'scene/intensity'        # → /scene/intensity
-src.par.value0 = 0.7
-osc_out.inputConnectors[0].connect(src)
-```
-
-**Channel-to-address mapping:** TD prepends `/` automatically. Use `/` in channel names to nest.
-
-For one-shot string/typed messages, use `oscoutDAT` and call `.sendOSC(address, args)`:
-
-```python
-op('osc_out_dat').sendOSC('/scene/trigger', [1, 'fade'])
-```
-
---
-
-## TouchOSC / Mobile UI Pattern
-
-Common setup for live VJ control from a phone/tablet:
-
-1. **Configure TouchOSC layout** — assign each control an OSC address like `/vj/master`, `/vj/scene/1`, etc.
-2. **Find your machine's LAN IP** — TouchOSC needs to point at it.
-3. **TD listens** on `oscinCHOP.par.port = 8000` (or whichever).
-4. **Map channels to params** via expressions:
-
-```python
-op('/project1/master_level').par.opacity.mode = ParMode.EXPRESSION
-op('/project1/master_level').par.opacity.expr = "op('osc_in')['vj_master']"
-```
-
-5. **Send feedback** to the controller via `oscoutCHOP` — useful for syncing state across multiple devices.
-
---
-
-## Network / Multi-Machine
-
-OSC over LAN works out-of-the-box. For multi-TD-instance sync (e.g., projection cluster):
-
- One TD acts as **master**, broadcasts `/sync/...` over OSC
- Worker TDs run `oscinCHOP` listening on the same port
- Use UDP **broadcast address** (e.g., `192.168.1.255`) on the master's `oscoutCHOP.par.netaddress` to hit all peers
-
-For reliability over WAN, use `webserverDAT` or `websocketDAT` with an external relay instead — UDP loss is invisible.
-
---
-
-## Pitfalls
-
-1. **MIDI device indexing** — device `0` is whichever device TD enumerated first. Reorder may shift it. Pin by name when possible.
-2. **OSC channel names** — TD doesn't create a channel until the first message lands. New channels invalidate cooked dependents on first arrival, causing a one-frame stutter.
-3. **OSC queued mode** — `par.queued = True` defers processing to a single per-frame batch. Lower latency but messages arriving in the same frame collapse to the last value. Turn it off for triggers, on for continuous knobs.
-4. **MIDI clock vs. transport** — `midiinCHOP` reports clock if available. Use `midisyncCHOP` (if your TD version exposes it) or compute BPM from clock pulses (24 per quarter note).
-5. **Latency** — wired MIDI is ~1-3ms. WiFi OSC is 10-30ms with jitter. Use wired for tight beat-locked work.
-6. **Port conflicts** — only one process can bind a given UDP port on most operating systems. If `oscinCHOP` shows no traffic, check that no other app (Max, Ableton, etc.) is already listening on that port.
-
---
-
-## Quick Recipes
-
-| Goal | Op chain |
-|---|---|
-| Knob → bloom intensity | `midiinCHOP` → expression on `bloom.par.threshold` |
-| Note → scene change | `midiinCHOP` → `triggerCHOP` → `selectCHOP` → drive `switchTOP.par.index` |
-| Phone slider → master fader | TouchOSC `/master` → `oscinCHOP` → expression on output `level.par.opacity` |
-| TD → Resolume scene trigger | `oscoutCHOP` channel `composition/layers/1/clips/1/connect` → Resolume listening on 7000 |
-| Multi-projector sync | Master TD `oscoutCHOP` broadcast → workers `oscinCHOP` |
@@ -1,281 +0,0 @@
-# Panel & UI Reference
-
-Interactive control surfaces inside TouchDesigner — buttons, sliders, fields, custom parameter pages, panel callbacks. For HUD overlays (rendered text on visuals) see `layout-compositor.md`.
-
-Use cases:
- VJ control rack (master fader, scene buttons, FX toggles)
- Installation operator console
- Self-contained TOX components with their own parameter UIs
- Phone-style touch interfaces displayed on a tablet
-
---
-
-## Two Layers of UI
-
-| Layer | What it is | Use for |
-|---|---|---|
-| **Custom Parameters** | Params on any COMP, edited like built-in TD params | Configurable components, presets, "settings" panels |
-| **Panel COMPs** | Visible widgets (button, slider, field) inside a containerCOMP | Interactive control surfaces, real-time UIs |
-
-Combine both: build a containerCOMP with panel widgets that read/write custom parameters on a parent component.
-
---
-
-## Custom Parameters
-
-Add user-editable params to any COMP. Params persist with the COMP, drive expressions, and survive save/reload.
-
-```python
-# Add a custom page to a baseCOMP
-comp = op('/project1/my_component')
-page = comp.appendCustomPage('Controls')
-
-# Add typed params
-page.appendFloat('Intensity', label='Intensity')[0]   # returns a Par
-page.appendInt('Count', label='Count')[0]
-page.appendToggle('Enabled', label='Enabled')[0]
-page.appendMenu('Mode', menuNames=['off', 'soft', 'hard'], menuLabels=['Off', 'Soft', 'Hard'])[0]
-page.appendStr('Title', label='Title')[0]
-page.appendRGB('Color', label='Color')                # returns 3 pars
-page.appendXY('Offset', label='Offset')               # returns 2 pars
-page.appendPulse('Reset', label='Reset')[0]
-page.appendFile('TextureFile', label='Texture')[0]
-```
-
-**Read/write from anywhere:**
-
-```python
-val = op('/project1/my_component').par.Intensity.eval()
-op('/project1/my_component').par.Intensity = 0.7
-```
-
-**Drive other params via expression:**
-
-```python
-op('bloom1').par.threshold.mode = ParMode.EXPRESSION
-op('bloom1').par.threshold.expr = "op('/project1/my_component').par.Intensity"
-```
-
-**Pulse handler (Reset button):**
-
-Use a `parameterExecuteDAT` watching the COMP's pulse params. See `dat-scripting.md`.
-
---
-
-## Panel COMPs — The Widgets
-
-Each is a COMP that renders as a clickable/draggable widget inside a `containerCOMP`.
-
-| Type | Type Name | Use |
-|---|---|---|
-| Button | `buttonCOMP` | Click action — momentary or toggle |
-| Slider | `sliderCOMP` | Drag to set 0-1 value (1D or 2D) |
-| Field | `fieldCOMP` | Text input |
-| Container | `containerCOMP` | Layout + visual styling, holds children |
-| Select | `selectCOMP` | Reference and display content from another COMP |
-| List | `listCOMP` | Scrollable list with row callbacks |
-
-### Button
-
-```python
-btn = root.create(buttonCOMP, 'play_btn')
-btn.par.w = 120; btn.par.h = 40
-btn.par.buttontype = 'momentary'    # 'momentary' | 'toggleup' | 'togglepress' | 'radio'
-btn.par.bgcolorr = 0.1; btn.par.bgcolorg = 0.1; btn.par.bgcolorb = 0.1
-btn.par.text = 'Play'
-
-# Read state
-state = btn.panel.state          # 1 when active
-```
-
-### Slider
-
-```python
-sld = root.create(sliderCOMP, 'master_fader')
-sld.par.w = 60; sld.par.h = 300
-sld.par.style = 'vertical'        # 'vertical' | 'horizontal' | 'xy'
-sld.par.value0min = 0.0
-sld.par.value0max = 1.0
-
-# Drive a parameter via expression (always-on, no callback needed)
-op('/project1/master_level').par.opacity.mode = ParMode.EXPRESSION
-op('/project1/master_level').par.opacity.expr = "op('master_fader').panel.u"
-```
-
-`panel.u` and `panel.v` give the 0-1 normalized values. For 2D sliders both are populated.
-
-### Field (Text Input)
-
-```python
-fld = root.create(fieldCOMP, 'scene_name')
-fld.par.w = 200; fld.par.h = 30
-fld.par.fieldtype = 'string'      # 'string' | 'integer' | 'float'
-
-# Read current text
-text = fld.panel.field            # the text content
-```
-
-### List
-
-For scrollable lists with selectable rows, use the docked `list1_callbacks` DAT to handle row interactions. Set up cells via the `list_definition` table DAT.
-
---
-
-## Container COMP — Layout & Styling
-
-`containerCOMP` is the primary parent for grouping widgets and arranging layouts.
-
-```python
-panel = root.create(containerCOMP, 'control_panel')
-panel.par.w = 400; panel.par.h = 600
-panel.par.bgcolorr = 0.05
-panel.par.bgcolorg = 0.05
-panel.par.bgcolorb = 0.05
-panel.par.bgalpha = 1.0
-
-# Lay out child panels in a horizontal row (use 'toptobottom' for a vertical stack)
-panel.par.align = 'lefttoright'   # 'lefttoright' | 'toptobottom' | etc.
-```
-
-Children are positioned automatically based on `par.align`. For absolute positioning use `par.align = 'fillresize'` and set each child's `par.x` / `par.y`.
-
-### Layout Strategies
-
-| `par.align` | Behavior |
-|---|---|
-| `lefttoright` | Children stacked horizontally |
-| `toptobottom` | Children stacked vertically |
-| `righttoleft` / `bottomtotop` | Reversed stacks |
-| `fillresize` | Children sized to fill, manual positioning |
-| `top` / `bottom` / `left` / `right` | Fixed positioning |
-
-For complex grids: nest containers — vertical container holding horizontal containers.
-
---
-
-## Panel Callbacks — Reacting to Events
-
-`panelExecuteDAT` watches a panel and fires Python callbacks on user interaction.
-
-```python
-pe = root.create(panelExecuteDAT, 'btn_handler')
-pe.par.panel = '/project1/play_btn'
-pe.par.click = True              # respond to clicks
-pe.par.value = True              # respond to value changes
-```
-
-In its docked DAT:
-
-```python
-def onOffToOn(panelValue):
-    # Click pressed
-    op('/project1/scene_timer').par.start.pulse()
-    return
-
-def onOnToOff(panelValue):
-    # Click released
-    return
-
-def onValueChange(panelValue):
-    # Slider drag, field change, etc.
-    new_val = panelValue.eval()
-    op('/project1/master').par.opacity = new_val
-    return
-```
-
-For pulse params on custom-parameter pages, use a `parameterExecuteDAT` instead.
-
---
-
-## Building a Complete VJ Control Panel
-
-End-to-end pattern:
-
-```python
-# 1. Top-level container
-panel = root.create(containerCOMP, 'vj_control')
-panel.par.w = 800; panel.par.h = 200
-panel.par.align = 'lefttoright'
-
-# 2. Master fader column
-master_col = panel.create(containerCOMP, 'master')
-master_col.par.w = 120; master_col.par.h = 200
-master_col.par.align = 'toptobottom'
-
-master_label = master_col.create(textTOP, 'lbl')
-master_label.par.text = 'MASTER'
-
-master_sld = master_col.create(sliderCOMP, 'fader')
-master_sld.par.w = 60; master_sld.par.h = 150
-master_sld.par.style = 'vertical'
-
-# 3. Scene buttons row
-scene_col = panel.create(containerCOMP, 'scenes')
-scene_col.par.w = 400; scene_col.par.h = 200
-scene_col.par.align = 'lefttoright'
-for i in range(8):
-    b = scene_col.create(buttonCOMP, f'scene_{i+1}')
-    b.par.w = 50; b.par.h = 50
-    b.par.text = str(i+1)
-    b.par.buttontype = 'radio'      # only one active at a time
-
-# 4. FX toggle column
-fx_col = panel.create(containerCOMP, 'fx')
-fx_col.par.w = 280; fx_col.par.h = 200
-fx_col.par.align = 'toptobottom'
-for fx in ['Bloom', 'CRT', 'Glitch', 'Strobe']:
-    t = fx_col.create(buttonCOMP, fx.lower())
-    t.par.w = 220; t.par.h = 35
-    t.par.text = fx
-    t.par.buttontype = 'toggleup'
-
-# 5. Display in a window
-win = root.create(windowCOMP, 'control_win')
-win.par.winop = panel.path
-win.par.winw = 800; win.par.winh = 200
-win.par.borders = True
-win.par.winopen.pulse()
-```
-
-Then wire panel values to ops via expressions or panelExecuteDATs.
-
---
-
-## Showing the Panel — Window or Embedded
-
-| Approach | When |
-|---|---|
-| `windowCOMP` pointing at panel | Standalone control surface, separate display |
-| Render the containerCOMP via `renderTOP` | Composite UI over visuals (HUD-style) |
-| Use a `panelCOMP` directly inside a network editor pane | Designer/dev preview only — panel is fully interactive |
-
-For a touch-screen tablet, use a `windowCOMP` on a second display routed to the tablet's HDMI input.
-
---
-
-## Pitfalls
-
-1. **Panel won't respond to clicks** — likely `par.disabled = True` or the parent container has `par.disableinputs = True`. Check the panel hierarchy.
-2. **Slider value not updating** — `panel.u/v` reads the visual position. If you set `par.value0` directly, the visual lags. Use `par.value0` AS the source of truth and let the slider follow.
-3. **Custom param won't appear** — must call `appendCustomPage` first, then append params. Pages with no params don't show.
-4. **Custom param disappears on reload** — params added via Python at runtime persist only if the COMP is saved AFTER. Use a `tox` save (`comp.save('mycomp.tox')`) or commit via `td_execute_python` then save the project.
-5. **Event callback fires twice** — both `onOffToOn` and `onValueChange` may fire on a single button press. Pick one to handle the action; don't double-trigger.
-6. **Pulse params need `.pulse()`** — setting `par.X = True` on a pulse param does nothing. Always use `.pulse()`.
-7. **Field text doesn't commit until Tab/Enter** — fields don't fire callbacks while typing. Use `par.committemode = 'all'` to fire on every keystroke (heavy).
-8. **`par.text` vs panel content** — `buttonCOMP.par.text` is the LABEL on the button. The button's STATE is `panel.state` (0/1). Don't confuse them.
-9. **Touch input on macOS** — multi-touch via direct touch panels works but TD's gesture handling is rudimentary. For complex multi-touch (pinch/rotate), use TouchOSC on a tablet instead.
-10. **Layout doesn't update** — changing `par.align` requires the container to re-cook. Touch a child or pulse the container to trigger.
-
---
-
-## Quick Recipes
-
-| Goal | Setup |
-|---|---|
-| Master fader | `sliderCOMP` (vertical) → expression on `level.par.opacity` |
-| Scene picker | 8 `buttonCOMP` (radio) → `selectCHOP` on their state → drive `switchTOP.par.index` |
-| FX toggle | `buttonCOMP` (toggleup) → expression on `bypass` of an FX op |
-| Numeric input | `fieldCOMP` (float) → expression on target par |
-| Component settings | Custom params on the component COMP, panel widgets inside drive them |
-| Touch tablet UI | `containerCOMP` with widgets → `windowCOMP` to second display |
-| Status display | `textTOP` rendered into the panel via `selectCOMP` |
@@ -1,245 +0,0 @@
-# Particles Reference
-
-Particle systems in TouchDesigner — modern POPs (Particle Operators) and the legacy particleSOP path.
-
-For instancing static geometry (without per-instance lifetime/velocity), see `geometry-comp.md`. For GLSL-driven feedback simulations (no particle abstraction), see `operator-tips.md` (Feedback TOP section).
-
-Always call `td_get_par_info` for the op type before setting params. Param names below reflect TD 2025.32 — verify before relying on them.
-
---
-
-## Two Paths: POPs vs. SOPs
-
-| | **POP family** (modern) | **particleSOP** (legacy) |
-|---|---|---|
-| GPU? | Yes (compute) | No (CPU) |
-| Particle count | 100k+ comfortably | ~5k before slowdown |
-| API style | Source / Force / Solver / Render chain | Single op with many params |
-| Use for | New projects, anything intensive | Quick demos, low counts, TD < 2023 |
-
-**Default to POPs.** Only fall back to particleSOP if a POP variant of an op you need doesn't exist.
-
---
-
-## POP Pipeline Overview
-
-A POP system is a chain of operators inside a `geometryCOMP`:
-
-```
-popSourceTOP / popSourceSOP   ← spawn new particles
-        ↓
-popForceTOP (gravity, wind, etc.)
-        ↓
-popForceTOP (attractor, vortex, ...)
-        ↓
-popDeleteTOP (lifetime, bounds)
-        ↓
-popSolverTOP                  ← integrates velocity, updates positions
-        ↓
-[render via geometryCOMP / glslMAT instancing]
-```
-
-POP buffers carry standard channels: `P` (position), `v` (velocity), `life`, `id`, `Cd` (color), plus any custom channels you add.
-
---
-
-## Minimal POP Setup
-
-```python
-# Create a geometry COMP to hold the POP network
-geo = root.create(geometryCOMP, 'particles_geo')
-
-# 1. Source — emit particles from a point
-src = geo.create(popSourceTOP, 'src')
-src.par.birthrate = 500          # per second
-src.par.life = 4.0                # seconds
-
-# 2. Gravity force
-grav = geo.create(popForceTOP, 'gravity')
-grav.par.forcetype = 'gravity'
-grav.par.fy = -9.8
-
-# 3. Lifetime cleanup
-delp = geo.create(popDeleteTOP, 'cull')
-delp.par.condition = 'lifeleq'    # delete when life <= 0
-delp.par.value = 0
-
-# 4. Solver
-solv = geo.create(popSolverTOP, 'solver')
-solv.par.timestep = 'frame'
-
-# Wire: source → force → delete → solver
-src.outputConnectors[0].connect(grav.inputConnectors[0])
-grav.outputConnectors[0].connect(delp.inputConnectors[0])
-delp.outputConnectors[0].connect(solv.inputConnectors[0])
-```
-
-The `popSolverTOP` output IS the live particle buffer. Render it via `glslMAT` instancing on a small SOP (sphere, point) as the "shape" of each particle.
-
---
-
-## Common Forces
-
-| Force type | Effect | Common params |
-|---|---|---|
-| `gravity` | Constant directional pull | `fx`, `fy`, `fz` |
-| `wind` | Constant velocity addition | `wx`, `wy`, `wz` |
-| `drag` | Velocity damping over time | `dragstrength` |
-| `noise` | Curl-noise turbulence | `noiseamp`, `noisefreq`, `noiseseed` |
-| `attractor` | Pull toward a point | `position`, `strength`, `falloff` |
-| `vortex` | Swirl around an axis | `axis`, `strength` |
-| `point` (custom) | GLSL-evaluated arbitrary force | via `popforceadvancedTOP` |
-
-Stack multiple `popForceTOP`s in series — each modifies velocity additively.
-
---
-
-## Lifecycle Patterns
-
-### Continuous emission (e.g. smoke plume)
-
-```python
-src.par.birthrate = 800
-src.par.life = 6.0       # variance via 'lifevariance'
-src.par.lifevariance = 1.5
-```
-
-### Burst emission (e.g. explosion)
-
-```python
-src.par.birthrate = 0    # no continuous emission
-src.par.burst.pulse()    # one burst on demand (verify param name)
-src.par.burstcount = 5000
-src.par.life = 1.5
-```
-
-### Beat-triggered burst
-
-Wire a `triggerCHOP` (from audio or MIDI) to pulse the burst:
-
-```python
-op('/project1/audio_kick_trigger').outputConnectors[0].connect(...)
-# Then via a chopExecuteDAT, on each kick:
-def offToOn(channel, sampleIndex, val, prev):
-    op('/project1/particles_geo/src').par.burst.pulse()
-    return
-```
-
---
-
-## Rendering Particles
-
-### Point Sprites (simplest)
-
-```python
-# Inside the geometryCOMP, render the solver output directly
-# The geo's first SOP child becomes the geometry
-# But for POPs, we typically render via glslMAT on a small "shape"
-
-# Simple billboard sphere per particle:
-shape = geo.create(sphereSOP, 'shape')
-shape.par.rad = 0.05
-shape.par.rows = 6; shape.par.cols = 6   # low-poly to keep it fast
-
-# Material that uses POP buffer for instancing
-mat = root.create(glslMAT, 'particle_mat')
-# Configure mat.par.instancingTOP = solver output (verify param name)
-```
-
-The exact instancing setup varies by TD version — call `td_get_hints(topic='popInstancing')` (or `popRender` / `instancing` — try a few).
-
-### GPU Sprites via glslcopyPOP
-
-For dense smoke/fire-like effects, use a `glslcopyPOP` that writes per-particle color/size from a compute shader, then render as point sprites with additive blending in a `renderTOP`.
-
---
-
-## Collisions
-
-```python
-# Collision detection against an SOP
-coll = geo.create(popCollideTOP, 'ground_coll')
-coll.par.collidewithsop = '/project1/ground_geo'  # path to colliding SOP
-coll.par.bounce = 0.3
-coll.par.friction = 0.1
-# Insert between force and solver
-```
-
-For plane/box collisions only, use `popPlaneCollideTOP` (cheaper).
-
---
-
-## Custom Per-Particle Data
-
-Add a custom channel via `popAttribCreateTOP` (or by writing through `glslcopyPOP`):
-
-```python
-# Add a "phase" attribute initialized random per-particle, used in render shader
-attr = geo.create(popAttribCreateTOP, 'add_phase')
-attr.par.attribname = 'phase'
-attr.par.value0 = 'rand(@id)'   # expression in TD's POP attribute language
-```
-
-Then in the render shader, `texture(sTDPOPInputs[0].phase, ...)` (or whichever sampler convention your TD version uses — verify with `td_get_docs(topic='pops')`).
-
---
-
-## Legacy particleSOP (Use Sparingly)
-
-For quick demos or low-count systems:
-
-```python
-# Inside a geo
-psrc = geo.create(addSOP, 'point_src')      # source: a single point
-psrc.par.points = '0 0 0'
-
-part = geo.create(particleSOP, 'particles')
-part.par.life = 3.0
-part.par.birthrate = 100
-part.par.gravityy = -9.8
-part.par.windx = 0.5
-part.inputConnectors[0].connect(psrc)
-```
-
-CPU-bound. Beyond ~5,000 active particles you'll see frame drops.
-
---
-
-## Pitfalls
-
-1. **Particles don't appear** — usually a render-side issue. Check via `td_get_screenshot` on the solver output (renders the buffer as a TOP-like view in newer TD). Then check the `geometryCOMP`'s render path.
-2. **Burst won't fire** — verify the `burst` param is a pulse, not a toggle. Pulses must use `.pulse()`, not `= True`.
-3. **Particles teleport on first frame** — uninitialized velocity. Set `popSourceTOP.par.initialvelocityX/Y/Z` or zero them explicitly.
-4. **Gravity feels wrong** — TD's "1 unit" depends on your scene scale. Start with `fy = -1.0` and scale up rather than using real-world 9.8.
-5. **High birthrate = stuttering** — birthrate is per-second, not per-frame. At 60fps, `birthrate = 6000` is 100/frame which is fine; `birthrate = 600000` will tank.
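-6. **POP solver order matters** — forces apply in the order they appear in the chain. Drag only damps velocity that was added before it, so placing gravity BEFORE drag damps gravity's contribution in the same step, while placing it AFTER drag leaves that step's gravity undamped. Choose the order deliberately.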
-7. **Instancing param name varies** — `mat.par.instancingTOP` vs. `mat.par.instanceop` vs. `mat.par.instances` differs across TD versions. Always check `td_get_par_info(op_type='glslMAT')`.
-8. **Cooking dependency loops** — POP solvers create implicit time-loops. The "cook dependency loop" warning is expected and harmless for POPs.
-9. **CHOP-driven force values** — when a force param is expression-bound to a CHOP (e.g., audio-reactive gravity), make sure the CHOP cooks before the solver. If not, force lags by one frame.
-
---
-
-## Performance Targets
-
-| Particle count | Setup | Frame budget @ 60fps |
-|---|---|---|
-| < 1k | particleSOP fine | trivial |
-| 1k - 10k | POPs, simple forces | ~2-5ms |
-| 10k - 100k | POPs, GPU-only forces | ~5-15ms |
-| 100k+ | `glslcopyPOP`, custom compute | ~10-25ms |
-| 1M+ | Custom GPU buffer, no POP framework | depends on shader |
-
-Use `td_get_perf` to find which op in the POP chain is the bottleneck.
-
---
-
-## Quick Recipes
-
-| Goal | Pipeline |
-|---|---|
-| Smoke plume | `popSourceTOP` (point) → gravity + wind + noise → `popDeleteTOP` (life) → solver → glslMAT instancing |
-| Beat-triggered burst | `triggerCHOP` (audio) → chopExecuteDAT pulses `popSourceTOP.par.burst` |
-| Fireworks shell | Burst at point → drag + gravity → secondary burst on lifetime threshold |
-| Snow/rain | Continuous emission across XZ plane (high y), gravity + small wind, infinite life box-deleted |
-| Sparks | Burst, very short life (0.3s), bright additive render, motion blur via feedback |
-| Audio particles | Birthrate driven by audio envelope, color driven by frequency band |
@@ -1,211 +0,0 @@
-# Projection Mapping Reference
-
-Multi-window output, surface mapping, edge blending, and projector calibration patterns for installation/event work.
-
-For HUD layouts and on-screen panel grids, see `layout-compositor.md`. For wireframe/test-pattern generation, see `operator-tips.md`.
-
---
-
-## Window COMP — Output to a Display
-
-The `windowCOMP` is how TD pushes pixels to a real display.
-
-```python
-win = root.create(windowCOMP, 'output_window')
-win.par.winop = '/project1/final_out'   # path to the TOP being displayed
-win.par.winw = 1920
-win.par.winh = 1080
-win.par.winoffsetx = 0                  # screen-space offset
-win.par.winoffsety = 0
-win.par.borders = False                 # no chrome
-win.par.alwaysontop = True
-win.par.cursor = False                  # hide cursor in fullscreen
-win.par.justify = 'fillaspect'          # 'fill' | 'fitaspect' | 'fillaspect' | 'native'
-win.par.winopen.pulse()                 # OPEN the window
-```
-
-To target a specific physical display, set `par.location`:
-
-```python
-win.par.location = 'secondary'          # 'primary' | 'secondary' | 'monitor1' | 'monitor2' | ...
-```
-
-Or set absolute coordinates using `winoffsetx/y` matched to your OS display layout.
-
-**Always pulse `winopen` — setting params alone doesn't open the window.**
-
---
-
-## Multi-Window Output
-
-For multi-projector or multi-display setups, create one `windowCOMP` per output, each pointing at a different TOP.
-
-```python
-for i, screen_top in enumerate(['out_left', 'out_center', 'out_right']):
-    w = root.create(windowCOMP, f'win_{i}')
-    w.par.winop = f'/project1/{screen_top}'
-    w.par.winw = 1920; w.par.winh = 1080
-    w.par.winoffsetx = i * 1920
-    w.par.winoffsety = 0
-    w.par.borders = False
-    w.par.alwaysontop = True
-    w.par.cursor = False
-    w.par.winopen.pulse()
-```
-
-For ultra-wide single-output spans, use ONE windowCOMP at e.g. 5760×1080 spanning three projectors via the GPU's mosaic/spanning mode (Nvidia Mosaic, AMD Eyefinity), then split content via `cropTOP` per screen inside TD.
-
---
-
-## 4-Point Corner Pin (Quad Warp)
-
-The simplest projection mapping primitive — warping a rectangle onto a quadrilateral.
-
-```python
-# Source content
-src = op('/project1/scene_out')
-
-# Manual: cornerPinTOP (TD has this built-in)
-cp = root.create(cornerPinTOP, 'corner_pin')
-cp.par.tlx = 0.05; cp.par.tly = 0.10    # top-left (normalized 0-1)
-cp.par.trx = 0.95; cp.par.try = 0.08    # top-right
-cp.par.brx = 0.93; cp.par.bry = 0.92    # bottom-right
-cp.par.blx = 0.07; cp.par.bly = 0.94    # bottom-left
-cp.inputConnectors[0].connect(src)
-```
-
-Alternative: use a `geometryCOMP` with a `gridSOP` and bend the verts in vertex GLSL. More flexible (curved surfaces) but more setup.
-
-Verify TD 2025.32 param names with `td_get_par_info(op_type='cornerPinTOP')`.
-
---
-
-## Bezier / Mesh Warp (Curved Surfaces)
-
-For non-flat surfaces (domes, columns, curved walls), use a subdivided mesh and per-vertex displacement.
-
-### Pattern: Grid Mesh + GLSL Displacement
-
-```python
-# Subdivided grid in a geo
-geo = root.create(geometryCOMP, 'warp_geo')
-grid = geo.create(gridSOP, 'warp_grid')
-grid.par.rows = 32          # higher = smoother curve
-grid.par.cols = 32
-grid.par.sizex = 2; grid.par.sizey = 2
-
-# Texture the source onto it
-mat = root.create(constMAT, 'warp_mat')      # use constMAT for unlit projection
-mat.par.maptop = '/project1/scene_out'        # source TOP
-
-geo.par.material = mat.path
-
-# Render to a TOP that goes to the projector window
-cam = root.create(cameraCOMP, 'cam_proj')
-cam.par.tz = 4
-
-render = root.create(renderTOP, 'projection_out')
-render.par.camera = cam.path
-render.par.geometry = geo.path
-render.par.outputresolution = 'custom'
-render.par.resolutionw = 1920; render.par.resolutionh = 1080
-```
-
-For per-vertex offsets, write a vertex GLSL on the constMAT (or use `glslMAT`) and read displacement values from a CHOP via uniform.
-
-Calibration is iterative: render a checkerboard from `scene_out`, project it, photograph the projection, manually nudge corner/grid points until aligned.
-
---
-
-## Edge Blending (Multi-Projector Overlap)
-
-When two projectors overlap, the overlap region is twice as bright. Blend by ramping each projector's edge alpha to 0 across the overlap zone.
-
-### GLSL Edge Blend Shader
-
-Per-projector output pass that fades each overlapping edge to black:
-
-```glsl
-// edge_blend_pixel.glsl
-out vec4 fragColor;
-uniform float uBlendLeft;     // overlap width on left edge (0-0.5, 0=no blend)
-uniform float uBlendRight;
-uniform float uGamma;          // typically 2.2 — perceptual ramp
-
-void main() {
-    vec2 uv = vUV.st;
-    vec4 col = texture(sTD2DInputs[0], uv);
-
-    float aL = (uBlendLeft  > 0.0) ? smoothstep(0.0, uBlendLeft, uv.x) : 1.0;
-    float aR = (uBlendRight > 0.0) ? smoothstep(0.0, uBlendRight, 1.0 - uv.x) : 1.0;
-    float a = pow(aL * aR, uGamma);
-
-    fragColor = TDOutputSwizzle(vec4(col.rgb * a, 1.0));
-}
-```
-
-Apply this to each overlap-touching projector's output. Tune `uBlendLeft` / `uBlendRight` to match your physical overlap.
-
-For top/bottom blends or cylindrical setups, extend the shader with `uBlendTop` / `uBlendBottom`.
-
---
-
-## Calibration Patterns
-
-Useful test patterns for aligning projectors. Build a `switchTOP` selecting one of these, route to all projector windows during setup.
-
-```python
-# Solid white — for brightness/uniformity check
-white = root.create(constantTOP, 'cal_white')
-white.par.colorr = 1.0; white.par.colorg = 1.0; white.par.colorb = 1.0
-
-# Centered crosshair — for keystone alignment
-gridcross = root.create(textTOP, 'cal_cross')
-gridcross.par.text = '+'
-gridcross.par.fontsizex = 200
-
-# Fine grid — for warp/mesh alignment (use rampTOP + math + threshold, or build via GLSL)
-# Color bars for projector color calibration
-bars = root.create(rampTOP, 'cal_bars')
-bars.par.type = 'horizontal'
-```
-
-Or use the bundled `testpatternTOP` if your TD version includes it.
-
---
-
-## Projection Audit Workflow
-
-When debugging a multi-screen setup:
-
-1. Render a unique color and label per output (`textTOP` saying "LEFT", "CENTER", "RIGHT").
-2. Check that each window is sourcing the correct path: `td_get_operator_info(path='/project1/win_0')`.
-3. Verify display assignment: walk to each projector and confirm visually.
-4. Check resolution: physical projector native res vs. TD output res — mismatches cause scaling artifacts.
-5. Check cooking with `td_get_perf` — if a window's source TOP isn't cooking, the projector keeps showing the last rendered frame, frozen.
-
---
-
-## Pitfalls
-
-1. **Window won't open** — you forgot `winopen.pulse()`. Setting params alone doesn't open it.
-2. **Wrong display** — `par.location='secondary'` depends on OS display order. Set `winoffsetx/y` to absolute coords as a more reliable override.
-3. **Cursor visible** — set `par.cursor = False` BEFORE opening, or close+reopen.
-4. **Black projection** — usually a cooking issue. Verify `final_out` TOP is cooking via `td_get_perf`. Check `td_get_errors` recursively from `/`.
-5. **Tearing / vsync** — `windowCOMP` honors `par.vsync`. For projection always set `vsync='vsync'` (default). Tearing means GPU is over-budget — reduce render resolution.
-6. **Aspect mismatch** — a projector's native resolution is often 1920×1200 (16:10), not 1920×1080 (16:9). Use `justify='fitaspect'` or render at the projector's native resolution.
-7. **Non-Commercial license** — caps total resolution at 1280×1280. For real installation work you need Commercial. Pro license adds 4K+.
-8. **Multiple monitors on macOS** — `windowCOMP` honors macOS Spaces. Disable Spaces or pin TD to a specific display in System Settings before showtime.
-
---
-
-## Quick Recipes
-
-| Goal | Approach |
-|---|---|
-| Single fullscreen output | One `windowCOMP`, `justify='fillaspect'`, `winopen.pulse()` |
-| 3-projector wide span | 3 `windowCOMP` + per-output `cropTOP` from one wide source |
-| Single quad surface | `cornerPinTOP` → `windowCOMP` |
-| Curved/dome | Subdivided gridSOP with vertex GLSL → `renderTOP` → `windowCOMP` |
-| Edge blend overlap | GLSL fade shader per projector → `windowCOMP` |
-| Calibration mode | `switchTOP` between scene and test patterns, hot-key triggered |
@@ -1,198 +0,0 @@
-# Replicator COMP Reference
-
-The `replicatorCOMP` clones a template operator N times, driven by a table of data. The fundamental TD pattern for data-driven networks: button grids, scene rosters, dynamic UI, parameter panels per-channel.
-
-For visual instancing (per-pixel/per-render copies), see `geometry-comp.md`. Replicator builds NETWORK NODES; instancing builds RENDER COPIES. Different layer.
-
---
-
-## Concept
-
-```
-[Template OP]                  [Data tableDAT]
-       │                              │
-       └─────→ replicatorCOMP ←───────┘
-                     │
-                     ▼
-        [N clones], one per data row
-        Each clone gets per-row params
-```
-
-Edit the template once → all clones inherit. Edit the table → clones add/remove dynamically. Push parameter overrides per-row.
-
---
-
-## Minimal Setup
-
-```python
-# 1. Make a template (the thing to clone)
-template = root.create(buttonCOMP, 'btn_template')
-template.par.w = 80; template.par.h = 80
-template.par.text = 'X'
-template.par.bgcolorr = 0.2
-
-# 2. Make a data table (one row per clone)
-data = root.create(tableDAT, 'scene_data')
-data.appendRow(['name', 'color_r', 'color_g', 'color_b'])
-data.appendRow(['Sunset', 1.0, 0.4, 0.0])
-data.appendRow(['Midnight', 0.0, 0.1, 0.4])
-data.appendRow(['Storm', 0.3, 0.3, 0.5])
-data.appendRow(['Forest', 0.0, 0.5, 0.2])
-
-# 3. Replicator — points at template + data
-rep = root.create(replicatorCOMP, 'scene_buttons')
-rep.par.template = template.path
-rep.par.opfromdat = data.path
-rep.par.namefromdatname = 'name'        # use 'name' column for clone names
-rep.par.incrementalnumbering = False
-```
-
-After cooking, the replicator creates 4 child COMPs named `Sunset`, `Midnight`, `Storm`, `Forest` (one per non-header row), each cloned from `btn_template`.
-
---
-
-## Per-Row Parameter Overrides
-
-The replicator's docked `replicator1_callbacks` DAT lets you customize each clone:
-
-```python
-def onReplicate(comp, allOps, newOps, template, master):
-    """Called once per replicate cycle. newOps is the list of just-created clones."""
-    data = op('scene_data')
-    for i, clone in enumerate(newOps):
-        row = i + 1                 # +1 to skip header
-        clone.par.text = data[row, 'name'].val
-        clone.par.bgcolorr = float(data[row, 'color_r'].val)
-        clone.par.bgcolorg = float(data[row, 'color_g'].val)
-        clone.par.bgcolorb = float(data[row, 'color_b'].val)
-    return
-```
-
-Or use parameter expressions referencing `digits` (the per-clone index, available as a built-in expression token inside the cloned subtree):
-
-```python
-# Inside the template, set a param expression like:
-# par.value0.expr = "op('../scene_data')[me.digits + 1, 'value']"
-```
-
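-`me.digits` is the numeric suffix of the current operator's name, so it tracks the clone's row only when clones are numbered rather than named from a table column (a clone named `Sunset` has no digits). With numbered clones this is the cleanest way to do static reference patterns — no callback needed.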
-
---
-
-## Layout: Buttons in a Grid
-
-Drop the replicator inside a `containerCOMP` with auto-layout:
-
-```python
-panel = root.create(containerCOMP, 'scene_panel')
-panel.par.w = 400; panel.par.h = 100
-panel.par.align = 'lefttoright'
-
-# Move the replicator inside
-rep.parent = panel.path           # or create rep as a child of panel directly
-```
-
-Each clone is a child of the replicator (which itself is a child of the panel). The panel auto-arranges everything.
-
-For a 2D grid, set `par.align = 'fillresize'` on the container and override `par.x` / `par.y` per clone in the callback based on row/col index.
-
---
-
-## Updating Without Rebuilding
-
-When the data table changes, the replicator regenerates the clones. By default it destroys and recreates everything. To preserve state, set:
-
-```python
-rep.par.recreatemissing = True       # only add/remove changed rows
-rep.par.recreateallonchange = False
-```
-
-This pattern is essential for live-edit scenarios (designer adjusts table, network keeps running).
-
-For incremental data ingestion (e.g., from a `webDAT` polling an API), have a `datExecuteDAT` watch the response, parse, write to the data table, and the replicator self-updates.
-
---
-
-## Common Patterns
-
-### Scene Roster (Data → Buttons + Logic)
-
-```python
-# Data per scene: name, file path, audio track, BPM
-scene_data.appendRow(['name', 'file', 'audio', 'bpm'])
-scene_data.appendRow(['Intro', '/scenes/intro.tox', '/audio/intro.wav', 110])
-scene_data.appendRow(['Main', '/scenes/main.tox', '/audio/main.wav', 128])
-
-# Replicator clones a buttonCOMP per scene
-# Each button's onClick callback loads the corresponding tox + cues audio
-```
-
-### Dynamic Parameter Panel
-
-For a list of audio bands, generate a fader strip per band:
-
-```python
-# Data: band names (sub, low, mid, hi-mid, high, air)
-# Template: containerCOMP with label + sliderCOMP
-# Replicator clones N strips
-# Each slider's value is read at /audio_eq/{band_name}/fader
-```
-
-### Procedural Visual Network
-
-Build a multi-channel visual network from a config file:
-
-```python
-# Data: which TOPs to chain, per "scene"
-# Template: a baseCOMP with placeholder children
-# Replicator builds one baseCOMP per scene; each scene contains a custom chain
-# Switch between scenes via switchTOP.par.index driven by panel
-```
-
-### Per-Channel CHOP Display
-
-Visualize each channel of a multi-channel CHOP separately:
-
-```python
-# Data table: one row per channel (auto-extracted via choptodatDAT)
-# Template: a small chopVis COMP showing one channel
-# Replicator generates N visualizers stacked vertically
-```
-
---
-
-## Replicator vs. Pure Python Loop
-
-| Approach | When to use |
-|---|---|
-| **replicatorCOMP** | The set of clones changes (add/remove rows live). Visual editor expectations. Pattern is reusable across projects. |
-| **Python loop** (in `td_execute_python`) | One-shot generation. Static set. Simpler logic, no template overhead. Faster to write. |
-
-If you'll only ever build the network once, prefer a Python loop with `td_execute_python`. The replicator earns its keep when the data is live.
-
---
-
-## Pitfalls
-
-1. **Header row** — `tableDAT` rows are 0-indexed. If you have a header, your first data row is index 1. Off-by-one bugs are common in callbacks.
-2. **`namefromdatname` column missing** — replicator silently uses `digits` (numeric suffix) names. Buttons end up named `1`, `2`, `3` instead of meaningful names. Set `par.namefromdatname` explicitly.
-3. **Template lives in network** — the template OP is itself a real network node. Don't connect things downstream of it directly; connect to the clones (or use a `nullCOMP` between).
-4. **Recreate-on-change wipes state** — toggles, slider positions, and uncached data inside clones are lost on each regeneration. Use `recreatemissing` to preserve.
-5. **`onReplicate` doesn't fire on edit** — only fires when the clone set changes. Editing a value WITHIN an existing row doesn't re-trigger. Use `parameterExecuteDAT` or expressions for per-cell live updates.
-6. **Custom params on clones** — pages added in the template propagate. Pages added in `onReplicate` don't survive the next regeneration. Always add custom pages on the template, not the clone.
-7. **Cooking storms** — adding many rows fast triggers many clone events. Bundle adds via Python and call `data.cook(force=True)` once at the end.
-8. **`me.digits` outside replicator children** — `me.digits` only resolves inside an op that's a descendant of the replicator. Don't reference it in unrelated networks.
-9. **Cross-clone references** — referencing a sibling clone via relative path works from inside a clone (`op('../OtherClone/x')`), but breaks if names change. Prefer absolute paths via the data table.
-
---
-
-## Quick Recipes
-
-| Goal | Setup |
-|---|---|
-| 8-button scene picker | `tableDAT` (8 rows) + `buttonCOMP` template + `replicatorCOMP` |
-| Per-band EQ strip panel | `tableDAT` (band names) + container template (label + slider) + replicator |
-| Data-driven visual scenes | `tableDAT` (scene config) + `baseCOMP` template (visual chain) + replicator |
-| Live-updating clone set | Same as above + `par.recreatemissing = True` |
-| Per-row colored UI | Data table with color cols, `onReplicate` callback sets per-clone colors |
-| List from API response | `webDAT` → `datExecuteDAT` parses JSON → writes to data table → replicator updates |
@@ -516,88 +516,26 @@ class TestGetTextAuxiliaryClient:
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.2-codex"

-    def test_returns_none_when_nothing_available(self, monkeypatch):
-        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
-        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
-        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
-             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
-            client, model = get_text_auxiliary_client()
-        assert client is None
-        assert model is None

-    def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
-        with patch("agent.auxiliary_client._resolve_custom_runtime",
-                   return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
-             patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
-             patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client()
-
-        from agent.auxiliary_client import CodexAuxiliaryClient
-        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
-        assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
-        assert mock_openai.call_args.kwargs["api_key"] == "sk-test"
-
-
-class TestVisionClientFallback:
-    """Vision client auto mode resolves known-good multimodal backends."""
-
-    def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch):
-        """Active provider appears in available backends when credentials exist."""
-        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
+class TestNousAuxiliaryRefresh:
+    def test_try_nous_prefers_runtime_credentials(self):
+        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
-            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
-            patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
-            patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
-        ):
-            backends = get_available_vision_backends()
-
-        assert "anthropic" in backends
-
-    def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
-        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
-        with (
-            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
-            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
-            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
-        ):
-            client, model = resolve_provider_client("anthropic")
-
-        assert client is not None
-        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
-        assert model == "claude-haiku-4-5-20251001"
-
-
-class TestAuxiliaryPoolAwareness:
-    def test_try_nous_uses_pool_entry(self):
-        class _Entry:
-            access_token = "pooled-access-token"
-            agent_key = "pooled-agent-key"
-            inference_base_url = "https://inference.pool.example/v1"
-
-        class _Pool:
-            def has_credentials(self):
-                return True
-
-            def select(self):
-                return _Entry()
-
-        with (
-            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
+            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

+            mock_openai.return_value = MagicMock()
            client, model = _try_nous()

        assert client is not None
+        # No Portal recommendation → falls back to the hardcoded default.
        assert model == "google/gemini-3-flash-preview"
-        assert mock_openai.call_args.kwargs["api_key"] == "pooled-agent-key"
-        assert mock_openai.call_args.kwargs["base_url"] == "https://inference.pool.example/v1"
+        assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
+        assert mock_openai.call_args.kwargs["base_url"] == fresh_base

    def test_try_nous_uses_portal_recommendation_for_text(self):
        """When the Portal recommends a compaction model, _try_nous honors it."""
@@ -705,40 +643,6 @@ class TestAuxiliaryPoolAwareness:
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_async_client.chat.completions.create.await_count == 1

-    def test_cached_gmi_client_keeps_explicit_slash_model_override(self):
-        import agent.auxiliary_client as aux
-
-        fake_client = MagicMock()
-
-        with patch(
-            "agent.auxiliary_client.resolve_provider_client",
-            return_value=(fake_client, "google/gemini-3.1-flash-lite-preview"),
-        ) as mock_resolve:
-            aux.shutdown_cached_clients()
-            try:
-                client, model = aux._get_cached_client(
-                    "gmi",
-                    "google/gemini-3.1-flash-lite-preview",
-                    base_url="https://api.gmi-serving.com/v1",
-                    api_key="gmi-key",
-                )
-                assert client is fake_client
-                assert model == "google/gemini-3.1-flash-lite-preview"
-
-                client, model = aux._get_cached_client(
-                    "gmi",
-                    "openai/gpt-5.4-mini",
-                    base_url="https://api.gmi-serving.com/v1",
-                    api_key="gmi-key",
-                )
-            finally:
-                aux.shutdown_cached_clients()
-
-        assert client is fake_client
-        assert model == "openai/gpt-5.4-mini"
-        assert mock_resolve.call_count == 1
-
-
 # ── Payment / credit exhaustion fallback ─────────────────────────────────


@@ -242,298 +242,6 @@ class TestSummaryFailureCooldown:
        assert mock_call.call_count == 1


-class TestSummaryFallbackToMainModel:
-    """When ``summary_model`` differs from the main model and the summary LLM
-    call fails, the compressor should retry once on the main model before
-    giving up — losing N turns of context is almost always worse than one
-    extra summary attempt.  Covers both the fast-path (explicit
-    model-not-found errors) and the unknown-error best-effort retry."""
-
-    def _msgs(self):
-        return [
-            {"role": "user", "content": "do something"},
-            {"role": "assistant", "content": "ok"},
-        ]
-
-    def test_model_not_found_404_falls_back_to_main_and_succeeds(self):
-        """Classic misconfiguration: ``auxiliary.compression.model`` points at
-        a model the main provider doesn't serve → 404 → retry on main."""
-        mock_ok = MagicMock()
-        mock_ok.choices = [MagicMock()]
-        mock_ok.choices[0].message.content = "summary via main model"
-
-        err_404 = Exception("404 model_not_found: no such model")
-        err_404.status_code = 404
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(
-                model="main-model",
-                summary_model_override="broken-aux-model",
-                quiet_mode=True,
-            )
-
-        with patch(
-            "agent.context_compressor.call_llm",
-            side_effect=[err_404, mock_ok],
-        ) as mock_call:
-            result = c._generate_summary(self._msgs())
-
-        assert mock_call.call_count == 2
-        # First call used the misconfigured aux model
-        assert mock_call.call_args_list[0].kwargs.get("model") == "broken-aux-model"
-        # Second call used the main model (no model kwarg → call_llm uses main)
-        assert "model" not in mock_call.call_args_list[1].kwargs
-        assert result is not None
-        assert "summary via main model" in result
-        # Aux-model failure is recorded even though retry succeeded — this is
-        # how callers (gateway /compress, CLI warning) know to tell the user
-        # their auxiliary.compression.model setting is broken.
-        assert c._last_aux_model_failure_model == "broken-aux-model"
-        assert c._last_aux_model_failure_error is not None
-        assert "404" in c._last_aux_model_failure_error
-
-    def test_unknown_error_falls_back_to_main_and_succeeds(self):
-        """Errors that don't match the 404/503/model_not_found fast-path
-        (400s, provider-specific 'no route', aggregator rejections) should
-        ALSO trigger a best-effort retry on main before entering cooldown."""
-        mock_ok = MagicMock()
-        mock_ok.choices = [MagicMock()]
-        mock_ok.choices[0].message.content = "summary via main model"
-
-        # A 400 from OpenRouter / Nous portal with an opaque message — does
-        # NOT match _is_model_not_found, but still an unrecoverable misconfig.
-        err_400 = Exception("400 Bad Request: provider rejected model")
-        err_400.status_code = 400
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(
-                model="main-model",
-                summary_model_override="broken-aux-model",
-                quiet_mode=True,
-            )
-
-        with patch(
-            "agent.context_compressor.call_llm",
-            side_effect=[err_400, mock_ok],
-        ) as mock_call:
-            result = c._generate_summary(self._msgs())
-
-        assert mock_call.call_count == 2
-        assert mock_call.call_args_list[0].kwargs.get("model") == "broken-aux-model"
-        assert "model" not in mock_call.call_args_list[1].kwargs
-        assert result is not None
-        assert "summary via main model" in result
-        # Aux-model failure recorded despite successful recovery
-        assert c._last_aux_model_failure_model == "broken-aux-model"
-        assert c._last_aux_model_failure_error is not None
-        assert "400" in c._last_aux_model_failure_error
-
-    def test_no_fallback_when_summary_model_equals_main_model(self):
-        """If the aux model IS the main model, there's nowhere to fall back
-        to — go straight to cooldown, don't loop retrying the same call."""
-        err = Exception("500 internal error")
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(
-                model="main-model",
-                summary_model_override="main-model",  # same as main
-                quiet_mode=True,
-            )
-
-        with patch(
-            "agent.context_compressor.call_llm",
-            side_effect=err,
-        ) as mock_call:
-            result = c._generate_summary(self._msgs())
-
-        # Only one attempt — retry gate blocks fallback when models match
-        assert mock_call.call_count == 1
-        assert result is None
-        # Not flagged as fallen back — the retry condition was never met
-        assert getattr(c, "_summary_model_fallen_back", False) is False
-
-    def test_fallback_only_happens_once_per_compressor(self):
-        """If the retry-on-main ALSO fails, don't loop forever — enter
-        cooldown like the normal failure path."""
-        err1 = Exception("400 aux model rejected")
-        err2 = Exception("500 main model also exploded")
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(
-                model="main-model",
-                summary_model_override="broken-aux-model",
-                quiet_mode=True,
-            )
-
-        with patch(
-            "agent.context_compressor.call_llm",
-            side_effect=[err1, err2],
-        ) as mock_call:
-            result = c._generate_summary(self._msgs())
-
-        # Exactly 2 calls: initial + one retry on main.  No further retries.
-        assert mock_call.call_count == 2
-        assert result is None
-        assert c._summary_model_fallen_back is True
-
-
-class TestAuxModelFallbackSurfacedToCallers:
-    """When summary_model fails but retry-on-main succeeds, compress() must
-    expose the aux-model failure via _last_aux_model_failure_{model,error}
-    so gateway /compress and CLI callers can warn the user about their
-    broken auxiliary.compression.model config — silent recovery would hide
-    a misconfiguration only the user can fix."""
-
-    def _make_msgs(self):
-        return [
-            {"role": "system", "content": "sys"},
-            {"role": "user", "content": "msg 1"},
-            {"role": "assistant", "content": "msg 2"},
-            {"role": "user", "content": "msg 3"},
-            {"role": "assistant", "content": "msg 4"},
-            {"role": "user", "content": "msg 5"},
-            {"role": "assistant", "content": "msg 6"},
-            {"role": "user", "content": "msg 7"},
-        ]
-
-    def test_compress_exposes_aux_failure_fields_after_successful_fallback(self):
-        mock_ok = MagicMock()
-        mock_ok.choices = [MagicMock()]
-        mock_ok.choices[0].message.content = "summary via main"
-        err_400 = Exception("400 provider rejected configured model")
-        err_400.status_code = 400
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(
-                model="main-model",
-                summary_model_override="broken-aux-model",
-                quiet_mode=True,
-                protect_first_n=2,
-                protect_last_n=2,
-            )
-
-        with patch(
-            "agent.context_compressor.call_llm",
-            side_effect=[err_400, mock_ok],
-        ):
-            result = c.compress(self._make_msgs())
-
-        # Recovery succeeded → no fallback placeholder
-        assert c._last_summary_fallback_used is False
-        # But aux-model failure IS recorded for the gateway/CLI warning
-        assert c._last_aux_model_failure_model == "broken-aux-model"
-        assert c._last_aux_model_failure_error is not None
-        assert "400" in c._last_aux_model_failure_error
-        # Result is well-formed with a real summary, not a placeholder
-        assert any(
-            isinstance(m.get("content"), str) and "summary via main" in m["content"]
-            for m in result
-        )
-
-    def test_compress_clears_aux_failure_fields_at_start_of_next_call(self):
-        """A subsequent successful compression must clear the aux-failure
-        fields so the warning doesn't persist forever."""
-        mock_ok = MagicMock()
-        mock_ok.choices = [MagicMock()]
-        mock_ok.choices[0].message.content = "summary via main"
-        err_400 = Exception("400 aux model busted")
-        err_400.status_code = 400
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(
-                model="main-model",
-                summary_model_override="broken-aux-model",
-                quiet_mode=True,
-                protect_first_n=2,
-                protect_last_n=2,
-            )
-
-        # Call 1: aux fails, retry-on-main succeeds
-        with patch(
-            "agent.context_compressor.call_llm",
-            side_effect=[err_400, mock_ok],
-        ):
-            c.compress(self._make_msgs())
-        assert c._last_aux_model_failure_model == "broken-aux-model"
-
-        # Call 2: clean run on main (summary_model was cleared to "" after
-        # first fallback).  Aux-failure fields MUST reset at compress() start
-        # so the old warning state doesn't leak into this call.
-        with patch(
-            "agent.context_compressor.call_llm",
-            return_value=mock_ok,
-        ):
-            c.compress(self._make_msgs())
-        assert c._last_aux_model_failure_model is None
-        assert c._last_aux_model_failure_error is None
-
-
-class TestSummaryFailureTrackingForGatewayWarning:
-    """When summary generation fails, the compressor must record dropped count
-    + fallback flag so gateway hygiene & /compress can surface a visible
-    warning instead of silently dropping context."""
-
-    def test_compress_records_fallback_and_dropped_count_on_summary_failure(self):
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
-
-        msgs = [
-            {"role": "system", "content": "sys"},
-            {"role": "user", "content": "msg 1"},
-            {"role": "assistant", "content": "msg 2"},
-            {"role": "user", "content": "msg 3"},
-            {"role": "assistant", "content": "msg 4"},
-            {"role": "user", "content": "msg 5"},
-            {"role": "assistant", "content": "msg 6"},
-            {"role": "user", "content": "msg 7"},
-        ]
-
-        # Simulate summary LLM call failing — covers the 404 / model-not-found
-        # case from issue (auxiliary compression model misconfigured).
-        with patch("agent.context_compressor.call_llm", side_effect=Exception("404 model not found")):
-            result = c.compress(msgs)
-
-        assert c._last_summary_fallback_used is True
-        assert c._last_summary_dropped_count > 0
-        assert c._last_summary_error is not None
-        # Result must still be well-formed (fallback summary present).
-        assert any(
-            isinstance(m.get("content"), str) and "Summary generation was unavailable" in m["content"]
-            for m in result
-        )
-
-    def test_compress_clears_fallback_flag_on_subsequent_success(self):
-        mock_response = MagicMock()
-        mock_response.choices = [MagicMock()]
-        mock_response.choices[0].message.content = "summary text"
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
-
-        msgs = [
-            {"role": "system", "content": "sys"},
-            {"role": "user", "content": "msg 1"},
-            {"role": "assistant", "content": "msg 2"},
-            {"role": "user", "content": "msg 3"},
-            {"role": "assistant", "content": "msg 4"},
-            {"role": "user", "content": "msg 5"},
-            {"role": "assistant", "content": "msg 6"},
-            {"role": "user", "content": "msg 7"},
-        ]
-
-        # First call fails, second succeeds — flag must reset on second compress.
-        with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")):
-            c.compress(msgs)
-        assert c._last_summary_fallback_used is True
-
-        # Reset cooldown to allow retry on second compress
-        c._summary_failure_cooldown_until = 0.0
-        with patch("agent.context_compressor.call_llm", return_value=mock_response):
-            c.compress(msgs)
-        assert c._last_summary_fallback_used is False
-        assert c._last_summary_dropped_count == 0
-
-
 class TestSummaryPrefixNormalization:
    def test_legacy_prefix_is_replaced(self):
        summary = ContextCompressor._with_summary_prefix("[CONTEXT SUMMARY]: did work")
@@ -10,7 +10,7 @@ import unittest
 from pathlib import Path
 from unittest.mock import patch

-from acp_adapter.copilot_client import CopilotACPClient
+from agent.copilot_acp_client import CopilotACPClient


 class _FakeProcess:
@@ -100,7 +100,7 @@ class CopilotACPClientSafetyTests(unittest.TestCase):
            target = home / ".ssh" / "id_rsa"
            target.parent.mkdir(parents=True, exist_ok=True)

-            with patch("acp_adapter.copilot_client.is_write_denied", return_value=True, create=True):
+            with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True):
                response = self._dispatch(
                    {
                        "jsonrpc": "2.0",
@@ -71,17 +71,17 @@ class TestMinimaxThinkingSupport:


 class TestMinimaxAuxModel:
-    """Verify auxiliary model is standard (not highspeed) — now reads from profiles."""
+    """Verify auxiliary model is standard (not highspeed)."""

    def test_minimax_aux_is_standard(self):
-        from agent.auxiliary_client import _get_aux_model_for_provider
-        assert _get_aux_model_for_provider("minimax") == "MiniMax-M2.7"
-        assert _get_aux_model_for_provider("minimax-cn") == "MiniMax-M2.7"
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+        assert _API_KEY_PROVIDER_AUX_MODELS["minimax"] == "MiniMax-M2.7"
+        assert _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] == "MiniMax-M2.7"

    def test_minimax_aux_not_highspeed(self):
-        from agent.auxiliary_client import _get_aux_model_for_provider
-        assert "highspeed" not in _get_aux_model_for_provider("minimax")
-        assert "highspeed" not in _get_aux_model_for_provider("minimax-cn")
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+        assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax"]
+        assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]


 class TestMinimaxBetaHeaders:
@@ -1,211 +0,0 @@
-"""Unit tests for StreamingContextScrubber (agent/memory_manager.py).
-
-Regression coverage for #5719 — memory-context spans split across stream
-deltas must not leak payload to the UI.  The one-shot sanitize_context()
-regex can't survive chunk boundaries, so _fire_stream_delta routes deltas
-through a stateful scrubber.
-"""
-
-from agent.memory_manager import StreamingContextScrubber, sanitize_context
-
-
-class TestStreamingContextScrubberBasics:
-    def test_empty_input_returns_empty(self):
-        s = StreamingContextScrubber()
-        assert s.feed("") == ""
-        assert s.flush() == ""
-
-    def test_plain_text_passes_through(self):
-        s = StreamingContextScrubber()
-        assert s.feed("hello world") == "hello world"
-        assert s.flush() == ""
-
-    def test_complete_block_in_single_delta(self):
-        """Regression: the one-shot test case from #13672 must still work."""
-        s = StreamingContextScrubber()
-        leaked = (
-            "<memory-context>\n"
-            "[System note: The following is recalled memory context, NOT new "
-            "user input. Treat as informational background data.]\n\n"
-            "## Honcho Context\nstale memory\n"
-            "</memory-context>\n\nVisible answer"
-        )
-        out = s.feed(leaked) + s.flush()
-        assert out == "\n\nVisible answer"
-
-    def test_open_and_close_in_separate_deltas_strips_payload(self):
-        """The real streaming case: tag pair split across deltas."""
-        s = StreamingContextScrubber()
-        deltas = [
-            "Hello ",
-            "<memory-context>\npayload ",
-            "more payload\n",
-            "</memory-context> world",
-        ]
-        out = "".join(s.feed(d) for d in deltas) + s.flush()
-        assert out == "Hello  world"
-        assert "payload" not in out
-
-    def test_realistic_fragmented_chunks_strip_memory_payload(self):
-        """Exact leak scenario from the reviewer's comment — 4 realistic chunks.
-
-        This is the case the original #13672 fix silently leaks on: the open
-        tag, system note, payload, and close tag each arrive in their own
-        delta because providers emit 1-80 char chunks.
-        """
-        s = StreamingContextScrubber()
-        deltas = [
-            "<memory-context>\n[System note: The following",
-            " is recalled memory context, NOT new user input. "
-            "Treat as informational background data.]\n\n",
-            "## Honcho Context\nstale memory\n",
-            "</memory-context>\n\nVisible answer",
-        ]
-        out = "".join(s.feed(d) for d in deltas) + s.flush()
-        assert out == "\n\nVisible answer"
-        # The system-note line and payload must never reach the UI.
-        assert "System note" not in out
-        assert "Honcho Context" not in out
-        assert "stale memory" not in out
-
-    def test_open_tag_split_across_two_deltas(self):
-        """The open tag itself arriving in two fragments."""
-        s = StreamingContextScrubber()
-        out = (
-            s.feed("pre <memory")
-            + s.feed("-context>leak</memory-context> post")
-            + s.flush()
-        )
-        assert out == "pre  post"
-        assert "leak" not in out
-
-    def test_close_tag_split_across_two_deltas(self):
-        """The close tag arriving in two fragments."""
-        s = StreamingContextScrubber()
-        out = (
-            s.feed("pre <memory-context>leak</memory")
-            + s.feed("-context> post")
-            + s.flush()
-        )
-        assert out == "pre  post"
-        assert "leak" not in out
-
-
-class TestStreamingContextScrubberPartialTagFalsePositives:
-    def test_partial_open_tag_tail_emitted_on_flush(self):
-        """Bare '<mem' at end of stream is not really a memory-context tag."""
-        s = StreamingContextScrubber()
-        out = s.feed("hello <mem") + s.feed("ory other") + s.flush()
-        assert out == "hello <memory other"
-
-    def test_partial_tag_released_when_disambiguated(self):
-        """A held-back partial tag that turns out to be prose gets released."""
-        s = StreamingContextScrubber()
-        # '< ' should not look like the start of any tag.
-        out = s.feed("price < ") + s.feed("10 dollars") + s.flush()
-        assert out == "price < 10 dollars"
-
-
-class TestStreamingContextScrubberUnterminatedSpan:
-    def test_unterminated_span_drops_payload(self):
-        """Provider drops close tag — better to lose output than to leak."""
-        s = StreamingContextScrubber()
-        out = s.feed("pre <memory-context>secret never closed") + s.flush()
-        assert out == "pre "
-        assert "secret" not in out
-
-    def test_reset_clears_hung_span(self):
-        """Cross-turn scrubber reset drops a hung span so next turn is clean."""
-        s = StreamingContextScrubber()
-        s.feed("pre <memory-context>half")
-        s.reset()
-        out = s.feed("clean text") + s.flush()
-        assert out == "clean text"
-
-
-class TestStreamingContextScrubberCaseInsensitivity:
-    def test_uppercase_tags_still_scrubbed(self):
-        s = StreamingContextScrubber()
-        out = (
-            s.feed("<MEMORY-CONTEXT>secret")
-            + s.feed("</Memory-Context>visible")
-            + s.flush()
-        )
-        assert out == "visible"
-        assert "secret" not in out
-
-
-class TestSanitizeContextUnchanged:
-    """Smoke test that the one-shot sanitize_context still works for whole strings."""
-
-    def test_whole_block_still_sanitized(self):
-        leaked = (
-            "<memory-context>\n"
-            "[System note: The following is recalled memory context, NOT new "
-            "user input. Treat as informational background data.]\n"
-            "payload\n"
-            "</memory-context>\nVisible"
-        )
-        out = sanitize_context(leaked).strip()
-        assert out == "Visible"
-
-
-class TestStreamingContextScrubberCrossTurn:
-    """A scrubber instance is reused across turns (per agent).  reset() must
-    clear any held state so a partial-tag tail from turn N doesn't bleed
-    into turn N+1's first delta."""
-
-    def test_reset_clears_held_partial_tag(self):
-        s = StreamingContextScrubber()
-        # Feed a partial open-tag prefix that gets held back as buffer.
-        out_turn_1 = s.feed("answer<memo")
-        assert out_turn_1 == "answer"
-
-        # Reset for next turn — buffer must clear.
-        s.reset()
-
-        # New turn: plain text starting with a "<m" must NOT be treated as
-        # the continuation of the held "<memo".
-        out_turn_2 = s.feed("<marker>fresh content")
-        assert out_turn_2 == "<marker>fresh content"
-
-    def test_reset_clears_in_span_state(self):
-        s = StreamingContextScrubber()
-        s.feed("text<memory-context>secret-tail")
-        # Mid-span state held — without reset, subsequent text would be
-        # discarded until we see </memory-context>.
-        s.reset()
-        out = s.feed("post-reset visible text")
-        assert out == "post-reset visible text"
-
-
-class TestBuildMemoryContextBlockWarnsOnViolation:
-    """Providers must return raw context — not pre-wrapped.  When they do,
-    we strip and warn so the buggy provider surfaces."""
-
-    def test_provider_emitting_wrapper_warns(self, caplog):
-        import logging
-        from agent.memory_manager import build_memory_context_block
-
-        prewrapped = (
-            "<memory-context>\n"
-            "[System note: ...]\n\n"
-            "real fact\n"
-            "</memory-context>"
-        )
-        with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
-            out = build_memory_context_block(prewrapped)
-
-        assert any("pre-wrapped" in rec.message for rec in caplog.records)
-        assert out.count("<memory-context>") == 1
-        assert out.count("</memory-context>") == 1
-
-    def test_clean_provider_output_does_not_warn(self, caplog):
-        import logging
-        from agent.memory_manager import build_memory_context_block
-
-        with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
-            out = build_memory_context_block("plain fact about user")
-
-        assert not any("pre-wrapped" in rec.message for rec in caplog.records)
-        assert "plain fact about user" in out
@@ -73,21 +73,17 @@ class TestChatCompletionsBuildKwargs:
        assert kw["tools"] == tools

    def test_openrouter_provider_prefs(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("openrouter")
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
-            provider_profile=profile,
+            is_openrouter=True,
            provider_preferences={"only": ["openai"]},
        )
        assert kw["extra_body"]["provider"] == {"only": ["openai"]}

    def test_nous_tags(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("nous")
        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile)
+        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, is_nous=True)
        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]

    def test_reasoning_default(self, transport):
@@ -99,36 +95,29 @@ class TestChatCompletionsBuildKwargs:
        assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}

    def test_nous_omits_disabled_reasoning(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("nous")
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
-            provider_profile=profile,
            supports_reasoning=True,
+            is_nous=True,
            reasoning_config={"enabled": False},
        )
        # Nous rejects enabled=false; reasoning omitted entirely
        assert "reasoning" not in kw.get("extra_body", {})

    def test_ollama_num_ctx(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("custom")
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="llama3", messages=msgs,
-            provider_profile=profile,
            ollama_num_ctx=32768,
        )
        assert kw["extra_body"]["options"]["num_ctx"] == 32768

    def test_custom_think_false(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("custom")
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="qwen3", messages=msgs,
-            provider_profile=profile,
+            is_custom_provider=True,
            reasoning_config={"effort": "none"},
        )
        assert kw["extra_body"]["think"] is False
@@ -153,29 +142,23 @@ class TestChatCompletionsBuildKwargs:
        assert kw["max_tokens"] == 2048

    def test_nvidia_default_max_tokens(self, transport):
-        """NVIDIA max_tokens=16384 is now set via ProviderProfile, not legacy flag."""
-        from providers import get_provider_profile
-
-        profile = get_provider_profile("nvidia")
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
-            model="nvidia/llama-3.1-405b-instruct",
-            messages=msgs,
+            model="glm-4.7", messages=msgs,
+            is_nvidia_nim=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
-            provider_profile=profile,
        )
+        # NVIDIA default: 16384
        assert kw["max_tokens"] == 16384

    def test_qwen_default_max_tokens(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("qwen-oauth")
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="qwen3-coder-plus", messages=msgs,
-            provider_profile=profile,
+            is_qwen_portal=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
-        # Qwen default: 65536 from profile.default_max_tokens
+        # Qwen default: 65536
        assert kw["max_tokens"] == 65536

    def test_anthropic_max_output_for_claude_on_aggregator(self, transport):
@@ -198,23 +181,14 @@ class TestChatCompletionsBuildKwargs:
        assert kw["service_tier"] == "priority"

    def test_fixed_temperature(self, transport):
-        """Fixed temperature is now set via ProviderProfile.fixed_temperature."""
-        from providers.base import ProviderProfile
        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gpt-4o", messages=msgs,
-            provider_profile=ProviderProfile(name="_t", fixed_temperature=0.6),
-        )
+        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, fixed_temperature=0.6)
        assert kw["temperature"] == 0.6

    def test_omit_temperature(self, transport):
-        """Omit temperature is set via ProviderProfile with OMIT_TEMPERATURE sentinel."""
-        from providers.base import ProviderProfile, OMIT_TEMPERATURE
        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gpt-4o", messages=msgs,
-            provider_profile=ProviderProfile(name="_t", fixed_temperature=OMIT_TEMPERATURE),
-        )
+        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, omit_temperature=True, fixed_temperature=0.5)
+        # omit wins
        assert "temperature" not in kw


@@ -222,22 +196,18 @@ class TestChatCompletionsKimi:
    """Regression tests for the Kimi/Moonshot quirks migrated into the transport."""

    def test_kimi_max_tokens_default(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("kimi-coding")
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
-            provider_profile=profile,
+            is_kimi=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
-        # Kimi CLI default: 32000 from KimiProfile.default_max_tokens
+        # Kimi CLI default: 32000
        assert kw["max_tokens"] == 32000

    def test_kimi_reasoning_effort_top_level(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("kimi-coding")
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
-            provider_profile=profile,
+            is_kimi=True,
            reasoning_config={"effort": "high"},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
@@ -255,21 +225,17 @@ class TestChatCompletionsKimi:
        assert "reasoning_effort" not in kw

    def test_kimi_thinking_enabled_extra_body(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("kimi-coding")
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
-            provider_profile=profile,
+            is_kimi=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert kw["extra_body"]["thinking"] == {"type": "enabled"}

    def test_kimi_thinking_disabled_extra_body(self, transport):
-        from providers import get_provider_profile
-        profile = get_provider_profile("kimi-coding")
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
-            provider_profile=profile,
+            is_kimi=True,
            reasoning_config={"enabled": False},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
@@ -288,10 +288,6 @@ def _hermetic_environment(tmp_path, monkeypatch):
        monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
    except Exception:
        pass
-    # Explicitly clear provider-specific base URL overrides that don't match
-    # the generic credential-shaped env-var filter above.
-    monkeypatch.delenv("GMI_API_KEY", raising=False)
-    monkeypatch.delenv("GMI_BASE_URL", raising=False)


 # Backward-compat alias — old tests reference this fixture name. Keep it
@@ -123,123 +123,3 @@ async def test_compress_command_explains_when_token_estimate_rises():
    assert "denser summaries" in result
    agent_instance.shutdown_memory_provider.assert_called_once()
    agent_instance.close.assert_called_once()
-
-
-@pytest.mark.asyncio
-async def test_compress_command_appends_warning_when_summary_generation_fails():
-    """When the auxiliary summariser fails and the compressor inserts a static
-    fallback placeholder, /compress must append a visible ⚠️ warning to its
-    reply. Otherwise the failure is silently logged and the user has no idea
-    earlier context is unrecoverable."""
-    history = _make_history()
-    # Compressed shape is irrelevant for this test — we only care that the
-    # warning surfaces. Drop one message so the headline is non-noop.
-    compressed = [
-        history[0],
-        {"role": "assistant", "content": "[fallback placeholder]"},
-        history[-1],
-    ]
-    runner = _make_runner(history)
-    agent_instance = MagicMock()
-    agent_instance.shutdown_memory_provider = MagicMock()
-    agent_instance.close = MagicMock()
-    agent_instance.context_compressor.has_content_to_compress.return_value = True
-    # Simulate summary-generation failure: fallback flag set, dropped count
-    # populated, error string captured.
-    agent_instance.context_compressor._last_summary_fallback_used = True
-    agent_instance.context_compressor._last_summary_dropped_count = 7
-    agent_instance.context_compressor._last_summary_error = (
-        "404 model not found: gemini-3-flash-preview"
-    )
-    agent_instance.session_id = "sess-1"
-    agent_instance._compress_context.return_value = (compressed, "")
-
-    def _estimate(messages):
-        if messages == history:
-            return 100
-        if messages == compressed:
-            return 60
-        raise AssertionError(f"unexpected transcript: {messages!r}")
-
-    with (
-        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
-        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-        patch("run_agent.AIAgent", return_value=agent_instance),
-        patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
-    ):
-        result = await runner._handle_compress_command(_make_event())
-
-    # The compress reply itself still goes through (the transcript was rewritten).
-    assert "Compressed:" in result
-    # ...but a clearly-marked warning must be appended.
-    assert "⚠️" in result
-    assert "Summary generation failed" in result
-    # Underlying error must surface so users can fix their config.
-    assert "404 model not found" in result
-    # Dropped count must be visible — silently losing N messages is the bug.
-    assert "7" in result
-    assert "historical message(s) were removed" in result
-    agent_instance.shutdown_memory_provider.assert_called_once()
-    agent_instance.close.assert_called_once()
-
-
-@pytest.mark.asyncio
-async def test_compress_command_surfaces_aux_model_failure_even_when_recovered():
-    """When the user's configured ``auxiliary.compression.model`` errors out
-    but compression recovers by retrying on the main model, /compress must
-    STILL inform the user.  Silent recovery hides broken config the user
-    needs to fix."""
-    history = _make_history()
-    # Compressed transcript — normal successful compression, no placeholder.
-    compressed = [
-        history[0],
-        {"role": "assistant", "content": "summary via main model"},
-        history[-1],
-    ]
-    runner = _make_runner(history)
-    agent_instance = MagicMock()
-    agent_instance.shutdown_memory_provider = MagicMock()
-    agent_instance.close = MagicMock()
-    agent_instance.context_compressor.has_content_to_compress.return_value = True
-    # Fallback placeholder was NOT used — recovery succeeded.
-    agent_instance.context_compressor._last_summary_fallback_used = False
-    agent_instance.context_compressor._last_summary_dropped_count = 0
-    agent_instance.context_compressor._last_summary_error = None
-    # But the configured aux model DID fail before the retry succeeded.
-    agent_instance.context_compressor._last_aux_model_failure_model = (
-        "gemini-3-flash-preview"
-    )
-    agent_instance.context_compressor._last_aux_model_failure_error = (
-        "404 model not found: gemini-3-flash-preview"
-    )
-    agent_instance.session_id = "sess-1"
-    agent_instance._compress_context.return_value = (compressed, "")
-
-    def _estimate(messages):
-        if messages == history:
-            return 100
-        if messages == compressed:
-            return 60
-        raise AssertionError(f"unexpected transcript: {messages!r}")
-
-    with (
-        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}),
-        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-        patch("run_agent.AIAgent", return_value=agent_instance),
-        patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
-    ):
-        result = await runner._handle_compress_command(_make_event())
-
-    # Compression succeeded
-    assert "Compressed:" in result
-    # No ⚠️ warning (that's reserved for dropped-turns case)
-    assert "⚠️" not in result
-    # But there IS an info note about the broken aux model
-    assert "ℹ️" in result
-    assert "gemini-3-flash-preview" in result
-    assert "404" in result
-    assert "auxiliary.compression.model" in result
-    # The user's context is explicitly called out as intact
-    assert "intact" in result
-    agent_instance.shutdown_memory_provider.assert_called_once()
-    agent_instance.close.assert_called_once()
@@ -1,200 +0,0 @@
-"""Tests for BasePlatformAdapter._keep_typing timeout-per-tick behavior.
-
-When the gateway is waiting on a long upstream provider response (e.g.
-Anthropic/opus-4.7 first-token latency climbing during an upstream blip),
-the model-call socket is blocked on the worker thread but the asyncio loop
-is still running, and ``_keep_typing`` refreshes the platform typing
-indicator every 2 seconds.
-
-The bug: each ``send_typing`` call is an HTTP round-trip to the platform API
-(Telegram/Discord). If the same network instability that's slowing the model
-call also makes ``send_typing`` slow (5-30s response time), the refresh loop
-stalls inside the ``await self.send_typing(...)`` call. Platform-side typing
-expires at ~5s, so the bubble dies and doesn't come back until that stuck
-call returns — exactly when the user most needs the "yes, still working"
-signal.
-
-The fix: bound each ``send_typing`` with ``asyncio.wait_for``. If a
-send_typing takes longer than the per-tick budget (default 1.5s when
-interval=2.0), abandon it and let the next scheduled tick fire a fresh
-call. As long as any one of them succeeds within the ~5s platform window,
-the bubble stays visible across provider stalls.
-"""
-
-import asyncio
-from unittest.mock import MagicMock
-
-import pytest
-
-from gateway.platforms.base import (
-    BasePlatformAdapter,
-    Platform,
-    PlatformConfig,
-    SendResult,
-)
-
-
-class _StubAdapter(BasePlatformAdapter):
-    def __init__(self):
-        super().__init__(PlatformConfig(enabled=True, token="test"), Platform.TELEGRAM)
-
-    async def connect(self) -> bool:
-        return True
-
-    async def disconnect(self) -> None:
-        self._mark_disconnected()
-
-    async def send(self, chat_id, content, reply_to=None, metadata=None):
-        return SendResult(success=True, message_id="m1")
-
-    async def get_chat_info(self, chat_id):
-        return {"id": chat_id, "type": "dm"}
-
-
-class TestKeepTypingTimeoutPerTick:
-    @pytest.mark.asyncio
-    async def test_slow_send_typing_does_not_block_cadence(self, monkeypatch):
-        """A send_typing that hangs longer than the per-tick budget must be
-        abandoned so the next scheduled tick can fire a fresh call."""
-        adapter = _StubAdapter()
-        call_events = []
-
-        async def slow_send_typing(chat_id, metadata=None):
-            # Simulate a stuck HTTP round-trip. If _keep_typing awaits this
-            # unconditionally, the loop stalls for the full duration.
-            call_events.append("start")
-            try:
-                await asyncio.sleep(10)
-            finally:
-                call_events.append("finish-or-cancel")
-
-        monkeypatch.setattr(adapter, "send_typing", slow_send_typing)
-        # Avoid stop_typing side-effects in the finally block.
-        adapter.stop_typing = MagicMock(return_value=asyncio.sleep(0))
-
-        stop_event = asyncio.Event()
-        # Start the typing loop, let it run ~3s (should fire 2 ticks) then stop.
-        task = asyncio.create_task(
-            adapter._keep_typing(
-                chat_id="123",
-                interval=1.0,
-                stop_event=stop_event,
-            )
-        )
-        await asyncio.sleep(3.0)
-        stop_event.set()
-        try:
-            await asyncio.wait_for(task, timeout=2.0)
-        except asyncio.TimeoutError:
-            task.cancel()
-            pytest.fail(
-                "_keep_typing did not exit within 2s of stop_event.set() — "
-                "it is blocked on a slow send_typing call"
-            )
-
-        # With per-tick timeout, we should see MULTIPLE send_typing starts
-        # despite each being slow (abandoned via TimeoutError).  Without the
-        # fix there would be exactly 1 start (the one still stuck).
-        starts = [e for e in call_events if e == "start"]
-        assert len(starts) >= 2, (
-            f"expected at least 2 send_typing ticks across 3s of slow "
-            f"operation, got {len(starts)} — refresh cadence is stalled "
-            f"on a slow send_typing"
-        )
-
-    @pytest.mark.asyncio
-    async def test_fast_send_typing_still_gets_awaited(self, monkeypatch):
-        """When send_typing is fast (normal case), it must still complete
-        normally — the timeout is only an upper bound, not a cap on
-        successful calls."""
-        adapter = _StubAdapter()
-        completed = []
-
-        async def fast_send_typing(chat_id, metadata=None):
-            await asyncio.sleep(0.01)  # well under the timeout
-            completed.append(chat_id)
-
-        monkeypatch.setattr(adapter, "send_typing", fast_send_typing)
-        adapter.stop_typing = MagicMock(return_value=asyncio.sleep(0))
-
-        stop_event = asyncio.Event()
-        task = asyncio.create_task(
-            adapter._keep_typing(
-                chat_id="456",
-                interval=0.5,
-                stop_event=stop_event,
-            )
-        )
-        await asyncio.sleep(1.2)  # ~3 ticks
-        stop_event.set()
-        await asyncio.wait_for(task, timeout=1.0)
-
-        assert len(completed) >= 2, (
-            f"expected multiple completed send_typing calls, got "
-            f"{len(completed)}"
-        )
-        assert all(c == "456" for c in completed)
-
-    @pytest.mark.asyncio
-    async def test_send_typing_exception_does_not_kill_loop(self, monkeypatch):
-        """A send_typing that raises (e.g. transient HTTP 500) must be
-        caught so the loop continues refreshing on schedule."""
-        adapter = _StubAdapter()
-        tick_count = {"n": 0}
-
-        async def flaky_send_typing(chat_id, metadata=None):
-            tick_count["n"] += 1
-            if tick_count["n"] == 1:
-                raise RuntimeError("transient upstream error")
-            # Subsequent calls succeed.
-
-        monkeypatch.setattr(adapter, "send_typing", flaky_send_typing)
-        adapter.stop_typing = MagicMock(return_value=asyncio.sleep(0))
-
-        stop_event = asyncio.Event()
-        task = asyncio.create_task(
-            adapter._keep_typing(
-                chat_id="789",
-                interval=0.3,
-                stop_event=stop_event,
-            )
-        )
-        await asyncio.sleep(1.0)
-        stop_event.set()
-        await asyncio.wait_for(task, timeout=1.0)
-
-        assert tick_count["n"] >= 2, (
-            f"loop exited after first send_typing exception; expected it to "
-            f"keep ticking (got {tick_count['n']} ticks)"
-        )
-
-    @pytest.mark.asyncio
-    async def test_paused_chat_skips_send_typing(self, monkeypatch):
-        """When a chat is in _typing_paused (e.g. awaiting approval), the
-        loop must not call send_typing at all. Regression guard — existing
-        behavior, preserved through the timeout change."""
-        adapter = _StubAdapter()
-        calls = []
-
-        async def recording_send_typing(chat_id, metadata=None):
-            calls.append(chat_id)
-
-        monkeypatch.setattr(adapter, "send_typing", recording_send_typing)
-        adapter.stop_typing = MagicMock(return_value=asyncio.sleep(0))
-        adapter._typing_paused.add("paused-chat")
-
-        stop_event = asyncio.Event()
-        task = asyncio.create_task(
-            adapter._keep_typing(
-                chat_id="paused-chat",
-                interval=0.3,
-                stop_event=stop_event,
-            )
-        )
-        await asyncio.sleep(1.0)
-        stop_event.set()
-        await asyncio.wait_for(task, timeout=1.0)
-
-        assert calls == [], (
-            f"send_typing was called on a paused chat: {calls}"
-        )
@@ -393,243 +393,3 @@ async def test_session_hygiene_messages_stay_in_originating_topic(monkeypatch, t
    assert FakeCompressAgent.last_instance is not None
    FakeCompressAgent.last_instance.shutdown_memory_provider.assert_called_once()
    FakeCompressAgent.last_instance.close.assert_called_once()
-
-
-@pytest.mark.asyncio
-async def test_session_hygiene_warns_user_when_summary_generation_fails(monkeypatch, tmp_path):
-    """When auxiliary compression's summary LLM call fails, the compressor
-    inserts a static fallback and the dropped turns are unrecoverable.
-    Gateway must surface a visible ⚠️ warning to the user, including
-    thread_id metadata so it lands in the originating topic/thread."""
-    fake_dotenv = types.ModuleType("dotenv")
-    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
-    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
-
-    class FakeCompressAgentWithSummaryFailure:
-        last_instance = None
-
-        def __init__(self, **kwargs):
-            self.model = kwargs.get("model")
-            self.session_id = kwargs.get("session_id", "fake-session")
-            self._print_fn = None
-            self.shutdown_memory_provider = MagicMock()
-            self.close = MagicMock()
-            # Simulate a compressor that hit summary-generation failure
-            # and inserted the static fallback placeholder.
-            self.context_compressor = SimpleNamespace(
-                _last_summary_fallback_used=True,
-                _last_summary_dropped_count=42,
-                _last_summary_error="404 model not found: gemini-3-flash-preview",
-            )
-            type(self).last_instance = self
-
-        def _compress_context(self, messages, *_args, **_kwargs):
-            self.session_id = f"{self.session_id}_compressed"
-            return ([{"role": "assistant", "content": "compressed"}], None)
-
-    fake_run_agent = types.ModuleType("run_agent")
-    fake_run_agent.AIAgent = FakeCompressAgentWithSummaryFailure
-    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
-
-    gateway_run = importlib.import_module("gateway.run")
-    GatewayRunner = gateway_run.GatewayRunner
-
-    adapter = HygieneCaptureAdapter()
-    runner = object.__new__(GatewayRunner)
-    runner.config = GatewayConfig(
-        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake-token")}
-    )
-    runner.adapters = {Platform.TELEGRAM: adapter}
-    runner._voice_mode = {}
-    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
-    runner.session_store = MagicMock()
-    runner.session_store.get_or_create_session.return_value = SessionEntry(
-        session_key="agent:main:telegram:group:-1001:17585",
-        session_id="sess-1",
-        created_at=datetime.now(),
-        updated_at=datetime.now(),
-        platform=Platform.TELEGRAM,
-        chat_type="group",
-    )
-    runner.session_store.load_transcript.return_value = _make_history(6, content_size=400)
-    runner.session_store.has_any_sessions.return_value = True
-    runner.session_store.rewrite_transcript = MagicMock()
-    runner.session_store.append_to_transcript = MagicMock()
-    runner._running_agents = {}
-    runner._pending_messages = {}
-    runner._pending_approvals = {}
-    runner._session_db = None
-    runner._is_user_authorized = lambda _source: True
-    runner._set_session_env = lambda _context: None
-    runner._run_agent = AsyncMock(
-        return_value={
-            "final_response": "ok",
-            "messages": [],
-            "tools": [],
-            "history_offset": 0,
-            "last_prompt_tokens": 0,
-        }
-    )
-
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
-    monkeypatch.setattr(
-        "agent.model_metadata.get_model_context_length",
-        lambda *_args, **_kwargs: 100,
-    )
-    monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "795544298")
-
-    event = MessageEvent(
-        text="hello",
-        source=SessionSource(
-            platform=Platform.TELEGRAM,
-            chat_id="-1001",
-            chat_type="group",
-            thread_id="17585",
-            user_id="12345",
-        ),
-        message_id="1",
-    )
-
-    result = await runner._handle_message(event)
-
-    assert result == "ok"
-    # The compressor reported summary-failure → exactly one warning
-    # message must have been delivered to the user.
-    warning_messages = [s for s in adapter.sent if "Context compression summary failed" in s["content"]]
-    assert len(warning_messages) == 1, (
-        f"Expected 1 compression-failure warning, got {len(warning_messages)}: {adapter.sent}"
-    )
-    warn = warning_messages[0]
-    # Warning must include the dropped count and the underlying error.
-    assert "42" in warn["content"]
-    assert "404" in warn["content"]
-    # Warning must land in the originating topic/thread, not the main channel.
-    assert warn["chat_id"] == "-1001"
-    assert warn["metadata"] == {"thread_id": "17585"}
-
-    FakeCompressAgentWithSummaryFailure.last_instance.close.assert_called_once()
-
-
-@pytest.mark.asyncio
-async def test_session_hygiene_informs_user_when_aux_model_fails_but_recovers(monkeypatch, tmp_path):
-    """When the user's configured ``auxiliary.compression.model`` errors out
-    and we recover via the main model, compression succeeds but the user's
-    config is still broken.  Gateway hygiene must surface an ℹ note so the
-    user knows to fix ``auxiliary.compression.model`` — silent recovery
-    hides a misconfig only they can resolve."""
-    fake_dotenv = types.ModuleType("dotenv")
-    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
-    monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
-
-    class FakeCompressAgentWithAuxRecovery:
-        last_instance = None
-
-        def __init__(self, **kwargs):
-            self.model = kwargs.get("model")
-            self.session_id = kwargs.get("session_id", "fake-session")
-            self._print_fn = None
-            self.shutdown_memory_provider = MagicMock()
-            self.close = MagicMock()
-            # Compression succeeded (no placeholder inserted) but the
-            # configured aux model errored and we fell back to main.
-            self.context_compressor = SimpleNamespace(
-                _last_summary_fallback_used=False,
-                _last_summary_dropped_count=0,
-                _last_summary_error=None,
-                _last_aux_model_failure_model="gemini-3-flash-preview",
-                _last_aux_model_failure_error="404 model not found",
-            )
-            type(self).last_instance = self
-
-        def _compress_context(self, messages, *_args, **_kwargs):
-            self.session_id = f"{self.session_id}_compressed"
-            return ([{"role": "assistant", "content": "real summary"}], None)
-
-    fake_run_agent = types.ModuleType("run_agent")
-    fake_run_agent.AIAgent = FakeCompressAgentWithAuxRecovery
-    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
-
-    gateway_run = importlib.import_module("gateway.run")
-    GatewayRunner = gateway_run.GatewayRunner
-
-    adapter = HygieneCaptureAdapter()
-    runner = object.__new__(GatewayRunner)
-    runner.config = GatewayConfig(
-        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake-token")}
-    )
-    runner.adapters = {Platform.TELEGRAM: adapter}
-    runner._voice_mode = {}
-    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
-    runner.session_store = MagicMock()
-    runner.session_store.get_or_create_session.return_value = SessionEntry(
-        session_key="agent:main:telegram:group:-1001:17585",
-        session_id="sess-1",
-        created_at=datetime.now(),
-        updated_at=datetime.now(),
-        platform=Platform.TELEGRAM,
-        chat_type="group",
-    )
-    runner.session_store.load_transcript.return_value = _make_history(6, content_size=400)
-    runner.session_store.has_any_sessions.return_value = True
-    runner.session_store.rewrite_transcript = MagicMock()
-    runner.session_store.append_to_transcript = MagicMock()
-    runner._running_agents = {}
-    runner._pending_messages = {}
-    runner._pending_approvals = {}
-    runner._session_db = None
-    runner._is_user_authorized = lambda _source: True
-    runner._set_session_env = lambda _context: None
-    runner._run_agent = AsyncMock(
-        return_value={
-            "final_response": "ok",
-            "messages": [],
-            "tools": [],
-            "history_offset": 0,
-            "last_prompt_tokens": 0,
-        }
-    )
-
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
-    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
-    monkeypatch.setattr(
-        "agent.model_metadata.get_model_context_length",
-        lambda *_args, **_kwargs: 100,
-    )
-    monkeypatch.setenv("TELEGRAM_HOME_CHANNEL", "795544298")
-
-    event = MessageEvent(
-        text="hello",
-        source=SessionSource(
-            platform=Platform.TELEGRAM,
-            chat_id="-1001",
-            chat_type="group",
-            thread_id="17585",
-            user_id="12345",
-        ),
-        message_id="1",
-    )
-
-    result = await runner._handle_message(event)
-
-    assert result == "ok"
-    # No ⚠️ hard-failure warning (that's for dropped turns)
-    hard_warnings = [s for s in adapter.sent if "Context compression summary failed" in s["content"]]
-    assert len(hard_warnings) == 0, adapter.sent
-    # But an ℹ note about the configured aux model must be delivered.
-    aux_notes = [
-        s for s in adapter.sent
-        if "Configured compression model" in s["content"]
-    ]
-    assert len(aux_notes) == 1, (
-        f"Expected 1 aux-model fallback notice, got {len(aux_notes)}: {adapter.sent}"
-    )
-    note = aux_notes[0]
-    assert "gemini-3-flash-preview" in note["content"]
-    assert "404" in note["content"]
-    assert "auxiliary.compression.model" in note["content"]
-    # Note must land in the originating topic/thread.
-    assert note["chat_id"] == "-1001"
-    assert note["metadata"] == {"thread_id": "17585"}
-
-    FakeCompressAgentWithAuxRecovery.last_instance.close.assert_called_once()
--- a/Show More
+++ b/Show More