refactor: remove config TypedDicts and fix ImportError propagation in clipboard

Remove 44 TypedDict classes from config.py — they were already stale (11 missing keys), and because load_config() still returns Dict[str, Any] they provided zero type-checking value. Keep the int() coercions and Dict[str, Any] annotations, which are real fixes. Fix _wayland_save() swallowing ImportError (with only a DEBUG-level log) by adding an explicit `except ImportError: raise` before the broad `except Exception`.
Clean up TODO comment in auxiliary_client.py
2026-04-23 17:41:25 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:10 +05:30
140 changed files with 4978 additions and 2155 deletions
@@ -14,3 +14,6 @@ node_modules
 .env

 *.md
+
+# Runtime data (bind-mounted at /opt/data; must not leak into build context)
+data/
@@ -1,3 +1,4 @@
+.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -243,17 +243,6 @@ npm run fmt       # prettier
 npm test          # vitest
 ```

-### TUI in the Dashboard (`hermes dashboard` → `/chat`)
-
-The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
-
-- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
-- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
-- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
-- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
-
-**Never add a parallel chat surface in React.** If you catch yourself re-implementing slash popover / model picker / tool cards for the dashboard, stop — the TUI already does those, and anything new you add to Ink will appear in the dashboard automatically.
-
 ---

 ## Adding New Tools
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support |
+| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
+| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

 ### Clone and install

@@ -17,7 +17,6 @@ import os
 from pathlib import Path

 from hermes_constants import get_hermes_home
-from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
 from utils import normalize_proxy_env_vars

@@ -358,7 +357,7 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional[float] = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -1599,70 +1598,4 @@ def build_anthropic_kwargs(
    return kwargs


-def normalize_anthropic_response(
-    response,
-    strip_tool_prefix: bool = False,
-) -> Tuple[SimpleNamespace, str]:
-    """Normalize Anthropic response to match the shape expected by AIAgent.

-    Returns (assistant_message, finish_reason) where assistant_message has
-    .content, .tool_calls, and .reasoning attributes.
-
-    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
-    added to tool names for OAuth Claude Code compatibility.
-    """
-    text_parts = []
-    reasoning_parts = []
-    reasoning_details = []
-    tool_calls = []
-
-    for block in response.content:
-        if block.type == "text":
-            text_parts.append(block.text)
-        elif block.type == "thinking":
-            reasoning_parts.append(block.thinking)
-            block_dict = _to_plain_data(block)
-            if isinstance(block_dict, dict):
-                reasoning_details.append(block_dict)
-        elif block.type == "tool_use":
-            name = block.name
-            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
-                name = name[len(_MCP_TOOL_PREFIX):]
-            tool_calls.append(
-                SimpleNamespace(
-                    id=block.id,
-                    type="function",
-                    function=SimpleNamespace(
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    ),
-                )
-            )
-
-    # Map Anthropic stop_reason to OpenAI finish_reason.
-    # Newer stop reasons added in Claude 4.5+ / 4.7:
-    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
-    #   - model_context_window_exceeded: hit context limit (not max_tokens)
-    # Both need distinct handling upstream — a refusal should surface to the
-    # user with a clear message, and a context-window overflow should trigger
-    # compression/truncation rather than be treated as normal end-of-turn.
-    stop_reason_map = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-    finish_reason = stop_reason_map.get(response.stop_reason, "stop")
-
-    return (
-        SimpleNamespace(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            reasoning_content=None,
-            reasoning_details=reasoning_details or None,
-        ),
-        finish_reason,
-    )
@@ -41,10 +41,13 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 from openai import OpenAI

+if TYPE_CHECKING:
+    from agent.gemini_native_adapter import GeminiNativeClient
+
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
@@ -573,7 +576,8 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+        from agent.anthropic_adapter import build_anthropic_kwargs
+        from agent.transports import get_transport

        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
@@ -610,7 +614,19 @@ class _AnthropicCompletionsAdapter:
                anthropic_kwargs["temperature"] = temperature

        response = self._client.messages.create(**anthropic_kwargs)
-        assistant_message, finish_reason = normalize_anthropic_response(response)
+        _transport = get_transport("anthropic_messages")
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )
+
+        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
+        # .function.arguments) via properties, so no wrapping needed.
+        assistant_message = SimpleNamespace(
+            content=_nr.content,
+            tool_calls=_nr.tool_calls,
+            reasoning=_nr.reasoning,
+        )
+        finish_reason = _nr.finish_reason

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -797,7 +813,11 @@ def _read_codex_access_token() -> Optional[str]:
        return None


-def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
+# TODO(refactor): This function has messy types and duplicated logic (pool vs direct creds).
+#     Ideal fix: (1) define an AuxiliaryClient Protocol both OpenAI/GeminiNativeClient satisfy,
+#     (2) return a NamedTuple or dataclass instead of raw tuple, (3) extract the repeated
+#     Gemini/Kimi/Copilot client-building into a helper.
+def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeClient"]], Optional[str]]:
    """Try each API-key provider in PROVIDER_REGISTRY order.

    Returns (client, model) for the first provider with usable runtime
@@ -29,6 +29,7 @@ from hermes_cli.auth import (
    _save_auth_store,
    _save_provider_state,
    read_credential_pool,
+    read_provider_credentials,
    write_credential_pool,
 )

@@ -321,7 +322,7 @@ def get_custom_provider_pool_key(base_url: str) -> Optional[str]:

 def list_custom_pool_providers() -> List[str]:
    """Return all 'custom:*' pool keys that have entries in auth.json."""
-    pool_data = read_credential_pool(None)
+    pool_data = read_credential_pool()
    return sorted(
        key for key in pool_data
        if key.startswith(CUSTOM_POOL_PREFIX)
@@ -875,6 +876,20 @@ class CredentialPool:
            self._current_id = None
        return removed

+    def remove_entry(self, entry_id: str) -> Optional[PooledCredential]:
+        for idx, entry in enumerate(self._entries):
+            if entry.id == entry_id:
+                removed = self._entries.pop(idx)
+                self._entries = [
+                    replace(e, priority=new_priority)
+                    for new_priority, e in enumerate(self._entries)
+                ]
+                self._persist()
+                if self._current_id == removed.id:
+                    self._current_id = None
+                return removed
+        return None
+
    def resolve_target(self, target: Any) -> Tuple[Optional[int], Optional[PooledCredential], Optional[str]]:
        raw = str(target or "").strip()
        if not raw:
@@ -1325,7 +1340,7 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b

 def load_pool(provider: str) -> CredentialPool:
    provider = (provider or "").strip().lower()
-    raw_entries = read_credential_pool(provider)
+    raw_entries = read_provider_credentials(provider)
    entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries]

    if provider.startswith(CUSTOM_POOL_PREFIX):
@@ -729,6 +729,7 @@ class KawaiiSpinner:
                time.sleep(0.1)
                continue
            frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
+            assert self.start_time is not None  # start() sets it before thread starts
            elapsed = time.time() - self.start_time
            if wings:
                left, right = wings[self.frame_idx % len(wings)]
@@ -418,6 +418,9 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
+    from hermes_cli.models import normalize_provider
+    provider = normalize_provider(provider) or provider
+    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -370,6 +370,32 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
+    "mattermost": (
+        "You are in a Mattermost workspace communicating with your user. "
+        "Mattermost renders standard Markdown — headings, bold, italic, code "
+        "blocks, and tables all work. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
+        "attachments, audio and video as file attachments. "
+        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
+    ),
+    "matrix": (
+        "You are in a Matrix room communicating with your user. "
+        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
+        "the adapter converts your Markdown to HTML for rich display. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
+        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
+        "and other files as downloadable attachments."
+    ),
+    "feishu": (
+        "You are in a Feishu (Lark) workspace communicating with your user. "
+        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
+        "links are supported. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
+        "inline, audio files as voice messages, and other files as attachments."
+    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -455,7 +455,8 @@ def parse_qualified_name(name: str) -> Tuple[Optional[str], str]:
    """
    if ":" not in name:
        return None, name
-    return tuple(name.split(":", 1))  # type: ignore[return-value]
+    ns, bare = name.split(":", 1)
+    return ns, bare


 def is_valid_namespace(candidate: Optional[str]) -> bool:
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=30,
+            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
        )
@@ -78,31 +78,52 @@ class AnthropicTransport(ProviderTransport):
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Anthropic response to NormalizedResponse.

-        Calls the adapter's v1 normalize and maps the (SimpleNamespace, finish_reason)
-        tuple to the shared NormalizedResponse type.
+        Parses content blocks (text, thinking, tool_use), maps stop_reason
+        to OpenAI finish_reason, and collects reasoning_details in provider_data.
        """
-        from agent.anthropic_adapter import normalize_anthropic_response
-        from agent.transports.types import build_tool_call
+        import json
+        from agent.anthropic_adapter import _to_plain_data
+        from agent.transports.types import ToolCall

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+        _MCP_PREFIX = "mcp_"

-        tool_calls = None
-        if assistant_msg.tool_calls:
-            tool_calls = [
-                build_tool_call(id=tc.id, name=tc.function.name, arguments=tc.function.arguments)
-                for tc in assistant_msg.tool_calls
-            ]
+        text_parts = []
+        reasoning_parts = []
+        reasoning_details = []
+        tool_calls = []
+
+        for block in response.content:
+            if block.type == "text":
+                text_parts.append(block.text)
+            elif block.type == "thinking":
+                reasoning_parts.append(block.thinking)
+                block_dict = _to_plain_data(block)
+                if isinstance(block_dict, dict):
+                    reasoning_details.append(block_dict)
+            elif block.type == "tool_use":
+                name = block.name
+                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
+                    name = name[len(_MCP_PREFIX):]
+                tool_calls.append(
+                    ToolCall(
+                        id=block.id,
+                        name=name,
+                        arguments=json.dumps(block.input),
+                    )
+                )
+
+        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")

        provider_data = {}
-        if getattr(assistant_msg, "reasoning_details", None):
-            provider_data["reasoning_details"] = assistant_msg.reasoning_details
+        if reasoning_details:
+            provider_data["reasoning_details"] = reasoning_details

        return NormalizedResponse(
-            content=assistant_msg.content,
-            tool_calls=tool_calls,
+            content="\n".join(text_parts) if text_parts else None,
+            tool_calls=tool_calls or None,
            finish_reason=finish_reason,
-            reasoning=getattr(assistant_msg, "reasoning", None),
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
            usage=None,
            provider_data=provider_data or None,
        )
@@ -37,6 +37,30 @@ class ToolCall:
    arguments: str  # JSON string
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The agent loop reads tc.function.name / tc.function.arguments
+    # throughout run_agent.py (45+ sites).  These properties let
+    # NormalizedResponse pass through without the _nr_to_assistant_message
+    # shim, while keeping ToolCall's canonical fields flat.
+    @property
+    def type(self) -> str:
+        return "function"
+
+    @property
+    def function(self) -> "ToolCall":
+        """Return self so tc.function.name / tc.function.arguments work."""
+        return self
+
+    @property
+    def call_id(self) -> Optional[str]:
+        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
+        return (self.provider_data or {}).get("call_id")
+
+    @property
+    def response_item_id(self) -> Optional[str]:
+        """Codex response_item_id from provider_data."""
+        return (self.provider_data or {}).get("response_item_id")
+

 @dataclass
 class Usage:
@@ -70,6 +94,24 @@ class NormalizedResponse:
    usage: Optional[Usage] = None
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The shim _nr_to_assistant_message() mapped these from provider_data.
+    # These properties let NormalizedResponse pass through directly.
+    @property
+    def reasoning_content(self) -> Optional[str]:
+        pd = self.provider_data or {}
+        return pd.get("reasoning_content")
+
+    @property
+    def reasoning_details(self):
+        pd = self.provider_data or {}
+        return pd.get("reasoning_details")
+
+    @property
+    def codex_reasoning_items(self):
+        pd = self.provider_data or {}
+        return pd.get("codex_reasoning_items")
+

 # ---------------------------------------------------------------------------
 # Factory helpers
@@ -30,7 +30,7 @@ from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, TypedDict

 logger = logging.getLogger(__name__)

@@ -84,6 +84,34 @@ _project_env = Path(__file__).parent / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)


+class _ModelPickerState(TypedDict, total=False):
+    stage: str
+    providers: List[Dict[str, Any]]
+    selected: int
+    current_model: str
+    current_provider: str
+    user_provs: Optional[Dict[str, Any]]
+    custom_provs: Optional[Dict[str, Any]]
+    provider_data: Dict[str, Any]
+    model_list: List[str]
+
+
+class _ApprovalState(TypedDict, total=False):
+    command: str
+    description: str
+    choices: List[str]
+    selected: int
+    response_queue: "queue.Queue[str]"
+    show_full: bool
+
+
+class _ClarifyState(TypedDict, total=False):
+    question: str
+    choices: List[str]
+    selected: int
+    response_queue: "queue.Queue[str]"
+
+
 _REASONING_TAGS = (
    "REASONING_SCRATCHPAD",
    "think",
@@ -305,13 +333,23 @@ def load_cli_config() -> Dict[str, Any]:
    
    Environment variables take precedence over config file values.
    Returns default values if no config file exists.
+
+    If HERMES_IGNORE_USER_CONFIG=1 is set (via ``hermes chat --ignore-user-config``),
+    the user config at ``~/.hermes/config.yaml`` is skipped entirely and only the
+    built-in defaults plus the project-level ``cli-config.yaml`` (if any) are used.
+    Credentials in ``.env`` are still loaded — this flag only suppresses
+    behavioral/config settings.
    """
    # Check user config first ({HERMES_HOME}/config.yaml)
    user_config_path = _hermes_home / 'config.yaml'
    project_config_path = Path(__file__).parent / 'cli-config.yaml'

+    # --ignore-user-config: force-skip the user config.yaml (still honor project
+    # config as a fallback so defaults stay sensible).
+    ignore_user_config = os.environ.get("HERMES_IGNORE_USER_CONFIG") == "1"
+
    # Use user config if it exists, otherwise project config
-    if user_config_path.exists():
+    if user_config_path.exists() and not ignore_user_config:
        config_path = user_config_path
    else:
        config_path = project_config_path
@@ -1718,7 +1756,7 @@ def _parse_skills_argument(skills: str | list[str] | tuple[str, ...] | None) ->
    return parsed


-def save_config_value(key_path: str, value: any) -> bool:
+def save_config_value(key_path: str, value: Any) -> bool:
    """
    Save a value to the active config file at the specified key path.
    
@@ -1802,6 +1840,7 @@ class HermesCLI:
        resume: str = None,
        checkpoints: bool = False,
        pass_session_id: bool = False,
+        ignore_rules: bool = False,
    ):
        """
        Initialize the Hermes CLI.
@@ -1955,6 +1994,11 @@ class HermesCLI:
        self.checkpoints_enabled = checkpoints or cp_cfg.get("enabled", False)
        self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 50)
        self.pass_session_id = pass_session_id
+        # --ignore-rules: honor either the constructor flag or the env var set
+        # by `hermes chat --ignore-rules` in hermes_cli/main.py. When true we
+        # pass skip_context_files=True and skip_memory=True to AIAgent so
+        # AGENTS.md/SOUL.md/.cursorrules and persistent memory are not loaded.
+        self.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
        
        # Ephemeral system prompt: env var takes precedence, then config
        self.system_prompt = (
@@ -2049,16 +2093,16 @@ class HermesCLI:
        self._interrupt_queue = queue.Queue()
        self._should_exit = False
        self._last_ctrl_c_time = 0
-        self._clarify_state = None
+        self._clarify_state: Optional[_ClarifyState] = None
        self._clarify_freetext = False
        self._clarify_deadline = 0
        self._sudo_state = None
        self._sudo_deadline = 0
        self._modal_input_snapshot = None
-        self._approval_state = None
+        self._approval_state: Optional[_ApprovalState] = None
        self._approval_deadline = 0
        self._approval_lock = threading.Lock()
-        self._model_picker_state = None
+        self._model_picker_state: Optional[_ModelPickerState] = None
        self._secret_state = None
        self._secret_deadline = 0
        self._spinner_text: str = ""  # thinking spinner text for TUI
@@ -3312,6 +3356,8 @@ class HermesCLI:
                checkpoints_enabled=self.checkpoints_enabled,
                checkpoint_max_snapshots=self.checkpoint_max_snapshots,
                pass_session_id=self.pass_session_id,
+                skip_context_files=self.ignore_rules,
+                skip_memory=self.ignore_rules,
                tool_progress_callback=self._on_tool_progress,
                tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None,
                tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
@@ -7138,7 +7184,7 @@ class HermesCLI:
                logging.getLogger(noisy).setLevel(logging.WARNING)
        else:
            logging.getLogger().setLevel(logging.INFO)
-            for quiet_logger in ('tools', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
+            for quiet_logger in ('tools', 'run_agent', 'scripts.trajectory_compressor', 'cron', 'hermes_cli'):
                logging.getLogger(quiet_logger).setLevel(logging.ERROR)

    def _show_insights(self, command: str = "/insights"):
@@ -10816,6 +10862,8 @@ def main(
    w: bool = False,
    checkpoints: bool = False,
    pass_session_id: bool = False,
+    ignore_user_config: bool = False,
+    ignore_rules: bool = False,
 ):
    """
    Hermes Agent CLI - Interactive AI Assistant
@@ -10925,6 +10973,7 @@ def main(
        resume=resume,
        checkpoints=checkpoints,
        pass_session_id=pass_session_id,
+        ignore_rules=ignore_rules,
    )

    if parsed_skills:
@@ -439,8 +439,9 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                delivery_errors.append(msg)
                continue

-            if result and result.get("error"):
-                msg = f"delivery error: {result['error']}"
+            error = result.get("error") if result else None
+            if error:
+                msg = f"delivery error: {error}"
                logger.error("Job '%s': %s", job["id"], msg)
                delivery_errors.append(msg)
                continue
@@ -972,6 +973,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

+        # Guard against non-dict returns from run_conversation under error conditions
+        if not isinstance(result, dict):
+            raise RuntimeError(
+                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
+            )
+
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -29,7 +29,7 @@ echo "📝 Logging to: $LOG_FILE"
 # Point to the example dataset in this directory
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

-python batch_runner.py \
+python scripts/batch_runner.py \
  --dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
  --batch_size=5 \
  --run_name="browser_tasks_example" \
@@ -4,7 +4,7 @@
 # Generates tool-calling trajectories for multi-step web research tasks.
 #
 # Usage:
-#   python batch_runner.py \
+#   python scripts/batch_runner.py \
 #     --config datagen-config-examples/web_research.yaml \
 #     --run_name web_research_v1

@@ -58,6 +58,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

+# Ensure the main config file remains accessible to the hermes runtime user
+# even if it was edited on the host after initial ownership setup.
+if [ -f "$HERMES_HOME/config.yaml" ]; then
+    chown hermes:hermes "$HERMES_HOME/config.yaml"
+    chmod 640 "$HERMES_HOME/config.yaml"
+fi
+
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -18,7 +18,10 @@ import logging
 import os
 import uuid
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from tools.budget_config import BudgetConfig

 from model_tools import handle_function_call
 from tools.terminal_tool import get_active_env
@@ -32,14 +32,7 @@ import sqlite3
 import time
 import uuid
 from typing import Any, Dict, List, Optional
-
-try:
-    from aiohttp import web
-    AIOHTTP_AVAILABLE = True
-except ImportError:
-    AIOHTTP_AVAILABLE = False
-    web = None  # type: ignore[assignment]
-
+from aiohttp import web
 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
    BasePlatformAdapter,
@@ -270,12 +263,6 @@ def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Respons
        status=400,
    )

-
-def check_api_server_requirements() -> bool:
-    """Check if API server dependencies are available."""
-    return AIOHTTP_AVAILABLE
-
-
 class ResponseStore:
    """
    SQLite-backed LRU store for Responses API state.
@@ -391,30 +378,26 @@ _CORS_HEADERS = {
 }


-if AIOHTTP_AVAILABLE:
-    @web.middleware
-    async def cors_middleware(request, handler):
-        """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
-        adapter = request.app.get("api_server_adapter")
-        origin = request.headers.get("Origin", "")
-        cors_headers = None
-        if adapter is not None:
-            if not adapter._origin_allowed(origin):
-                return web.Response(status=403)
-            cors_headers = adapter._cors_headers_for_origin(origin)
+@web.middleware
+async def cors_middleware(request, handler):
+    """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
+    adapter = request.app.get("api_server_adapter")
+    origin = request.headers.get("Origin", "")
+    cors_headers = None
+    if adapter is not None:
+        if not adapter._origin_allowed(origin):
+            return web.Response(status=403)
+        cors_headers = adapter._cors_headers_for_origin(origin)

-        if request.method == "OPTIONS":
-            if cors_headers is None:
-                return web.Response(status=403)
-            return web.Response(status=200, headers=cors_headers)
-
-        response = await handler(request)
-        if cors_headers is not None:
-            response.headers.update(cors_headers)
-        return response
-else:
-    cors_middleware = None  # type: ignore[assignment]
+    if request.method == "OPTIONS":
+        if cors_headers is None:
+            return web.Response(status=403)
+        return web.Response(status=200, headers=cors_headers)

+    response = await handler(request)
+    if cors_headers is not None:
+        response.headers.update(cors_headers)
+    return response

 def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
    """OpenAI-style error envelope."""
@@ -428,21 +411,18 @@ def _openai_error(message: str, err_type: str = "invalid_request_error", param:
    }


-if AIOHTTP_AVAILABLE:
-    @web.middleware
-    async def body_limit_middleware(request, handler):
-        """Reject overly large request bodies early based on Content-Length."""
-        if request.method in ("POST", "PUT", "PATCH"):
-            cl = request.headers.get("Content-Length")
-            if cl is not None:
-                try:
-                    if int(cl) > MAX_REQUEST_BYTES:
-                        return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
-                except ValueError:
-                    return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
-        return await handler(request)
-else:
-    body_limit_middleware = None  # type: ignore[assignment]
+@web.middleware
+async def body_limit_middleware(request, handler):
+    """Reject overly large request bodies early based on Content-Length."""
+    if request.method in ("POST", "PUT", "PATCH"):
+        cl = request.headers.get("Content-Length")
+        if cl is not None:
+            try:
+                if int(cl) > MAX_REQUEST_BYTES:
+                    return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
+            except ValueError:
+                return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
+    return await handler(request)

 _SECURITY_HEADERS = {
    "X-Content-Type-Options": "nosniff",
@@ -450,16 +430,13 @@ _SECURITY_HEADERS = {
 }


-if AIOHTTP_AVAILABLE:
-    @web.middleware
-    async def security_headers_middleware(request, handler):
-        """Add security headers to all responses (including errors)."""
-        response = await handler(request)
-        for k, v in _SECURITY_HEADERS.items():
-            response.headers.setdefault(k, v)
-        return response
-else:
-    security_headers_middleware = None  # type: ignore[assignment]
+@web.middleware
+async def security_headers_middleware(request, handler):
+    """Fill in each default security header on the handler's response unless it is already set."""
+    resp = await handler(request)
+    for header_name in _SECURITY_HEADERS:
+        resp.headers.setdefault(header_name, _SECURITY_HEADERS[header_name])
+    return resp


 class _IdempotencyCache:
@@ -804,7 +781,7 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

-    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
+    async def _handle_chat_completions(self, request: "web.Request") -> "web.StreamResponse":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -1588,7 +1565,7 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

-    async def _handle_responses(self, request: "web.Request") -> "web.Response":
+    async def _handle_responses(self, request: "web.Request") -> "web.StreamResponse":
        """POST /v1/responses — OpenAI Responses API format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -2482,10 +2459,6 @@ class APIServerAdapter(BasePlatformAdapter):

    async def connect(self) -> bool:
        """Start the aiohttp web server."""
-        if not AIOHTTP_AVAILABLE:
-            logger.warning("[%s] aiohttp not installed", self.name)
-            return False
-
        try:
            mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
            self._app = web.Application(middlewares=mws)
@@ -187,16 +187,14 @@ def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-
-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}
        except ImportError:
-            logger.warning(
-                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
-                "Run: pip install aiohttp-socks",
-                proxy_url,
-            )
-            return {}
+            raise ImportError(
+                "aiohttp-socks is required for SOCKS proxy support. "
+                "Install with: pip install hermes-agent[messaging]"
+            ) from None
+
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}
    return {"proxy": proxy_url}


@@ -220,16 +218,14 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-
-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}, {}
        except ImportError:
-            logger.warning(
-                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
-                "Run: pip install aiohttp-socks",
-                proxy_url,
-            )
-            return {}, {}
+            raise ImportError(
+                "aiohttp-socks is required for SOCKS proxy support. "
+                "Install with: pip install hermes-agent[messaging]"
+            ) from None
+
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}, {}
    return {}, {"proxy": proxy_url}


@@ -428,6 +424,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
+    raise AssertionError("unreachable: retry loop exhausted")


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -542,6 +539,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
+    raise AssertionError("unreachable: retry loop exhausted")


 # ---------------------------------------------------------------------------
@@ -752,7 +750,10 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        return parts[1] if len(parts) > 1 else ""
+        args = parts[1] if len(parts) > 1 else ""
+        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
+        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
+        return args


@dataclass 
@@ -1343,7 +1344,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1828,8 +1829,11 @@ class BasePlatformAdapter(ABC):
        try:
            await self._run_processing_hook("on_processing_start", event)

-            # Call the handler (this can take a while with tool calls)
-            response = await self._message_handler(event)
+            handler = self._message_handler
+            if handler is None:
+                return
+
+            response = await handler(event)
            
            # Send response if any.  A None/empty response is normal when
            # streaming already delivered the text (already_sent=True) or
@@ -14,7 +14,7 @@ import logging
 import os
 import re
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional
 from urllib.parse import quote

@@ -377,7 +377,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        payload = {
            "addresses": [address],
            "message": message,
-            "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
+            "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
        }
        try:
            res = await self._api_post("/api/v1/chat/new", payload)
@@ -417,7 +417,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
                )
            payload: Dict[str, Any] = {
                "chatGuid": guid,
-                "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
+                "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
                "message": chunk,
            }
            if reply_to and self._private_api_enabled and self._helper_connected:
@@ -527,6 +527,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
+        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -744,7 +745,8 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            self._register_slash_commands()
+            if self._slash_commands:
+                self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -1194,9 +1196,16 @@ class DiscordAdapter(BasePlatformAdapter):
            try:
                import base64

-                duration_secs = 5.0
                try:
                    from mutagen.oggopus import OggOpus
+                except ImportError:
+                    raise ImportError(
+                        "mutagen is required for Discord voice messages. "
+                        "Install with: pip install hermes-agent[messaging]"
+                    ) from None
+
+                duration_secs = 5.0
+                try:
                    info = OggOpus(audio_path)
                    duration_secs = info.info.length
                except Exception:
@@ -1889,7 +1898,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # Fetch full member list (requires members intent)
            try:
                members = guild.members
-                if len(members) < guild.member_count:
+                if guild.member_count is not None and len(members) < guild.member_count:
                    members = [m async for m in guild.fetch_members(limit=None)]
            except Exception as e:
                logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
@@ -2502,7 +2511,7 @@ class DiscordAdapter(BasePlatformAdapter):
                if isinstance(skills, str):
                    return [skills]
                if isinstance(skills, list) and skills:
-                    return list(dict.fromkeys(skills))  # dedup, preserve order
+                    return list(dict.fromkeys(skills))  # ty: ignore[invalid-return-type]  # dedup, preserve order
        return None

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
@@ -3038,7 +3047,7 @@ class DiscordAdapter(BasePlatformAdapter):

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
-            in_bot_thread = is_thread and thread_id in self._threads
+            in_bot_thread = is_thread and thread_id is not None and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
                if self._client.user not in message.mentions and not mention_prefix:
@@ -3631,7 +3640,9 @@ if DISCORD_AVAILABLE:
                )
                return

-            provider_slug = interaction.data["values"][0]
+            if interaction.data is None:
+                return
+            provider_slug = interaction.data["values"][0]  # ty: ignore[invalid-key]
            self._selected_provider = provider_slug
            provider = next(
                (p for p in self.providers if p["slug"] == provider_slug), None
@@ -3665,8 +3676,10 @@ if DISCORD_AVAILABLE:
                )
                return

+            if interaction.data is None:
+                return
            self.resolved = True
-            model_id = interaction.data["values"][0]
+            model_id = interaction.data["values"][0]  # ty: ignore[invalid-key]

            try:
                result_text = await self.on_model_selected(
@@ -532,6 +532,7 @@ class EmailAdapter(BasePlatformAdapter):
        image_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an image URL as part of an email body."""
        text = caption or ""
@@ -1700,6 +1700,7 @@ class FeishuAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

+        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2170,8 +2170,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ul_match = re.match(r"^[\s]*[-*+]\s+(.+)$", line)
            if ul_match:
                items = []
-                while i < len(lines) and re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]):
-                    items.append(re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]).group(1))
+                while i < len(lines) and (m := re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i])):
+                    items.append(m.group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ul>{li}</ul>")
@@ -2181,8 +2181,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ol_match = re.match(r"^[\s]*\d+[.)]\s+(.+)$", line)
            if ol_match:
                items = []
-                while i < len(lines) and re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]):
-                    items.append(re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]).group(1))
+                while i < len(lines) and (m := re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i])):
+                    items.append(m.group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ol>{li}</ol>")
@@ -535,6 +535,9 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
+                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
+                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
+                        return

            except Exception as exc:
                if not self._running:
@@ -1839,6 +1842,7 @@ class QQAdapter(BasePlatformAdapter):
                    await asyncio.sleep(1.5 * (attempt + 1))
                else:
                    raise
+        raise AssertionError("unreachable: retry loop exhausted")

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1690,6 +1690,7 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
+        raise AssertionError("unreachable: retry loop exhausted")

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
@@ -1715,6 +1716,7 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
+        raise AssertionError("unreachable: retry loop exhausted")

    # ── Channel mention gating ─────────────────────────────────────────────

@@ -25,7 +25,10 @@ import hmac
 import logging
 import os
 import urllib.parse
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import aiohttp

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
@@ -2820,6 +2820,8 @@ class TelegramAdapter(BasePlatformAdapter):
        )

        sticker = msg.sticker
+        if sticker is None:
+            return
        emoji = sticker.emoji or ""
        set_name = sticker.set_name or ""

@@ -151,7 +151,7 @@ def _resolve_system_dns() -> set[str]:
    """Return the IPv4 addresses that the OS resolver gives for api.telegram.org."""
    try:
        results = socket.getaddrinfo(_TELEGRAM_API_HOST, 443, socket.AF_INET)
-        return {addr[4][0] for addr in results}
+        return {str(addr[4][0]) for addr in results}
    except Exception:
        return set()

@@ -508,6 +508,11 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
+        # Strip leading @mention in group chats so slash commands like
+        # "@BotName /approve" are correctly recognized as "/approve".
+        # Mirrors what the Telegram adapter does (re.sub @botname).
+        if is_group and text:
+            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -698,7 +703,8 @@ class WeComAdapter(BasePlatformAdapter):
                elif isinstance(appmsg.get("image"), dict):
                    refs.append(("image", appmsg["image"]))

-        quote = body.get("quote") if isinstance(body.get("quote"), dict) else {}
+        raw_quote = body.get("quote")
+        quote = raw_quote if isinstance(raw_quote, dict) else {}
        quote_type = str(quote.get("msgtype") or "").lower()
        if quote_type == "image" and isinstance(quote.get("image"), dict):
            refs.append(("image", quote["image"]))
@@ -25,7 +25,10 @@ import subprocess

 _IS_WINDOWS = platform.system() == "Windows"
 from pathlib import Path
-from typing import Dict, Optional, Any
+from typing import Dict, Optional, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import aiohttp

 from hermes_constants import get_hermes_dir

@@ -2859,10 +2859,12 @@ class GatewayRunner:
            return MatrixAdapter(config)

        elif platform == Platform.API_SERVER:
-            from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
-            if not check_api_server_requirements():
+            try:
+                import aiohttp  # noqa: F401
+            except ImportError:
                logger.warning("API Server: aiohttp not installed")
                return None
+            from gateway.platforms.api_server import APIServerAdapter
            return APIServerAdapter(config)

        elif platform == Platform.WEBHOOK:
@@ -4429,9 +4431,10 @@ class GatewayRunner:
        # is speaking, without needing a separate tool call.
        # -----------------------------------------------------------------
        if source.platform == Platform.DISCORD:
+            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(Platform.DISCORD)
            guild_id = self._get_guild_id(event)
-            if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
+            if guild_id and isinstance(adapter, DiscordAdapter):
                vc_context = adapter.get_voice_channel_context(guild_id)
                if vc_context:
                    context_prompt += f"\n\n{vc_context}"
@@ -5484,6 +5487,7 @@ class GatewayRunner:
                try:
                    providers = list_authenticated_providers(
                        current_provider=current_provider,
+                        current_base_url=current_base_url,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                        max_models=50,
@@ -5595,6 +5599,7 @@ class GatewayRunner:
            try:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
+                    current_base_url=current_base_url,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=5,
@@ -5872,7 +5877,7 @@ class GatewayRunner:
        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
-    async def _handle_retry_command(self, event: MessageEvent) -> str:
+    async def _handle_retry_command(self, event: MessageEvent) -> Optional[str]:
        """Handle /retry command - re-send the last user message."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
@@ -6022,9 +6027,10 @@ class GatewayRunner:
                "all": "TTS (voice reply to all messages)",
            }
            # Append voice channel info if connected
+            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(event.source.platform)
            guild_id = self._get_guild_id(event)
-            if guild_id and hasattr(adapter, "get_voice_channel_info"):
+            if guild_id and isinstance(adapter, DiscordAdapter):
                info = adapter.get_voice_channel_info(guild_id)
                if info:
                    lines = [
@@ -6055,8 +6061,9 @@ class GatewayRunner:

    async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
        """Join the user's current Discord voice channel."""
+        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
-        if not hasattr(adapter, "join_voice_channel"):
+        if not isinstance(adapter, DiscordAdapter):
            return "Voice channels are not supported on this platform."

        guild_id = self._get_guild_id(event)
@@ -6071,10 +6078,8 @@ class GatewayRunner:

        # Wire callbacks BEFORE join so voice input arriving immediately
        # after connection is not lost.
-        if hasattr(adapter, "_voice_input_callback"):
-            adapter._voice_input_callback = self._handle_voice_channel_input
-        if hasattr(adapter, "_on_voice_disconnect"):
-            adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
+        adapter._voice_input_callback = self._handle_voice_channel_input
+        adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup

        try:
            success = await adapter.join_voice_channel(voice_channel)
@@ -6091,8 +6096,7 @@ class GatewayRunner:

        if success:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
-            if hasattr(adapter, "_voice_sources"):
-                adapter._voice_sources[guild_id] = event.source.to_dict()
+            adapter._voice_sources[guild_id] = event.source.to_dict()
            self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
            self._save_voice_modes()
            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
@@ -6106,13 +6110,14 @@ class GatewayRunner:

    async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
        """Leave the Discord voice channel."""
+        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)

-        if not guild_id or not hasattr(adapter, "leave_voice_channel"):
+        if not guild_id or not isinstance(adapter, DiscordAdapter):
            return "Not in a voice channel."

-        if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
+        if not adapter.is_in_voice_channel(guild_id):
            return "Not in a voice channel."

        try:
@@ -6123,8 +6128,7 @@ class GatewayRunner:
        self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off"
        self._save_voice_modes()
        self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
-        if hasattr(adapter, "_voice_input_callback"):
-            adapter._voice_input_callback = None
+        adapter._voice_input_callback = None
        return "Left voice channel."

    def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
@@ -6284,13 +6288,13 @@ class GatewayRunner:
            adapter = self.adapters.get(event.source.platform)

            # If connected to a voice channel, play there instead of sending a file
+            from gateway.platforms.discord import DiscordAdapter
            guild_id = self._get_guild_id(event)
            if (guild_id
-                    and hasattr(adapter, "play_in_voice_channel")
-                    and hasattr(adapter, "is_in_voice_channel")
+                    and isinstance(adapter, DiscordAdapter)
                    and adapter.is_in_voice_channel(guild_id)):
                await adapter.play_in_voice_channel(guild_id, actual_path)
-            elif adapter and hasattr(adapter, "send_voice"):
+            elif adapter:
                send_kwargs: Dict[str, Any] = {
                    "chat_id": event.source.chat_id,
                    "audio_path": actual_path,
@@ -10486,6 +10490,7 @@ class GatewayRunner:
                if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
                    _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)

+                assert _agent_timeout is not None  # narrowed by _idle_secs >= _agent_timeout above
                _timeout_mins = int(_agent_timeout // 60) or 1

                # Construct a user-facing message with diagnostic context.
@@ -10604,7 +10609,7 @@ class GatewayRunner:
                pending = None

            if pending_event or pending:
-                logger.debug("Processing pending message: '%s...'", pending[:40])
+                logger.debug("Processing pending message: '%s...'", (pending or "")[:40])

                # Clear the adapter's interrupt event so the next _run_agent call
                # doesn't immediately re-trigger the interrupt before the new agent
@@ -10623,8 +10628,6 @@ class GatewayRunner:
                    adapter = self.adapters.get(source.platform)
                    if adapter and pending_event:
                        merge_pending_message_event(adapter._pending_messages, session_key, pending_event)
-                    elif adapter and hasattr(adapter, 'queue_message'):
-                        adapter.queue_message(session_key, pending)
                    return result_holder[0] or {"final_response": response, "messages": history}

                was_interrupted = result.get("interrupted")
@@ -10706,7 +10709,7 @@ class GatewayRunner:
                        history=updated_history,
                    )
                    if next_message is None:
-                        return result
+                        return result  # ty: ignore[invalid-return-type]
                    next_message_id = getattr(pending_event, "message_id", None)
                    next_channel_prompt = getattr(pending_event, "channel_prompt", None)

@@ -496,7 +496,8 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError):
+            except (ProcessLookupError, PermissionError, OSError):
+                # Windows raises OSError with WinError 87 for invalid pid check
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -743,6 +744,10 @@ def get_running_pid(
            if _record_looks_like_gateway(record):
                return pid
            continue
+        except OSError:
+            # Windows raises OSError with WinError 87 for an invalid pid
+            # (process is definitely gone). Treat as "process doesn't exist".
+            continue

        recorded_start = record.get("start_time")
        current_start = _get_process_start_time(pid)
@@ -768,16 +768,20 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
    auth_store["active_provider"] = provider_id


-def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
-    """Return the persisted credential pool, or one provider slice."""
+def read_credential_pool() -> Dict[str, Any]:
+    """Return the entire persisted credential pool."""
    auth_store = _load_auth_store()
    pool = auth_store.get("credential_pool")
    if not isinstance(pool, dict):
        pool = {}
-    if provider_id is None:
-        return dict(pool)
-    provider_entries = pool.get(provider_id)
-    return list(provider_entries) if isinstance(provider_entries, list) else []
+    return dict(pool)
+
+
+def read_provider_credentials(provider_id: str) -> List[Dict[str, Any]]:
+    """Return a copy of one provider's credential list, or ``[]`` when the stored value is not a list."""
+    stored = read_credential_pool().get(provider_id)
+    has_entries = isinstance(stored, list)
+    return list(stored) if has_entries else []


 def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
@@ -276,7 +276,7 @@ def _get_ps_exe() -> str | None:
    global _ps_exe
    if _ps_exe is False:
        _ps_exe = _find_powershell()
-    return _ps_exe
+    return _ps_exe if isinstance(_ps_exe, str) else None


 def _windows_has_image() -> bool:
@@ -387,6 +387,8 @@ def _wayland_save(dest: Path) -> bool:

    except FileNotFoundError:
        logger.debug("wl-paste not installed — Wayland clipboard unavailable")
+    except ImportError:
+        raise
    except Exception as e:
        logger.debug("wl-paste clipboard extraction failed: %s", e)
        dest.unlink(missing_ok=True)
@@ -395,14 +397,17 @@ def _wayland_save(dest: Path) -> bool:

 def _convert_to_png(path: Path) -> bool:
    """Convert an image file to PNG in-place (requires Pillow or ImageMagick)."""
-    # Try Pillow first (likely installed in the venv)
    try:
        from PIL import Image
+    except ImportError:
+        raise ImportError(
+            "Pillow is required for clipboard image conversion. "
+            "Install with: pip install hermes-agent[cli]"
+        ) from None
+    try:
        img = Image.open(path)
        img.save(path, "PNG")
        return True
-    except ImportError:
-        pass
    except Exception as e:
        logger.debug("Pillow BMP→PNG conversion failed: %s", e)

@@ -1904,7 +1904,7 @@ def get_missing_config_fields() -> List[Dict[str, Any]]:
    config = load_config()
    missing = []

-    def _check(defaults: dict, current: dict, prefix: str = ""):
+    def _check(defaults: Dict[str, Any], current: Dict[str, Any], prefix: str = ""):
        for key, default_value in defaults.items():
            if key.startswith('_'):
                continue
@@ -2055,6 +2055,14 @@ def _normalize_custom_provider_entry(
    models = entry.get("models")
    if isinstance(models, dict) and models:
        normalized["models"] = models
+    elif isinstance(models, list) and models:
+        # Hand-edited configs (and older Hermes versions) write ``models`` as
+        # a plain list of model ids. Preserve them by converting to the dict
+        # shape downstream code expects; otherwise normalize silently drops
+        # the list and /model shows the provider with (0) models.
+        normalized["models"] = {
+            str(m): {} for m in models if isinstance(m, str) and m.strip()
+        }

    context_length = entry.get("context_length")
    if isinstance(context_length, int) and context_length > 0:
@@ -2138,8 +2146,8 @@ def check_config_version() -> Tuple[int, int]:
    Returns (current_version, latest_version).
    """
    config = load_config()
-    current = config.get("_config_version", 0)
-    latest = DEFAULT_CONFIG.get("_config_version", 1)
+    current = int(config.get("_config_version", 0))
+    latest = int(DEFAULT_CONFIG.get("_config_version", 1))
    return current, latest


@@ -2859,7 +2867,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
    return results


-def _deep_merge(base: dict, override: dict) -> dict:
+def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Recursively merge *override* into *base*, preserving nested defaults.

    Keys in *override* take precedence. If both values are dicts the merge
@@ -18,7 +18,7 @@ import os
 import sys
 import time
 import logging
-from typing import Optional, Tuple
+from typing import Any, Callable, Optional, Tuple

 import requests

@@ -108,7 +108,7 @@ def wait_for_registration_success(
    device_code: str,
    interval: int = 3,
    expires_in: int = 7200,
-    on_waiting: Optional[callable] = None,
+    on_waiting: Optional[Callable[..., Any]] = None,
 ) -> Tuple[str, str]:
    """Block until the registration succeeds or times out.

@@ -761,6 +761,21 @@ def get_systemd_unit_path(system: bool = False) -> Path:
    return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"


+class UserSystemdUnavailableError(RuntimeError):
+    """Raised when ``systemctl --user`` cannot reach the user D-Bus session.
+
+    Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
+    and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
+    Carries a user-facing remediation message in ``args[0]``.
+
+    The message is multi-line; handlers in this module print it line by line
+    via ``str(e).splitlines()`` instead of letting a traceback surface.
+    """
+
+
+def _user_dbus_socket_path() -> Path:
+    """Return the expected per-user D-Bus socket path (regardless of existence)."""
+    # ``or`` (rather than a ``.get`` default) also covers XDG_RUNTIME_DIR being
+    # set but empty, falling back to the /run/user/<uid> directory.
+    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
+    return Path(xdg) / "bus"
+
+
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -783,6 +798,126 @@ def _ensure_user_systemd_env() -> None:
            os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"


+def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
+    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
+
+    Linger-enabled user@.service can take a second or two to spawn the socket
+    after ``loginctl enable-linger`` runs.
+
+    The socket is checked at least once, even when ``timeout`` is zero or
+    negative.  Whenever the socket is found, ``_ensure_user_systemd_env()`` is
+    called before returning so the environment is refreshed on every success
+    path, including a socket that only shows up on the final poll.
+
+    Args:
+        timeout: Maximum time to wait, in seconds.
+
+    Returns:
+        True once the socket exists, False if it is still missing when the
+        deadline passes.
+    """
+    import time
+
+    deadline = time.monotonic() + timeout
+    while True:
+        if _user_dbus_socket_path().exists():
+            _ensure_user_systemd_env()
+            return True
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            return False
+        # Clamp the nap so the last poll lands on the deadline, not past it.
+        time.sleep(min(0.2, remaining))
+
+
+def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
+    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
+
+    No-op when the bus socket is already there (the common case on desktops
+    and linger-enabled servers).  On fresh SSH sessions where the socket is
+    missing:
+
+    * If linger is already enabled, wait briefly for user@.service to spawn
+      the socket.
+    * If linger is disabled and ``auto_enable_linger`` is True, try
+      ``loginctl enable-linger $USER`` (works as non-root when polkit permits
+      it, otherwise needs sudo).
+    * If the socket is still missing afterwards, raise
+      :class:`UserSystemdUnavailableError` with a precise remediation message.
+
+    Callers should treat the exception as a terminal condition for user-scope
+    systemd operations and surface the message to the user.
+
+    Args:
+        auto_enable_linger: When True (default) and ``loginctl`` is on PATH,
+            attempt ``loginctl enable-linger`` before giving up.
+
+    Raises:
+        UserSystemdUnavailableError: If the user D-Bus socket is still missing
+            after the steps above.
+    """
+    _ensure_user_systemd_env()
+    bus_path = _user_dbus_socket_path()
+    if bus_path.exists():
+        return
+
+    import getpass
+
+    username = getpass.getuser()
+    # NOTE(review): the ``is True`` test below suggests ``linger_enabled`` may
+    # be tri-state (True / False / unknown) — confirm against
+    # get_systemd_linger_status().
+    linger_enabled, linger_detail = get_systemd_linger_status()
+
+    if linger_enabled is True:
+        if _wait_for_user_dbus_socket(timeout=3.0):
+            return
+        # Linger is on but socket still missing — unusual; fall through to error.
+        # The helper always raises, so control never reaches the enable-linger
+        # attempt further down from this branch.
+        _raise_user_systemd_unavailable(
+            username,
+            reason="User D-Bus socket is missing even though linger is enabled.",
+            fix_hint=(
+                f"  systemctl start user@{os.getuid()}.service\n"
+                "  (may require sudo; try again after the command succeeds)"
+            ),
+        )
+
+    if auto_enable_linger and shutil.which("loginctl"):
+        try:
+            # check=False: a non-zero exit is inspected via ``returncode``
+            # below rather than raised as CalledProcessError.
+            result = subprocess.run(
+                ["loginctl", "enable-linger", username],
+                capture_output=True,
+                text=True,
+                check=False,
+                timeout=30,
+            )
+        except Exception as exc:
+            # Covers failures to run the command at all (including the 30s
+            # timeout); reported with the same sudo remediation.
+            _raise_user_systemd_unavailable(
+                username,
+                reason=f"loginctl enable-linger failed ({exc}).",
+                fix_hint=f"  sudo loginctl enable-linger {username}",
+            )
+        else:
+            if result.returncode == 0:
+                if _wait_for_user_dbus_socket(timeout=5.0):
+                    print(f"✓ Enabled linger for {username} — user D-Bus now available")
+                    return
+                # enable-linger succeeded but the socket never appeared.
+                _raise_user_systemd_unavailable(
+                    username,
+                    reason="Linger was enabled, but the user D-Bus socket did not appear.",
+                    fix_hint=(
+                        "  Log out and log back in, then re-run the command.\n"
+                        f"  Or reboot and run: systemctl --user start {get_service_name()}"
+                    ),
+                )
+            # Only reached on a non-zero exit: the success branch above either
+            # returns or raises.
+            detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
+            _raise_user_systemd_unavailable(
+                username,
+                reason=f"loginctl enable-linger was denied: {detail}",
+                fix_hint=f"  sudo loginctl enable-linger {username}",
+            )
+
+    # Reached only when the enable-linger attempt was skipped, i.e.
+    # ``auto_enable_linger`` is False or ``loginctl`` is not on PATH.
+    _raise_user_systemd_unavailable(
+        username,
+        reason=(
+            "User D-Bus session is not available "
+            f"({linger_detail or 'linger disabled'})."
+        ),
+        fix_hint=f"  sudo loginctl enable-linger {username}",
+    )
+
+
+def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
+    """Build a user-facing error message and raise UserSystemdUnavailableError.
+
+    Never returns normally: despite the ``-> None`` annotation, every call
+    ends in the ``raise`` at the bottom.
+
+    Args:
+        username: Account the remediation applies to.  Not interpolated into
+            the message here; callers embed it in ``fix_hint`` where needed.
+        reason: First line of the message, stating what went wrong.
+        fix_hint: Already-indented command line(s) shown under "To fix:".
+
+    Raises:
+        UserSystemdUnavailableError: Always, carrying the assembled message.
+    """
+    msg = (
+        f"{reason}\n"
+        "  systemctl --user cannot reach the user D-Bus session in this shell.\n"
+        "\n"
+        "  To fix:\n"
+        f"{fix_hint}\n"
+        "\n"
+        "  Alternative: run the gateway in the foreground (stays up until\n"
+        "  you exit / close the terminal):\n"
+        "    hermes gateway run"
+    )
+    raise UserSystemdUnavailableError(msg)
+
+
 def _systemctl_cmd(system: bool = False) -> list[str]:
    if not system:
        _ensure_user_systemd_env()
@@ -1623,6 +1758,11 @@ def systemd_start(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("start")
+    else:
+        # Fail fast with actionable guidance if the user D-Bus session is not
+        # reachable (common on fresh RHEL/Debian SSH sessions without linger).
+        # Raises UserSystemdUnavailableError with a remediation message.
+        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")
@@ -1642,6 +1782,8 @@ def systemd_restart(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("restart")
+    else:
+        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    from gateway.status import get_running_pid

@@ -2905,6 +3047,12 @@ def _setup_wecom():
    print_success("💬 WeCom configured!")


+def _setup_wecom_callback():
+    """Configure WeCom Callback (self-built app) via the standard platform setup.
+
+    Looks up the ``_PLATFORMS`` entry whose ``"key"`` is ``"wecom_callback"``
+    and hands it to ``_setup_standard_platform``.
+    """
+    # ``next`` is called without a default, so a missing "wecom_callback"
+    # entry surfaces as StopIteration rather than a descriptive error.
+    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom_callback")
+    _setup_standard_platform(wecom_platform)
+
+
 def _is_service_installed() -> bool:
    """Check if the gateway is installed as a system service."""
    if supports_systemd_services():
@@ -3516,6 +3664,10 @@ def gateway_setup():
                    systemd_start()
                elif is_macos():
                    launchd_start()
+            except UserSystemdUnavailableError as e:
+                print_error("  Failed to start — user systemd not reachable:")
+                for line in str(e).splitlines():
+                    print(f"  {line}")
            except subprocess.CalledProcessError as e:
                print_error(f"  Failed to start: {e}")
    else:
@@ -3580,6 +3732,10 @@ def gateway_setup():
                    else:
                        stop_profile_gateway()
                        print_info("Start manually: hermes gateway")
+                except UserSystemdUnavailableError as e:
+                    print_error("  Restart failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -3589,6 +3745,10 @@ def gateway_setup():
                        systemd_start()
                    elif is_macos():
                        launchd_start()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Start failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Start failed: {e}")
        else:
@@ -3612,6 +3772,10 @@ def gateway_setup():
                                    systemd_start(system=installed_scope == "system")
                                else:
                                    launchd_start()
+                            except UserSystemdUnavailableError as e:
+                                print_error("  Start failed — user systemd not reachable:")
+                                for line in str(e).splitlines():
+                                    print(f"  {line}")
                            except subprocess.CalledProcessError as e:
                                print_error(f"  Start failed: {e}")
                    except subprocess.CalledProcessError as e:
@@ -3649,6 +3813,18 @@ def gateway_setup():

 def gateway_command(args):
    """Handle gateway subcommands."""
+    # Thin wrapper: the subcommand dispatch lives in _gateway_command_inner;
+    # this layer only converts UserSystemdUnavailableError into a printed
+    # message plus a non-zero exit status.
+    try:
+        return _gateway_command_inner(args)
+    except UserSystemdUnavailableError as e:
+        # Clean, actionable message instead of a traceback when the user D-Bus
+        # session is unreachable (fresh SSH shell, no linger, container, etc.).
+        print_error("User systemd not reachable:")
+        for line in str(e).splitlines():
+            print(f"  {line}")
+        # Exit status 1 signals the failure to the calling shell.
+        sys.exit(1)
+
+
+def _gateway_command_inner(args):
    subcmd = getattr(args, 'gateway_command', None)
    
    # Default to run if no subcommand
@@ -1131,6 +1131,20 @@ def cmd_chat(args):
    if getattr(args, "yolo", False):
        os.environ["HERMES_YOLO_MODE"] = "1"

+    # --ignore-user-config: make load_cli_config() / load_config() skip the
+    # user's ~/.hermes/config.yaml and return built-in defaults. Set BEFORE
+    # importing cli (which runs `CLI_CONFIG = load_cli_config()` at module
+    # import time). Credentials in .env are still loaded — this flag only
+    # ignores behavioral/config settings.
+    if getattr(args, "ignore_user_config", False):
+        os.environ["HERMES_IGNORE_USER_CONFIG"] = "1"
+
+    # --ignore-rules: skip auto-injection of AGENTS.md/SOUL.md/.cursorrules
+    # (rules), memory entries, and any preloaded skills coming from user config.
+    # Maps to AIAgent(skip_context_files=True, skip_memory=True).
+    if getattr(args, "ignore_rules", False):
+        os.environ["HERMES_IGNORE_RULES"] = "1"
+
    # --source: tag session source for filtering (e.g. 'tool' for third-party integrations)
    if getattr(args, "source", None):
        os.environ["HERMES_SESSION_SOURCE"] = args.source
@@ -1159,6 +1173,8 @@ def cmd_chat(args):
        "checkpoints": getattr(args, "checkpoints", False),
        "pass_session_id": getattr(args, "pass_session_id", False),
        "max_turns": getattr(args, "max_turns", None),
+        "ignore_rules": getattr(args, "ignore_rules", False),
+        "ignore_user_config": getattr(args, "ignore_user_config", False),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@@ -6606,6 +6622,18 @@ For more help on a command:
        default=False,
        help="Include the session ID in the agent's system prompt",
    )
+    parser.add_argument(
+        "--ignore-user-config",
+        action="store_true",
+        default=False,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
+    )
+    parser.add_argument(
+        "--ignore-rules",
+        action="store_true",
+        default=False,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
+    )
    parser.add_argument(
        "--tui",
        action="store_true",
@@ -6745,6 +6773,18 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Include the session ID in the agent's system prompt",
    )
+    chat_parser.add_argument(
+        "--ignore-user-config",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
+    )
+    chat_parser.add_argument(
+        "--ignore-rules",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
+    )
    chat_parser.add_argument(
        "--source",
        default=None,
@@ -782,6 +782,7 @@ def switch_model(

 def list_authenticated_providers(
    current_provider: str = "",
+    current_base_url: str = "",
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
@@ -847,6 +848,10 @@ def list_authenticated_providers(
        # source of truth.  models.dev can have wrong mappings (e.g.
        # minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
        pconfig = PROVIDER_REGISTRY.get(hermes_id)
+        # Skip non-API-key auth providers here — they are handled in
+        # section 2 (HERMES_OVERLAYS) with proper auth store checking.
+        if pconfig and pconfig.auth_type != "api_key":
+            continue
        if pconfig and pconfig.api_key_env_vars:
            env_vars = list(pconfig.api_key_env_vars)
        else:
@@ -1117,66 +1122,113 @@ def list_authenticated_providers(

    # --- 4. Saved custom providers from config ---
    # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same provider name are grouped into a
-    # single picker row so that e.g. four Ollama Cloud entries
-    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
-    # "Ollama Cloud" row with four models inside instead of four
-    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
-    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
+    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
+    # are grouped into a single picker row, so e.g. four Ollama entries
+    # pointing at ``http://localhost:11434/v1`` with per-model display names
+    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
+    # "Ollama" row with four models inside instead of four near-duplicates
+    # that differ only by suffix. Entries with distinct endpoints still
+    # produce separate rows.
+    #
+    # When the grouped endpoint matches ``current_base_url`` the group's
+    # slug becomes ``current_provider`` so that selecting a model from the
+    # picker flows back through the runtime provider that already holds
+    # valid credentials — no re-resolution needed.
    if custom_providers and isinstance(custom_providers, list):
        from collections import OrderedDict

-        groups: "OrderedDict[str, dict]" = OrderedDict()
+        # Key by (base_url, api_key) instead of slug: names frequently
+        # differ per model ("Ollama — X") while the endpoint stays the
+        # same. Slug-based grouping left them as separate rows.
+        groups: "OrderedDict[tuple, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue

-            display_name = (entry.get("name") or "").strip()
+            raw_name = (entry.get("name") or "").strip()
            api_url = (
                entry.get("base_url", "")
                or entry.get("url", "")
                or entry.get("api", "")
                or ""
-            ).strip()
-            if not display_name or not api_url:
+            ).strip().rstrip("/")
+            if not raw_name or not api_url:
                continue
+            api_key = (entry.get("api_key") or "").strip()

-            slug = custom_provider_slug(display_name)
-            if slug not in groups:
-                groups[slug] = {
+            group_key = (api_url, api_key)
+            if group_key not in groups:
+                # Strip per-model suffix so "Ollama — GLM 5.1" becomes
+                # "Ollama" for the grouped row. Em dash is the convention
+                # Hermes's own writer uses; a hyphen variant is accepted
+                # for hand-edited configs.
+                display_name = raw_name
+                for sep in ("—", " - "):
+                    if sep in display_name:
+                        display_name = display_name.split(sep)[0].strip()
+                        break
+                if not display_name:
+                    display_name = raw_name
+                # If this endpoint matches the currently active one, use
+                # ``current_provider`` as the slug so picker-driven switches
+                # route through the live credential pipeline.
+                if (
+                    current_base_url
+                    and api_url == current_base_url.strip().rstrip("/")
+                ):
+                    slug = current_provider or custom_provider_slug(display_name)
+                else:
+                    slug = custom_provider_slug(display_name)
+                groups[group_key] = {
+                    "slug": slug,
                    "name": display_name,
                    "api_url": api_url,
                    "models": [],
                }
+
            # The singular ``model:`` field only holds the currently
            # active model. Hermes's own writer (main.py::_save_custom_provider)
            # stores every configured model as a dict under ``models:``;
            # downstream readers (agent/models_dev.py, gateway/run.py,
            # run_agent.py, hermes_cli/config.py) already consume that dict.
-            # The /model picker previously ignored it, so multi-model
-            # custom providers appeared to have only the active model.
            default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[slug]["models"]:
-                groups[slug]["models"].append(default_model)
+            if default_model and default_model not in groups[group_key]["models"]:
+                groups[group_key]["models"].append(default_model)

            cfg_models = entry.get("models", {})
            if isinstance(cfg_models, dict):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)
            elif isinstance(cfg_models, list):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)

-        for slug, grp in groups.items():
-            if slug.lower() in seen_slugs:
+        _section4_emitted_slugs: set = set()
+        for grp in groups.values():
+            slug = grp["slug"]
+            # If the slug is already claimed by a built-in / overlay /
+            # user-provider row (sections 1-3), skip this custom group
+            # to avoid shadowing a real provider.
+            if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
                continue
+            # If a prior section-4 group already used this slug (two custom
+            # endpoints with the same cleaned name — e.g. two OpenAI-
+            # compatible gateways named identically with different keys),
+            # append a counter so both rows stay visible in the picker.
+            if slug.lower() in _section4_emitted_slugs:
+                base_slug = slug
+                n = 2
+                while f"{base_slug}-{n}".lower() in seen_slugs:
+                    n += 1
+                slug = f"{base_slug}-{n}"
+                grp["slug"] = slug
            # Skip if section 3 already emitted this endpoint under its
-            # ``providers:`` dict key — matches on (display_name, base_url),
-            # the tuple section 4 groups by.  Prevents two picker rows
-            # labelled identically when callers pass both ``user_providers``
-            # and a compatibility-merged ``custom_providers`` list.
+            # ``providers:`` dict key — matches on (display_name, base_url).
+            # Prevents two picker rows labelled identically when callers
+            # pass both ``user_providers`` and a compatibility-merged
+            # ``custom_providers`` list.
            _pair_key = (
                str(grp["name"]).strip().lower(),
                str(grp["api_url"]).strip().rstrip("/").lower(),
@@ -1194,6 +1246,7 @@ def list_authenticated_providers(
                "api_url": grp["api_url"],
            })
            seen_slugs.add(slug.lower())
+            _section4_emitted_slugs.add(slug.lower())

    # Sort: current provider first, then by model count descending
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
@@ -44,7 +44,7 @@ def _cmd_list(store):
        for p in pending:
            print(
                f"  {p['platform']:<12} {p['code']:<10} {p['user_id']:<20} "
-                f"{p.get('user_name', ''):<20} {p['age_minutes']}m ago"
+                f"{(p.get('user_name') or ''):<20} {p['age_minutes']}m ago"
            )
    else:
        print("\n  No pending pairing requests.")
@@ -54,7 +54,7 @@ def _cmd_list(store):
        print(f"  {'Platform':<12} {'User ID':<20} {'Name':<20}")
        print(f"  {'--------':<12} {'-------':<20} {'----':<20}")
        for a in approved:
-            print(f"  {a['platform']:<12} {a['user_id']:<20} {a.get('user_name', ''):<20}")
+            print(f"  {a['platform']:<12} {a['user_id']:<20} {(a.get('user_name') or ''):<20}")
    else:
        print("\n  No approved users.")

@@ -69,7 +69,7 @@ def _cmd_approve(store, platform: str, code: str):
    result = store.approve_code(platform, code)
    if result:
        uid = result["user_id"]
-        name = result.get("user_name", "")
+        name = result.get("user_name") or ""
        display = f"{name} ({uid})" if name else uid
        print(f"\n  Approved! User {display} on {platform} can now use the bot~")
        print("  They'll be recognized automatically on their next message.\n")
@@ -512,10 +512,23 @@ class PluginManager:
    # Public
    # -----------------------------------------------------------------------

-    def discover_and_load(self) -> None:
-        """Scan all plugin sources and load each plugin found."""
-        if self._discovered:
+    def discover_and_load(self, force: bool = False) -> None:
+        """Scan all plugin sources and load each plugin found.
+
+        When ``force`` is true, clear cached discovery state first so config
+        changes or newly-added bundled backends become visible in long-lived
+        sessions without requiring a full agent restart.
+        """
+        if self._discovered and not force:
            return
+        if force:
+            self._plugins.clear()
+            self._hooks.clear()
+            self._plugin_tool_names.clear()
+            self._cli_commands.clear()
+            self._plugin_commands.clear()
+            self._plugin_skills.clear()
+            self._context_engine = None
        self._discovered = True

        manifests: List[PluginManifest] = []
@@ -1029,9 +1042,13 @@ def get_plugin_manager() -> PluginManager:
    return _plugin_manager


-def discover_plugins() -> None:
-    """Discover and load all plugins (idempotent)."""
-    get_plugin_manager().discover_and_load()
+def discover_plugins(force: bool = False) -> None:
+    """Discover and load all plugins.
+
+    Default behavior is idempotent. Pass ``force=True`` to rescan plugin
+    manifests and reload state in the current process.
+    """
+    get_plugin_manager().discover_and_load(force=force)


 def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
@@ -1082,10 +1099,13 @@ def get_pre_tool_call_block_message(
    return None


-def _ensure_plugins_discovered() -> PluginManager:
-    """Return the global manager after running idempotent plugin discovery."""
+def _ensure_plugins_discovered(force: bool = False) -> PluginManager:
+    """Return the global manager after ensuring plugin discovery has run.
+
+    Pass ``force=True`` to rescan in the current process.
+    """
    manager = get_plugin_manager()
-    manager.discover_and_load()
+    manager.discover_and_load(force=force)
    return manager


@@ -863,19 +863,15 @@ def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
                pass


-def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
-    """Import a profile from a tar.gz archive.
+def _inspect_profile_archive_roots(archive: Path) -> set[str]:
+    """Return the archive's top-level directory names.

-    If *name* is not given, infers it from the archive's top-level directory.
-    Returns the imported profile directory.
+    Profile imports expect exactly one root directory. Inspecting the archive
+    before extraction lets us stage the import safely instead of mutating a
+    live profile tree first and reconciling names later.
    """
    import tarfile

-    archive = Path(archive_path)
-    if not archive.exists():
-        raise FileNotFoundError(f"Archive not found: {archive}")
-
-    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
        top_dirs = {
            parts[0]
@@ -889,13 +885,33 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
                for member in tf.getmembers()
                if member.isdir()
            }
+    return top_dirs

-    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
+
+def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
+    """Import a profile from a tar.gz archive.
+
+    If *name* is not given, infers it from the archive's top-level directory.
+    Returns the imported profile directory.
+    """
+    import tempfile
+
+    archive = Path(archive_path)
+    if not archive.exists():
+        raise FileNotFoundError(f"Archive not found: {archive}")
+
+    top_dirs = _inspect_profile_archive_roots(archive)
+    archive_root = top_dirs.pop() if len(top_dirs) == 1 else None
+    inferred_name = name or archive_root
    if not inferred_name:
        raise ValueError(
            "Cannot determine profile name from archive. "
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )
+    if archive_root is None:
+        raise ValueError(
+            "Profile archive must contain exactly one top-level directory."
+        )

    # Archives exported from the default profile have "default/" as top-level
    # dir.  Importing as "default" would target ~/.hermes itself — disallow
@@ -914,12 +930,22 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    _safe_extract_profile_archive(archive, profiles_root)
+    with tempfile.TemporaryDirectory(prefix="hermes_profile_import_") as tmpdir:
+        staging_root = Path(tmpdir)
+        _safe_extract_profile_archive(archive, staging_root)

-    # If the archive extracted under a different name, rename
-    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
-    if extracted != profile_dir and extracted.exists():
-        extracted.rename(profile_dir)
+        extracted = staging_root / archive_root
+        if not extracted.is_dir():
+            raise ValueError(
+                f"Profile archive root is missing or invalid: {archive_root}"
+            )
+
+        final_source = extracted
+        if archive_root != inferred_name:
+            final_source = staging_root / inferred_name
+            extracted.rename(final_source)
+
+        shutil.move(str(final_source), str(profile_dir))

    return profile_dir

@@ -1,221 +0,0 @@
-"""PTY bridge for `hermes dashboard` chat tab.
-
-Wraps a child process behind a pseudo-terminal so its ANSI output can be
-streamed to a browser-side terminal emulator (xterm.js) and typed
-keystrokes can be fed back in.  The only caller today is the
-``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
-
-Design constraints:
-
-* **POSIX-only.**  Hermes Agent supports Windows exclusively via WSL, which
-  exposes a native POSIX PTY via ``openpty(3)``.  Native Windows Python
-  has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
-  install/platform message so the dashboard can render a banner instead of
-  crashing.
-* **Zero Node dependency on the server side.**  We use :mod:`ptyprocess`,
-  which is a pure-Python wrapper around the OS calls.  The browser talks
-  to the same ``hermes --tui`` binary it would launch from the CLI, so
-  every TUI feature (slash popover, model picker, tool rows, markdown,
-  skin engine, clarify/sudo/approval prompts) ships automatically.
-* **Byte-safe I/O.**  Reads and writes go through the PTY master fd
-  directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
-  streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
-  mid-read.
-"""
-
-from __future__ import annotations
-
-import errno
-import fcntl
-import os
-import select
-import signal
-import struct
-import sys
-import termios
-import time
-from typing import Optional, Sequence
-
-try:
-    import ptyprocess  # type: ignore
-    _PTY_AVAILABLE = not sys.platform.startswith("win")
-except ImportError:  # pragma: no cover - dev env without ptyprocess
-    ptyprocess = None  # type: ignore
-    _PTY_AVAILABLE = False
-
-
-__all__ = ["PtyBridge", "PtyUnavailableError"]
-
-
-class PtyUnavailableError(RuntimeError):
-    """Raised when a PTY cannot be created on this platform.
-
-    Today this means native Windows (no ConPTY bindings) or a dev
-    environment missing the ``ptyprocess`` dependency.  The dashboard
-    surfaces the message to the user as a chat-tab banner.
-    """
-
-
-class PtyBridge:
-    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.
-
-    Not thread-safe.  A single bridge is owned by the WebSocket handler
-    that spawned it; the reader runs in an executor thread while writes
-    happen on the event-loop thread.  Both sides are OK because the
-    kernel PTY is the actual synchronization point — we never call
-    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
-    ``os.write`` on the master fd, which is safe.
-    """
-
-    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
-        self._proc = proc
-        self._fd: int = proc.fd
-        self._closed = False
-
-    # -- lifecycle --------------------------------------------------------
-
-    @classmethod
-    def is_available(cls) -> bool:
-        """True if a PTY can be spawned on this platform."""
-        return bool(_PTY_AVAILABLE)
-
-    @classmethod
-    def spawn(
-        cls,
-        argv: Sequence[str],
-        *,
-        cwd: Optional[str] = None,
-        env: Optional[dict] = None,
-        cols: int = 80,
-        rows: int = 24,
-    ) -> "PtyBridge":
-        """Spawn ``argv`` behind a new PTY and return a bridge.
-
-        Raises :class:`PtyUnavailableError` if the platform can't host a
-        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
-        ordinary exec failures (missing binary, bad cwd, etc.).
-        """
-        if not _PTY_AVAILABLE:
-            raise PtyUnavailableError(
-                "Pseudo-terminals are unavailable on this platform. "
-                "Hermes Agent supports Windows only via WSL."
-            )
-        # Let caller-supplied env fully override inheritance; if they pass
-        # None we inherit the server's env (same semantics as subprocess).
-        spawn_env = os.environ.copy() if env is None else env
-        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
-            list(argv),
-            cwd=cwd,
-            env=spawn_env,
-            dimensions=(rows, cols),
-        )
-        return cls(proc)
-
-    @property
-    def pid(self) -> int:
-        return int(self._proc.pid)
-
-    def is_alive(self) -> bool:
-        if self._closed:
-            return False
-        try:
-            return bool(self._proc.isalive())
-        except Exception:
-            return False
-
-    # -- I/O --------------------------------------------------------------
-
-    def read(self, timeout: float = 0.2) -> Optional[bytes]:
-        """Read up to 64 KiB of raw bytes from the PTY master.
-
-        Returns:
-            * bytes — zero or more bytes of child output
-            * empty bytes (``b""``) — no data available within ``timeout``
-            * None — child has exited and the master fd is at EOF
-
-        Never blocks longer than ``timeout`` seconds.  Safe to call after
-        :meth:`close`; returns ``None`` in that case.
-        """
-        if self._closed:
-            return None
-        try:
-            readable, _, _ = select.select([self._fd], [], [], timeout)
-        except (OSError, ValueError):
-            return None
-        if not readable:
-            return b""
-        try:
-            data = os.read(self._fd, 65536)
-        except OSError as exc:
-            # EIO on Linux = slave side closed.  EBADF = already closed.
-            if exc.errno in (errno.EIO, errno.EBADF):
-                return None
-            raise
-        if not data:
-            return None
-        return data
-
-    def write(self, data: bytes) -> None:
-        """Write raw bytes to the PTY master (i.e. the child's stdin)."""
-        if self._closed or not data:
-            return
-        # os.write can return a short write under load; loop until drained.
-        view = memoryview(data)
-        while view:
-            try:
-                n = os.write(self._fd, view)
-            except OSError as exc:
-                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
-                    return
-                raise
-            if n <= 0:
-                return
-            view = view[n:]
-
-    def resize(self, cols: int, rows: int) -> None:
-        """Forward a terminal resize to the child via ``TIOCSWINSZ``."""
-        if self._closed:
-            return
-        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
-        winsize = struct.pack("HHHH", max(1, rows), max(1, cols), 0, 0)
-        try:
-            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
-        except OSError:
-            pass
-
-    # -- teardown ---------------------------------------------------------
-
-    def close(self) -> None:
-        """Terminate the child (SIGTERM → 0.5s grace → SIGKILL) and close fds.
-
-        Idempotent.  Reaping the child is important so we don't leak
-        zombies across the lifetime of the dashboard process.
-        """
-        if self._closed:
-            return
-        self._closed = True
-
-        # SIGHUP is the conventional "your terminal went away" signal.
-        # We escalate if the child ignores it.
-        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
-            if not self._proc.isalive():
-                break
-            try:
-                self._proc.kill(sig)
-            except Exception:
-                pass
-            deadline = time.monotonic() + 0.5
-            while self._proc.isalive() and time.monotonic() < deadline:
-                time.sleep(0.02)
-
-        try:
-            self._proc.close(force=True)
-        except Exception:
-            pass
-
-    # Context-manager sugar — handy in tests and ad-hoc scripts.
-    def __enter__(self) -> "PtyBridge":
-        return self
-
-    def __exit__(self, *_exc) -> None:
-        self.close()
@@ -2334,6 +2334,7 @@ def setup_gateway(config: dict):
            launchd_install,
            launchd_start,
            launchd_restart,
+            UserSystemdUnavailableError,
        )

        service_installed = _is_service_installed()
@@ -2357,6 +2358,10 @@ def setup_gateway(config: dict):
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Restart failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -2366,6 +2371,10 @@ def setup_gateway(config: dict):
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Start failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Start failed: {e}")
        elif supports_service_manager:
@@ -2389,6 +2398,10 @@ def setup_gateway(config: dict):
                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
+                        except UserSystemdUnavailableError as e:
+                            print_error("  Start failed — user systemd not reachable:")
+                            for line in str(e).splitlines():
+                                print(f"  {line}")
                        except Exception as e:
                            print_error(f"  Start failed: {e}")
                except Exception as e:
@@ -13,7 +13,7 @@ import json as _json
 import logging
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Set
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict


 from hermes_cli.config import (
@@ -748,7 +748,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    OpenAI-format tool schema.  Triggers tool discovery on first call,
    then caches the result for the rest of the process.

-    Returns an empty dict when tiktoken or the registry is unavailable.
+    Returns an empty dict when the registry is unavailable.
    """
    global _tool_token_cache
    if _tool_token_cache is not None:
@@ -756,11 +756,12 @@ def _estimate_tool_tokens() -> Dict[str, int]:

    try:
        import tiktoken
-        enc = tiktoken.get_encoding("cl100k_base")
-    except Exception:
-        logger.debug("tiktoken unavailable; skipping tool token estimation")
-        _tool_token_cache = {}
-        return _tool_token_cache
+    except ImportError:
+        raise ImportError(
+            "tiktoken is required for tool token estimation. "
+            "Install with: pip install hermes-agent[cli]"
+        ) from None
+    enc = tiktoken.get_encoding("cl100k_base")

    try:
        # Trigger full tool discovery (imports all tool modules).
@@ -1019,6 +1020,11 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):

 def _is_provider_active(provider: dict, config: dict) -> bool:
    """Check if a provider entry matches the currently active config."""
+    plugin_name = provider.get("image_gen_plugin_name")
+    if plugin_name:
+        image_cfg = config.get("image_gen", {})
+        return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
+
    managed_feature = provider.get("managed_nous_feature")
    if managed_feature:
        features = get_nous_subscription_features(config)
@@ -1026,6 +1032,13 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
        if feature is None:
            return False
        if managed_feature == "image_gen":
+            image_cfg = config.get("image_gen", {})
+            if isinstance(image_cfg, dict):
+                configured_provider = image_cfg.get("provider")
+                if configured_provider not in (None, "", "fal"):
+                    return False
+                if image_cfg.get("use_gateway") is False:
+                    return False
            return feature.managed_by_nous
        if provider.get("tts_provider"):
            return (
@@ -1048,6 +1061,16 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
    if provider.get("web_backend"):
        current = config.get("web", {}).get("backend")
        return current == provider["web_backend"]
+    if provider.get("imagegen_backend"):
+        image_cfg = config.get("image_gen", {})
+        if not isinstance(image_cfg, dict):
+            return False
+        configured_provider = image_cfg.get("provider")
+        return (
+            provider["imagegen_backend"] == "fal"
+            and configured_provider in (None, "", "fal")
+            and not image_cfg.get("use_gateway")
+        )
    return False


@@ -1076,13 +1099,19 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
 # right catalog at picker time.


-def _fal_model_catalog():
+class _ImagegenBackend(TypedDict):
+    """Shape of one entry in the ``IMAGEGEN_BACKENDS`` registry."""
+
+    # Human-readable backend label (the "fal" entry uses "FAL.ai").
+    display: str
+    # The "fal" entry uses "image_gen"; presumably the config.yaml section
+    # the backend's settings live under -- confirm against the callers.
+    config_key: str
+    # Zero-argument loader returning a ``(dict, str)`` pair.  Judging by
+    # ``_fal_model_catalog`` this is (model catalog, default model id) --
+    # TODO confirm that every backend follows the same convention.
+    catalog_fn: Callable[[], Tuple[Dict[str, Dict[str, Any]], str]]
+
+
+def _fal_model_catalog() -> Tuple[Dict[str, Dict[str, Any]], str]:
    """Lazy-load the FAL model catalog from the tool module."""
    from tools.image_generation_tool import FAL_MODELS, DEFAULT_MODEL
    return FAL_MODELS, DEFAULT_MODEL


-IMAGEGEN_BACKENDS = {
+IMAGEGEN_BACKENDS: Dict[str, _ImagegenBackend] = {
    "fal": {
        "display": "FAL.ai",
        "config_key": "image_gen",
@@ -1245,6 +1274,18 @@ def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None
    _print_success(f"  Model set to: {chosen}")


+def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
+    """Record ``plugin_name`` as the selected image-generation provider.
+
+    Mutates ``config`` in place: ``config["image_gen"]`` is replaced with a
+    fresh dict when it is missing or not a dict, then ``provider`` is set to
+    ``plugin_name`` and ``use_gateway`` to ``False``.  A confirmation line is
+    printed and model selection is handed to
+    ``_configure_imagegen_model_for_plugin``.
+
+    Args:
+        plugin_name: Identifier of the image-gen plugin to select.
+        config: Configuration mapping to update in place.
+    """
+    section = config.get("image_gen")
+    if not isinstance(section, dict):
+        # Missing or malformed section: start over with an empty mapping.
+        section = config["image_gen"] = {}
+    section.update(provider=plugin_name, use_gateway=False)
+    _print_success(f"  image_gen.provider set to: {plugin_name}")
+    _configure_imagegen_model_for_plugin(plugin_name, config)
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1305,13 +1346,7 @@ def _configure_provider(provider: dict, config: dict):
        # and route model selection to the plugin's own catalog.
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            img_cfg = config.setdefault("image_gen", {})
-            if not isinstance(img_cfg, dict):
-                img_cfg = {}
-                config["image_gen"] = img_cfg
-            img_cfg["provider"] = plugin_name
-            _print_success(f"  image_gen.provider set to: {plugin_name}")
-            _configure_imagegen_model_for_plugin(plugin_name, config)
+            _select_plugin_image_gen_provider(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after backend pick.
        backend = provider.get("imagegen_backend")
@@ -1359,13 +1394,7 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} configured!")
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            img_cfg = config.setdefault("image_gen", {})
-            if not isinstance(img_cfg, dict):
-                img_cfg = {}
-                config["image_gen"] = img_cfg
-            img_cfg["provider"] = plugin_name
-            _print_success(f"  image_gen.provider set to: {plugin_name}")
-            _configure_imagegen_model_for_plugin(plugin_name, config)
+            _select_plugin_image_gen_provider(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after env vars are in.
        backend = provider.get("imagegen_backend")
@@ -1539,16 +1568,39 @@ def _reconfigure_provider(provider: dict, config: dict):
        config.setdefault("web", {})["backend"] = provider["web_backend"]
        _print_success(f"  Web backend set to: {provider['web_backend']}")

+    if managed_feature and managed_feature not in ("web", "tts", "browser"):
+        section = config.setdefault(managed_feature, {})
+        if not isinstance(section, dict):
+            section = {}
+            config[managed_feature] = section
+        section["use_gateway"] = True
+    elif not managed_feature:
+        for cat_key, cat in TOOL_CATEGORIES.items():
+            if provider in cat.get("providers", []):
+                section = config.get(cat_key)
+                if isinstance(section, dict) and section.get("use_gateway"):
+                    section["use_gateway"] = False
+                break
+
    if not env_vars:
        if provider.get("post_setup"):
            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            _select_plugin_image_gen_provider(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection on reconfig too.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            if backend == "fal":
+                img_cfg = config.setdefault("image_gen", {})
+                if isinstance(img_cfg, dict):
+                    img_cfg["provider"] = "fal"
+                    img_cfg["use_gateway"] = False
        return

    for var in env_vars:
@@ -1567,9 +1619,19 @@ def _reconfigure_provider(provider: dict, config: dict):
            _print_info("    Kept current")

    # Imagegen backends prompt for model selection on reconfig too.
+    plugin_name = provider.get("image_gen_plugin_name")
+    if plugin_name:
+        _select_plugin_image_gen_provider(plugin_name, config)
+        return
+
    backend = provider.get("imagegen_backend")
    if backend:
        _configure_imagegen_model(backend, config)
+        if backend == "fal":
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict):
+                img_cfg["provider"] = "fal"
+                img_cfg["use_gateway"] = False


 def _reconfigure_simple_requirements(ts_key: str):
@@ -49,7 +49,7 @@ from hermes_cli.config import (
 from gateway.status import get_running_pid, read_runtime_status

 try:
-    from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
+    from fastapi import FastAPI, HTTPException, Request
    from fastapi.middleware.cors import CORSMiddleware
    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
    from fastapi.staticfiles import StaticFiles
@@ -2242,148 +2242,6 @@ async def get_usage_analytics(days: int = 30):
        db.close()


-# ---------------------------------------------------------------------------
-# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab.
-#
-# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
-# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
-# WebSocket.  The browser renders the ANSI through xterm.js (see
-# web/src/pages/ChatPage.tsx).
-#
-# Auth: ``?token=<session_token>`` query param (browsers can't set
-# Authorization on the WS upgrade).  Same ephemeral ``_SESSION_TOKEN`` as
-# REST.  Localhost-only — we defensively reject non-loopback clients even
-# though uvicorn binds to 127.0.0.1.
-# ---------------------------------------------------------------------------
-
-import re
-import asyncio
-
-from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
-
-_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
-_PTY_READ_CHUNK_TIMEOUT = 0.2
-# Starlette's TestClient reports the peer as "testclient"; treat it as
-# loopback so tests don't need to rewrite request scope.
-_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
-
-
-def _resolve_chat_argv(
-    resume: Optional[str] = None,
-) -> tuple[list[str], Optional[str], Optional[dict]]:
-    """Resolve the argv + cwd + env for the chat PTY.
-
-    Default: whatever ``hermes --tui`` would run.  Tests monkeypatch this
-    function to inject a tiny fake command (``cat``, ``sh -c 'printf …'``)
-    so nothing has to build Node or the TUI bundle.
-
-    Session resume is propagated via the ``HERMES_TUI_RESUME`` env var —
-    matching what ``hermes_cli.main._launch_tui`` does for the CLI path.
-    Appending ``--resume <id>`` to argv doesn't work because ``ui-tui`` does
-    not parse its argv.
-    """
-    from hermes_cli.main import PROJECT_ROOT, _make_tui_argv
-
-    argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
-    env: Optional[dict] = None
-    if resume:
-        env = os.environ.copy()
-        env["HERMES_TUI_RESUME"] = resume
-    return list(argv), str(cwd) if cwd else None, env
-
-
-@app.websocket("/api/pty")
-async def pty_ws(ws: WebSocket) -> None:
-    # --- auth + loopback check (before accept so we can close cleanly) ---
-    token = ws.query_params.get("token", "")
-    expected = _SESSION_TOKEN
-    if not hmac.compare_digest(token.encode(), expected.encode()):
-        await ws.close(code=4401)
-        return
-
-    client_host = ws.client.host if ws.client else ""
-    if client_host and client_host not in _LOOPBACK_HOSTS:
-        await ws.close(code=4403)
-        return
-
-    await ws.accept()
-
-    # --- spawn PTY ------------------------------------------------------
-    resume = ws.query_params.get("resume") or None
-    try:
-        argv, cwd, env = _resolve_chat_argv(resume=resume)
-    except SystemExit as exc:
-        # _make_tui_argv calls sys.exit(1) when node/npm is missing.
-        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
-        await ws.close(code=1011)
-        return
-
-
-    try:
-        bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
-    except PtyUnavailableError as exc:
-        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
-        await ws.close(code=1011)
-        return
-    except (FileNotFoundError, OSError) as exc:
-        await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n")
-        await ws.close(code=1011)
-        return
-
-    loop = asyncio.get_running_loop()
-
-    # --- reader task: PTY master → WebSocket ----------------------------
-    async def pump_pty_to_ws() -> None:
-        while True:
-            chunk = await loop.run_in_executor(
-                None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
-            )
-            if chunk is None:  # EOF
-                return
-            if not chunk:  # no data this tick; yield control and retry
-                await asyncio.sleep(0)
-                continue
-            try:
-                await ws.send_bytes(chunk)
-            except Exception:
-                return
-
-    reader_task = asyncio.create_task(pump_pty_to_ws())
-
-    # --- writer loop: WebSocket → PTY master ----------------------------
-    try:
-        while True:
-            msg = await ws.receive()
-            msg_type = msg.get("type")
-            if msg_type == "websocket.disconnect":
-                break
-            raw = msg.get("bytes")
-            if raw is None:
-                text = msg.get("text")
-                raw = text.encode("utf-8") if isinstance(text, str) else b""
-            if not raw:
-                continue
-
-            # Resize escape is consumed locally, never written to the PTY.
-            match = _RESIZE_RE.match(raw)
-            if match and match.end() == len(raw):
-                cols = int(match.group(1))
-                rows = int(match.group(2))
-                bridge.resize(cols=cols, rows=rows)
-                continue
-
-            bridge.write(raw)
-    except WebSocketDisconnect:
-        pass
-    finally:
-        reader_task.cancel()
-        try:
-            await reader_task
-        except (asyncio.CancelledError, Exception):
-            pass
-        bridge.close()
-
-
 def mount_spa(application: FastAPI):
    """Mount the built SPA. Falls back to index.html for client-side routing.

@@ -142,7 +142,7 @@ class _ComponentFilter(logging.Filter):
 # Used by _ComponentFilter and exposed for ``hermes logs --component``.
 COMPONENT_PREFIXES = {
    "gateway": ("gateway",),
-    "agent": ("agent", "run_agent", "model_tools", "batch_runner"),
+    "agent": ("agent", "run_agent", "model_tools", "scripts.batch_runner"),
    "tools": ("tools",),
    "cli": ("hermes_cli", "cli"),
    "cron": ("cron",),
@@ -777,7 +777,10 @@ HERMES_NIX_ENV_EOF
            NoNewPrivileges = true;
            ProtectSystem = "strict";
            ProtectHome = false;
-            ReadWritePaths = [ cfg.stateDir ];
+            ReadWritePaths = [
+              cfg.stateDir
+              cfg.workingDirectory
+            ];
            PrivateTmp = true;
          };

@@ -0,0 +1,378 @@
+"""OpenAI image generation backend — ChatGPT/Codex OAuth variant.
+
+Identical model catalog and tier semantics to the ``openai`` image-gen plugin
+(``gpt-image-2`` at low/medium/high quality), but routes the request through
+the Codex Responses API ``image_generation`` tool instead of the
+``images.generate`` REST endpoint. This lets users who are already
+authenticated with Codex/ChatGPT generate images without configuring a
+separate ``OPENAI_API_KEY``.
+
+Selection precedence for the tier (first hit wins):
+
+1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
+2. ``image_gen.openai-codex.model`` in ``config.yaml``
+3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
+4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium``
+
+Output is saved as PNG under ``$HERMES_HOME/cache/images/``.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List, Optional, Tuple
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    error_response,
+    resolve_aspect_ratio,
+    save_b64_image,
+    success_response,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Model catalog — mirrors the ``openai`` plugin so the picker UX is identical.
+# ---------------------------------------------------------------------------
+
+API_MODEL = "gpt-image-2"
+
+_MODELS: Dict[str, Dict[str, Any]] = {
+    "gpt-image-2-low": {
+        "display": "GPT Image 2 (Low)",
+        "speed": "~15s",
+        "strengths": "Fast iteration, lowest cost",
+        "quality": "low",
+    },
+    "gpt-image-2-medium": {
+        "display": "GPT Image 2 (Medium)",
+        "speed": "~40s",
+        "strengths": "Balanced — default",
+        "quality": "medium",
+    },
+    "gpt-image-2-high": {
+        "display": "GPT Image 2 (High)",
+        "speed": "~2min",
+        "strengths": "Highest fidelity, strongest prompt adherence",
+        "quality": "high",
+    },
+}
+
+DEFAULT_MODEL = "gpt-image-2-medium"
+
+_SIZES = {
+    "landscape": "1536x1024",
+    "square": "1024x1024",
+    "portrait": "1024x1536",
+}
+
+# Codex Responses surface used for the request. The chat model itself is only
+# the host that calls the ``image_generation`` tool; the actual image work is
+# done by ``API_MODEL``.
+_CODEX_CHAT_MODEL = "gpt-5.4"
+_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+_CODEX_INSTRUCTIONS = (
+    "You are an assistant that must fulfill image generation requests by "
+    "using the image_generation tool when provided."
+)
+
+
+# ---------------------------------------------------------------------------
+# Config + auth helpers
+# ---------------------------------------------------------------------------
+
+
+def _load_image_gen_config() -> Dict[str, Any]:
+    """Return the ``image_gen`` section of the loaded config.
+
+    Best-effort: an empty dict is returned when the config (or the section)
+    is not a dict, and when loading raises -- in which case the exception is
+    logged at DEBUG level instead of propagating.
+    """
+    try:
+        # Imported lazily, inside the guard, so an import failure is also
+        # treated as "no config".
+        from hermes_cli.config import load_config
+
+        loaded = load_config()
+        if isinstance(loaded, dict):
+            image_gen = loaded.get("image_gen")
+            if isinstance(image_gen, dict):
+                return image_gen
+    except Exception as exc:
+        logger.debug("Could not load image_gen config: %s", exc)
+    return {}
+
+
+def _resolve_model() -> Tuple[str, Dict[str, Any]]:
+    """Pick the image tier to use and return ``(model_id, meta)``.
+
+    The first source that names a key of ``_MODELS`` wins:
+
+    1. the ``OPENAI_IMAGE_MODEL`` environment variable,
+    2. ``model`` inside the ``openai-codex`` sub-section of ``image_gen``,
+    3. the top-level ``model`` of ``image_gen``,
+    4. ``DEFAULT_MODEL``.
+
+    Values that are not strings, or not known tier IDs, are ignored.
+    """
+    import os
+
+    forced = os.environ.get("OPENAI_IMAGE_MODEL")
+    if forced and forced in _MODELS:
+        return forced, _MODELS[forced]
+
+    cfg = _load_image_gen_config()
+    plugin_section = cfg.get("openai-codex")
+    scoped = plugin_section.get("model") if isinstance(plugin_section, dict) else None
+
+    # Plugin-scoped setting first, then the shared top-level one.
+    for choice in (scoped, cfg.get("model")):
+        if isinstance(choice, str) and choice in _MODELS:
+            return choice, _MODELS[choice]
+
+    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
+
+
+def _read_codex_access_token() -> Optional[str]:
+    """Return the Codex OAuth token with surrounding whitespace removed.
+
+    The lookup is delegated to ``_read_codex_access_token`` in
+    ``agent.auxiliary_client`` so that token handling stays in one place.
+    ``None`` is returned when that reader yields a non-string or blank value,
+    or when importing/calling it raises (the error is logged at DEBUG level).
+    """
+    try:
+        from agent.auxiliary_client import (
+            _read_codex_access_token as _canonical_reader,
+        )
+
+        raw = _canonical_reader()
+        cleaned = raw.strip() if isinstance(raw, str) else ""
+        # A blank token is reported as "no token".
+        return cleaned or None
+    except Exception as exc:
+        logger.debug("Could not resolve Codex access token: %s", exc)
+        return None
+
+
+def _build_codex_client():
+    """Build an ``openai.OpenAI`` client aimed at ``_CODEX_BASE_URL``.
+
+    The Codex access token is used as the API key and the headers produced by
+    ``_codex_cloudflare_headers`` are sent by default.  Returns ``None`` when
+    no token is available or when constructing the client fails for any
+    reason (the failure is logged at DEBUG level).
+    """
+    access_token = _read_codex_access_token()
+    if access_token:
+        try:
+            # Lazy imports: a missing ``openai`` package is just another
+            # reason to report "no client".
+            import openai
+            from agent.auxiliary_client import _codex_cloudflare_headers
+
+            extra_headers = _codex_cloudflare_headers(access_token)
+            return openai.OpenAI(
+                api_key=access_token,
+                base_url=_CODEX_BASE_URL,
+                default_headers=extra_headers,
+            )
+        except Exception as exc:
+            logger.debug("Could not build Codex image client: %s", exc)
+    return None
+
+
+def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]:
+    """Run one streamed ``image_generation`` request and return the image.
+
+    Args:
+        client: Object exposing ``responses.stream(...)`` as a context manager.
+        prompt: Text sent as the single user message.
+        size: Value forwarded as the tool's ``size`` option.
+        quality: Value forwarded as the tool's ``quality`` option.
+
+    Returns:
+        The base64 payload seen last, or ``None`` when no event and no item of
+        the final response carried a non-empty string payload.  A completed
+        ``image_generation_call`` found in the final response always overrides
+        whatever was captured while streaming, including partial images.
+    """
+
+    def _finished_image(candidate: Any) -> Optional[str]:
+        # Non-empty ``result`` of an image_generation_call item, else None.
+        if getattr(candidate, "type", None) != "image_generation_call":
+            return None
+        payload = getattr(candidate, "result", None)
+        return payload if isinstance(payload, str) and payload else None
+
+    user_message = {
+        "type": "message",
+        "role": "user",
+        "content": [{"type": "input_text", "text": prompt}],
+    }
+    image_tool = {
+        "type": "image_generation",
+        "model": API_MODEL,
+        "size": size,
+        "quality": quality,
+        "output_format": "png",
+        "background": "opaque",
+        "partial_images": 1,
+    }
+    forced_tool_choice = {
+        "type": "allowed_tools",
+        "mode": "required",
+        "tools": [{"type": "image_generation"}],
+    }
+
+    latest: Optional[str] = None
+    with client.responses.stream(
+        model=_CODEX_CHAT_MODEL,
+        store=False,
+        instructions=_CODEX_INSTRUCTIONS,
+        input=[user_message],
+        tools=[image_tool],
+        tool_choice=forced_tool_choice,
+    ) as stream:
+        for event in stream:
+            kind = getattr(event, "type", "")
+            if kind == "response.output_item.done":
+                latest = _finished_image(getattr(event, "item", None)) or latest
+            elif kind == "response.image_generation_call.partial_image":
+                chunk = getattr(event, "partial_image_b64", None)
+                if isinstance(chunk, str) and chunk:
+                    latest = chunk
+        final = stream.get_final_response()
+
+    # Sweep the final response as well, in case the stream ended before the
+    # ``output_item.done`` event for the image call was observed.
+    for entry in getattr(final, "output", None) or []:
+        latest = _finished_image(entry) or latest
+
+    return latest
+
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+
+class OpenAICodexImageGenProvider(ImageGenProvider):
+    """Image-gen provider that reaches gpt-image-2 via ChatGPT/Codex OAuth.
+
+    No API key is involved: the access token comes from
+    ``_read_codex_access_token`` and requests go through the client built by
+    ``_build_codex_client``.
+    """
+
+    @property
+    def name(self) -> str:
+        """Provider identifier string."""
+        return "openai-codex"
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable provider label."""
+        return "OpenAI (Codex auth)"
+
+    def is_available(self) -> bool:
+        """Return True when a Codex token resolves and ``openai`` is importable."""
+        if _read_codex_access_token():
+            try:
+                import openai  # noqa: F401
+            except ImportError:
+                return False
+            return True
+        return False
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        """Return one descriptor dict per entry in ``_MODELS``."""
+        catalog: List[Dict[str, Any]] = []
+        for model_id, meta in _MODELS.items():
+            catalog.append(
+                {
+                    "id": model_id,
+                    "display": meta["display"],
+                    "speed": meta["speed"],
+                    "strengths": meta["strengths"],
+                    "price": "varies",
+                }
+            )
+        return catalog
+
+    def default_model(self) -> Optional[str]:
+        """Return the module-level ``DEFAULT_MODEL``."""
+        return DEFAULT_MODEL
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return the setup metadata for this provider (it declares no env vars)."""
+        hint = (
+            "Sign in with `hermes auth codex` (or `hermes setup` → Codex) "
+            "if you haven't already. No API key needed."
+        )
+        return dict(
+            name="OpenAI (Codex auth)",
+            badge="free",
+            tag="gpt-image-2 via ChatGPT/Codex OAuth — no API key required",
+            env_vars=[],
+            post_setup_hint=hint,
+        )
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate one image for ``prompt`` and save it to the image cache.
+
+        Args:
+            prompt: Text prompt; ``None`` or whitespace-only is rejected.
+            aspect_ratio: Passed through ``resolve_aspect_ratio``; the result
+                selects an entry of ``_SIZES`` (falling back to ``"square"``).
+            **kwargs: Accepted but not used by this provider.
+
+        Returns:
+            The value of ``success_response(...)`` on success, otherwise the
+            value of ``error_response(...)`` with ``error_type`` set to one of
+            ``invalid_argument``, ``auth_required``, ``missing_dependency``,
+            ``api_error``, ``empty_response`` or ``io_error``.
+        """
+        prompt = (prompt or "").strip()
+        resolved_aspect = resolve_aspect_ratio(aspect_ratio)
+
+        # --- Precondition checks, in order: prompt, credentials, dependency.
+        if not prompt:
+            return error_response(
+                error="Prompt is required and must be a non-empty string",
+                error_type="invalid_argument",
+                provider="openai-codex",
+                aspect_ratio=resolved_aspect,
+            )
+
+        token = _read_codex_access_token()
+        if not token:
+            return error_response(
+                error=(
+                    "No Codex/ChatGPT OAuth credentials available. Run "
+                    "`hermes auth codex` (or `hermes setup` → Codex) to sign in."
+                ),
+                error_type="auth_required",
+                provider="openai-codex",
+                aspect_ratio=resolved_aspect,
+            )
+
+        try:
+            import openai  # noqa: F401
+        except ImportError:
+            return error_response(
+                error="openai Python package not installed (pip install openai)",
+                error_type="missing_dependency",
+                provider="openai-codex",
+                aspect_ratio=resolved_aspect,
+            )
+
+        # --- Resolve model tier and output size, then build the client.
+        model_id, model_meta = _resolve_model()
+        size = _SIZES.get(resolved_aspect, _SIZES["square"])
+
+        codex_client = _build_codex_client()
+        if codex_client is None:
+            return error_response(
+                error="Could not initialize Codex image client",
+                error_type="auth_required",
+                provider="openai-codex",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=resolved_aspect,
+            )
+
+        # --- Run the request; any exception is reported as an api_error.
+        try:
+            image_b64 = _collect_image_b64(
+                codex_client,
+                prompt=prompt,
+                size=size,
+                quality=model_meta["quality"],
+            )
+        except Exception as exc:
+            logger.debug("Codex image generation failed", exc_info=True)
+            return error_response(
+                error=f"OpenAI image generation via Codex auth failed: {exc}",
+                error_type="api_error",
+                provider="openai-codex",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=resolved_aspect,
+            )
+
+        if not image_b64:
+            return error_response(
+                error="Codex response contained no image_generation_call result",
+                error_type="empty_response",
+                provider="openai-codex",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=resolved_aspect,
+            )
+
+        # --- Persist the decoded image and report where it landed.
+        try:
+            image_path = save_b64_image(image_b64, prefix=f"openai_codex_{model_id}")
+        except Exception as exc:
+            return error_response(
+                error=f"Could not save image to cache: {exc}",
+                error_type="io_error",
+                provider="openai-codex",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=resolved_aspect,
+            )
+
+        return success_response(
+            image=str(image_path),
+            model=model_id,
+            prompt=prompt,
+            aspect_ratio=resolved_aspect,
+            provider="openai-codex",
+            extra={"size": size, "quality": model_meta["quality"]},
+        )
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+
+def register(ctx) -> None:
+    """Plugin entry point: hand a Codex-backed image-gen provider to ``ctx``.
+
+    Args:
+        ctx: Plugin context exposing ``register_image_gen_provider``.
+    """
+    provider = OpenAICodexImageGenProvider()
+    ctx.register_image_gen_provider(provider)
@@ -0,0 +1,5 @@
+name: openai-codex
+version: 1.0.0
+description: "OpenAI image generation backed by ChatGPT/Codex OAuth (gpt-image-2 via the Responses image_generation tool). Saves generated images to $HERMES_HOME/cache/images/."
+author: NousResearch
+kind: backend
@@ -39,12 +39,12 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
-dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
-messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
+dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
+messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8", "mutagen>=1.45,<2", "aiohttp-socks>=0.9,<1"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29"]
-cli = ["simple-term-menu>=1.0,<2"]
+cli = ["simple-term-menu>=1.0,<2", "tiktoken>=0.7,<1", "Pillow>=10,<12"]
 tts-premium = ["elevenlabs>=1.0,<2"]
 voice = [
  # Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
@@ -58,7 +58,7 @@ pty = [
  "pywinpty>=2.0.0,<3; sys_platform == 'win32'",
 ]
 honcho = ["honcho-ai>=2.0.1,<3"]
-mcp = ["mcp>=1.2.0,<2"]
+mcp = ["mcp>=1.2.0,<2", "psutil>=5.9,<7"]
 homeassistant = ["aiohttp>=3.9.0,<4"]
 sms = ["aiohttp>=3.9.0,<4"]
 acp = ["agent-client-protocol>=0.9.0,<1.0"]
@@ -85,7 +85,9 @@ rl = [
  "fastapi>=0.104.0,<1",
  "uvicorn[standard]>=0.24.0,<1",
  "wandb>=0.15.0,<1",
+  "datasets>=2.14,<3",
 ]
+tts-local = ["neutts[all]", "soundfile>=0.12,<1"]
 yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
 all = [
  "hermes-agent[modal]",
@@ -120,13 +122,13 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"

 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]

 [tool.setuptools.package-data]
 hermes_cli = ["web_dist/**/*"]

 [tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "scripts"]

 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -134,3 +136,28 @@ markers = [
    "integration: marks tests requiring external services (API keys, Modal, etc.)",
 ]
 addopts = "-m 'not integration' -n auto"
+
+[tool.ty.environment]
+python-version = "3.13"
+
+[tool.ty.rules]
+unknown-argument = "warn"
+redundant-cast = "ignore"
+
+[tool.ty.src]
+exclude = ["**"]
+
+[[tool.ty.overrides]]
+include = ["**"]
+
+[tool.ty.overrides.rules]
+unresolved-import = "ignore"
+invalid-method-override = "ignore"
+invalid-assignment = "ignore"
+not-iterable = "ignore"
+
+[tool.ruff]
+exclude = ["*"]
+
+[tool.uv]
+exclude-newer = "7 days"
@@ -37,7 +37,10 @@ import time
 import threading
 from types import SimpleNamespace
 import uuid
-from typing import List, Dict, Any, Optional
+from typing import Callable, List, Dict, Any, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from agent.rate_limit_tracker import RateLimitState
 from openai import OpenAI
 import fire
 from datetime import datetime
@@ -722,17 +725,17 @@ class AIAgent:
        provider_require_parameters: bool = False,
        provider_data_collection: str = None,
        session_id: str = None,
-        tool_progress_callback: callable = None,
-        tool_start_callback: callable = None,
-        tool_complete_callback: callable = None,
-        thinking_callback: callable = None,
-        reasoning_callback: callable = None,
-        clarify_callback: callable = None,
-        step_callback: callable = None,
-        stream_delta_callback: callable = None,
-        interim_assistant_callback: callable = None,
-        tool_gen_callback: callable = None,
-        status_callback: callable = None,
+        tool_progress_callback: Callable[..., Any] = None,
+        tool_start_callback: Callable[..., Any] = None,
+        tool_complete_callback: Callable[..., Any] = None,
+        thinking_callback: Callable[..., Any] = None,
+        reasoning_callback: Callable[..., Any] = None,
+        clarify_callback: Callable[..., Any] = None,
+        step_callback: Callable[..., Any] = None,
+        stream_delta_callback: Callable[..., Any] = None,
+        interim_assistant_callback: Callable[..., Any] = None,
+        tool_gen_callback: Callable[..., Any] = None,
+        status_callback: Callable[..., Any] = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
        service_tier: str = None,
@@ -1048,7 +1051,7 @@ class AIAgent:
                for quiet_logger in [
                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
                    'run_agent',            # agent runner internals
-                    'trajectory_compressor',
+                    'scripts.trajectory_compressor',
                    'cron',                 # scheduler (only relevant in daemon mode)
                    'hermes_cli',           # CLI helpers
                ]:
@@ -4767,7 +4770,7 @@ class AIAgent:
    def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
        self._close_openai_client(client, reason=reason, shared=False)

-    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
+    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: Callable[..., Any] = None):
        """Execute one streaming Responses API request and return the final response."""
        import httpx as _httpx

@@ -5466,7 +5469,7 @@ class AIAgent:
        )

    def _interruptible_streaming_api_call(
-        self, api_kwargs: dict, *, on_first_delta: callable = None
+        self, api_kwargs: dict, *, on_first_delta: Callable[..., Any] = None
    ):
        """Streaming variant of _interruptible_api_call for real-time token delivery.

@@ -6766,42 +6769,6 @@ class AIAgent:
            cache[mode] = t
        return t

-    @staticmethod
-    def _nr_to_assistant_message(nr):
-        """Convert a NormalizedResponse to the SimpleNamespace shape downstream expects.
-
-        This is the single back-compat shim between the transport layer
-        (NormalizedResponse) and the agent loop (SimpleNamespace with
-        .content, .tool_calls, .reasoning, .reasoning_content,
-        .reasoning_details, .codex_reasoning_items, and per-tool-call
-        .call_id / .response_item_id).
-
-        TODO: Remove when downstream code reads NormalizedResponse directly.
-        """
-        tc_list = None
-        if nr.tool_calls:
-            tc_list = []
-            for tc in nr.tool_calls:
-                tc_ns = SimpleNamespace(
-                    id=tc.id,
-                    type="function",
-                    function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                )
-                if tc.provider_data:
-                    for key in ("call_id", "response_item_id"):
-                        if tc.provider_data.get(key):
-                            setattr(tc_ns, key, tc.provider_data[key])
-                tc_list.append(tc_ns)
-        pd = nr.provider_data or {}
-        return SimpleNamespace(
-            content=nr.content,
-            tool_calls=tc_list or None,
-            reasoning=nr.reasoning,
-            reasoning_content=pd.get("reasoning_content"),
-            reasoning_details=pd.get("reasoning_details"),
-            codex_reasoning_items=pd.get("codex_reasoning_items"),
-        )
-
    def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
        if not any(
            isinstance(msg, dict) and self._content_has_image_parts(msg.get("content"))
@@ -7441,12 +7408,15 @@ class AIAgent:
                _flush_temperature = _fixed_temp
            else:
                _flush_temperature = 0.3
+            _flush_llm_kwargs: dict = {}
+            if _flush_temperature is not None:
+                _flush_llm_kwargs["temperature"] = _flush_temperature
            try:
                response = _call_llm(
                    task="flush_memories",
                    messages=api_messages,
                    tools=[memory_tool_def],
-                    temperature=_flush_temperature,
+                    **_flush_llm_kwargs,
                    max_tokens=5120,
                    # timeout resolved from auxiliary.flush_memories.timeout config
                )
@@ -7503,20 +7473,25 @@ class AIAgent:
                    ]
            elif self.api_mode == "anthropic_messages" and not _aux_available:
                _tfn = self._get_transport()
-                _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
-                if _flush_nr and _flush_nr.tool_calls:
+                _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
+                if _flush_result and _flush_result.tool_calls:
                    tool_calls = [
                        SimpleNamespace(
                            id=tc.id, type="function",
                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _flush_nr.tool_calls
+                        ) for tc in _flush_result.tool_calls
                    ]
-            elif hasattr(response, "choices") and response.choices:
+            elif self.api_mode in ("chat_completions", "bedrock_converse"):
                # chat_completions / bedrock — normalize through transport
-                _flush_cc_nr = self._get_transport().normalize_response(response)
-                _flush_msg = self._nr_to_assistant_message(_flush_cc_nr)
-                if _flush_msg.tool_calls:
-                    tool_calls = _flush_msg.tool_calls
+                _flush_result = self._get_transport().normalize_response(response)
+                if _flush_result.tool_calls:
+                    tool_calls = _flush_result.tool_calls
+            elif _aux_available and hasattr(response, "choices") and response.choices:
+                # Auxiliary client returned OpenAI-shaped response while main
+                # api_mode is codex/anthropic — extract tool_calls from .choices
+                _aux_msg = response.choices[0].message
+                if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
+                    tool_calls = _aux_msg.tool_calls

            for tc in tool_calls:
                if tc.function.name == "memory":
@@ -8582,12 +8557,12 @@ class AIAgent:
                                   is_oauth=self._is_anthropic_oauth,
                                   preserve_dots=self._anthropic_preserve_dots())
                    summary_response = self._anthropic_messages_create(_ant_kw)
-                    _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
-                    final_response = (_sum_nr.content or "").strip()
+                    _summary_result = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    final_response = (_summary_result.content or "").strip()
                else:
                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
-                    _sum_cc_nr = self._get_transport().normalize_response(summary_response)
-                    final_response = (_sum_cc_nr.content or "").strip()
+                    _summary_result = self._get_transport().normalize_response(summary_response)
+                    final_response = (_summary_result.content or "").strip()

            if final_response:
                if "<think>" in final_response:
@@ -8612,8 +8587,8 @@ class AIAgent:
                                    max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                    preserve_dots=self._anthropic_preserve_dots())
                    retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
-                    final_response = (_retry_nr.content or "").strip()
+                    _retry_result = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    final_response = (_retry_result.content or "").strip()
                else:
                    summary_kwargs = {
                        "model": self.model,
@@ -8627,8 +8602,8 @@ class AIAgent:
                        summary_kwargs["extra_body"] = summary_extra_body

                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary_retry").chat.completions.create(**summary_kwargs)
-                    _retry_cc_nr = self._get_transport().normalize_response(summary_response)
-                    final_response = (_retry_cc_nr.content or "").strip()
+                    _retry_result = self._get_transport().normalize_response(summary_response)
+                    final_response = (_retry_result.content or "").strip()

                if final_response:
                    if "<think>" in final_response:
@@ -8650,9 +8625,9 @@ class AIAgent:
        self,
        user_message: str,
        system_message: str = None,
-        conversation_history: List[Dict[str, Any]] = None,
+        conversation_history: List[Dict[str, Any]] | None = None,
        task_id: str = None,
-        stream_callback: Optional[callable] = None,
+        stream_callback: Optional[Callable[..., Any]] = None,
        persist_user_message: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
@@ -9657,13 +9632,13 @@ class AIAgent:
                    elif self.api_mode == "bedrock_converse":
                        # Bedrock response already normalized at dispatch — use transport
                        _bt_fr = self._get_transport()
-                        _bt_fr_nr = _bt_fr.normalize_response(response)
-                        finish_reason = _bt_fr_nr.finish_reason
+                        _bedrock_result = _bt_fr.normalize_response(response)
+                        finish_reason = _bedrock_result.finish_reason
                    else:
                        _cc_fr = self._get_transport()
-                        _cc_fr_nr = _cc_fr.normalize_response(response)
-                        finish_reason = _cc_fr_nr.finish_reason
-                        assistant_message = self._nr_to_assistant_message(_cc_fr_nr)
+                        _finish_result = _cc_fr.normalize_response(response)
+                        finish_reason = _finish_result.finish_reason
+                        assistant_message = _finish_result
                        if self._should_treat_stop_as_truncated(
                            finish_reason,
                            assistant_message,
@@ -9688,12 +9663,12 @@ class AIAgent:
                        _trunc_msg = None
                        _trunc_transport = self._get_transport()
                        if self.api_mode == "anthropic_messages":
-                            _trunc_nr = _trunc_transport.normalize_response(
+                            _trunc_result = _trunc_transport.normalize_response(
                                response, strip_tool_prefix=self._is_anthropic_oauth
                            )
                        else:
-                            _trunc_nr = _trunc_transport.normalize_response(response)
-                        _trunc_msg = self._nr_to_assistant_message(_trunc_nr)
+                            _trunc_result = _trunc_transport.normalize_response(response)
+                        _trunc_msg = _trunc_result

                        _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
                        _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
@@ -10256,7 +10231,7 @@ class AIAgent:
                        auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
                        print(f"{self.log_prefix}🔐 Anthropic 401 — authentication failed.")
                        print(f"{self.log_prefix}   Auth method: {auth_method}")
-                        print(f"{self.log_prefix}   Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{self.log_prefix}   Token: (empty or short)")
+                        print(f"{self.log_prefix}   Token prefix: {str(key)[:12]}..." if key and len(str(key)) > 12 else f"{self.log_prefix}   Token: (empty or short)")
                        print(f"{self.log_prefix}   Troubleshooting:")
                        from hermes_constants import display_hermes_home as _dhh_fn
                        _dhh = _dhh_fn()
@@ -10928,9 +10903,9 @@ class AIAgent:
                _normalize_kwargs = {}
                if self.api_mode == "anthropic_messages":
                    _normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth
-                _nr = _transport.normalize_response(response, **_normalize_kwargs)
-                assistant_message = self._nr_to_assistant_message(_nr)
-                finish_reason = _nr.finish_reason
+                normalized = _transport.normalize_response(response, **_normalize_kwargs)
+                assistant_message = normalized
+                finish_reason = normalized.finish_reason
                
                # Normalize content to string — some OpenAI-compatible servers
                # (llama-server, etc.) return content as a dict or list instead
@@ -11600,7 +11575,7 @@ class AIAgent:
                        messages.append(assistant_msg)

                        if reasoning_text:
-                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
+                            reasoning_preview = str(reasoning_text)[:500] + "..." if len(str(reasoning_text)) > 500 else reasoning_text
                            logger.warning(
                                "Reasoning-only response (no visible content) "
                                "after exhausting retries and fallback. "
@@ -11939,7 +11914,7 @@ class AIAgent:

        return result

-    def chat(self, message: str, stream_callback: Optional[callable] = None) -> str:
+    def chat(self, message: str, stream_callback: Optional[Callable[..., Any]] = None) -> str:
        """
        Simple chat interface that returns just the final response.

@@ -20,9 +20,13 @@ Usage:
    python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
 """

+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import json
 import logging
-import os
 import time
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Tuple
@@ -1126,7 +1130,7 @@ def main(
    num_workers: int = 4,
    resume: bool = False,
    verbose: bool = False,
-    list_distributions: bool = False,
+    show_distributions: bool = False,
    ephemeral_system_prompt: str = None,
    log_prefix_chars: int = 100,
    providers_allowed: str = None,
@@ -1154,7 +1158,7 @@ def main(
        num_workers (int): Number of parallel worker processes (default: 4)
        resume (bool): Resume from checkpoint if run was interrupted (default: False)
        verbose (bool): Enable verbose logging (default: False)
-        list_distributions (bool): List available toolset distributions and exit
+        show_distributions (bool): List available toolset distributions and exit
        ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
         log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100)
        providers_allowed (str): Comma-separated list of OpenRouter providers to allow (e.g. "anthropic,openai")
@@ -1186,10 +1190,10 @@ def main(
                               --prefill_messages_file=configs/prefill_opus.json
        
        # List available distributions
-        python batch_runner.py --list_distributions
+        python batch_runner.py --show_distributions
    """
    # Handle list distributions
-    if list_distributions:
+    if show_distributions:
        from toolset_distributions import print_distribution_info

        print("📊 Available Toolset Distributions")
@@ -26,10 +26,13 @@ Usage:
    python mini_swe_runner.py --prompts_file prompts.jsonl --output_file trajectories.jsonl --env docker
 """

-import json
-import logging
 import os
 import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import json
+import logging
 import time
 import uuid
 from datetime import datetime
@@ -26,6 +26,7 @@ import shutil
 import subprocess
 import sys
 from collections import defaultdict
+from typing import Optional
 from datetime import datetime
 from pathlib import Path

@@ -43,7 +44,9 @@ AUTHOR_MAP = {
    "teknium1@gmail.com": "teknium1",
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
+    "343873859@qq.com": "DrStrangerUJN",
    # contributors (from noreply pattern)
+    "david.vv@icloud.com": "davidvv",
    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
    "snreynolds2506@gmail.com": "snreynolds",
    "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
@@ -98,6 +101,7 @@ AUTHOR_MAP = {
    "30841158+n-WN@users.noreply.github.com": "n-WN",
    "tsuijinglei@gmail.com": "hiddenpuppy",
    "jerome@clawwork.ai": "HiddenPuppy",
+    "wysie@users.noreply.github.com": "Wysie",
    "leoyuan0099@gmail.com": "keyuyuan",
    "bxzt2006@163.com": "Only-Code-A",
    "i@troy-y.org": "TroyMitchell911",
@@ -106,6 +110,7 @@ AUTHOR_MAP = {
    "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
    "ben.burtenshaw@gmail.com": "burtenshaw",
    "roopaknijhara@gmail.com": "rnijhara",
+    "josephzcan@gmail.com": "j0sephz",
    # contributors (manual mapping from git names)
    "ahmedsherif95@gmail.com": "asheriif",
    "liujinkun@bytedance.com": "liujinkun2025",
@@ -371,6 +376,38 @@ AUTHOR_MAP = {
    "projectadmin@wit.id": "projectadmin-dev",
    "mrigankamondal10@gmail.com": "Dev-Mriganka",
    "132275809+shushuzn@users.noreply.github.com": "shushuzn",
+    "ibrahimozsarac@gmail.com": "iborazzi",
+    "130149563+A-afflatus@users.noreply.github.com": "A-afflatus",
+    "huangkwell@163.com": "huangke19",
+    "tanishq@exa.ai": "10ishq",
+    "363708+christopherwoodall@users.noreply.github.com": "christopherwoodall",
+    "zhang9w0v5@qq.com": "zhang9w0v5",
+    "fuleinist@outlook.com": "fuleinist",
+    "43494187+Llugaes@users.noreply.github.com": "Llugaes",
+    "fengtianyu88@users.noreply.github.com": "fengtianyu88",
+    "l.moncany@gmail.com": "lmoncany",
+    "fatinghenji@users.noreply.github.com": "fatinghenji",
+    "xin.peng.dr@gmail.com": "xinpengdr",
+    "mike@mikewaters.net": "mikewaters",
+    "65117428+WadydX@users.noreply.github.com": "WadydX",
+    "216480837+isaachuangGMICLOUD@users.noreply.github.com": "isaachuangGMICLOUD",
+    "nukuom976228@gmail.com": "hsy5571616",
+    "11462216+Nan93@users.noreply.github.com": "Nan93",
+    "l973401489@126.com": "zhouxiaoya12",
+    "373119611@qq.com": "roytian1217",
+    "brett@brettbrewer.com": "minorgod",
+    "67779267+wenhao7@users.noreply.github.com": "wenhao7",
+    "git@yzx9.xyz": "yzx9",
+    "nilesh@cloudgeni.us": "lvnilesh",
+    "63502660+azhengbot@users.noreply.github.com": "azhengbot",
+    "sharvil.saxena@gmail.com": "sharziki",
+    "yuanhe@minimaxi.com": "RyanLee-Dev",
+    "curtis992250@gmail.com": "TaroballzChen",
+    "92638503+Lind3ey@users.noreply.github.com": "Lind3ey",
+    "1352808998@qq.com": "phpoh",
+    "caliberoviv@gmail.com": "vivganes",
+    "michaelfackerell@gmail.com": "MikeFac",
+    "18024642@qq.com": "GuyCui",
 }


@@ -649,7 +686,7 @@ def get_commits(since_tag=None):
    return commits


-def get_pr_number(subject: str) -> str:
+def get_pr_number(subject: str) -> Optional[str]:
    """Extract PR number from commit subject if present."""
    match = re.search(r"#(\d+)", subject)
    if match:
@@ -19,18 +19,23 @@ Environment Variables:
    OPENROUTER_API_KEY: API key for OpenRouter (required for agent)
 """

-import asyncio
 import os
 import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import asyncio
 from pathlib import Path

 import fire
 import yaml

+from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL
+
 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 _hermes_home = get_hermes_home()
-_project_env = Path(__file__).parent / '.env'
+_project_env = Path(__file__).parent.parent / '.env'

 from hermes_cli.env_loader import load_hermes_dotenv

@@ -60,8 +65,6 @@ from tools.rl_training_tool import get_missing_keys
 # Config Loading
 # ============================================================================

-from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL
-
 DEFAULT_MODEL = "anthropic/claude-opus-4.5"
 DEFAULT_BASE_URL = OPENROUTER_BASE_URL

@@ -267,7 +267,7 @@ def run_compression(input_dir: Path, output_dir: Path, config_path: str):
    # Import the compressor
    import sys
    sys.path.insert(0, str(Path(__file__).parent.parent))
-    from trajectory_compressor import TrajectoryCompressor, CompressionConfig
+    from scripts.trajectory_compressor import TrajectoryCompressor, CompressionConfig
    
    print(f"\n🗜️  Running trajectory compression...")
    print(f"   Input: {input_dir}")
@@ -30,14 +30,18 @@ Usage:
    python trajectory_compressor.py --input=data/my_run --sample_percent=10
 """

-import json
 import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import json
 import time
 import yaml
 import logging
 import asyncio
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Tuple, Callable
+from typing import List, Dict, Any, Optional, Tuple, Callable, cast
 from dataclasses import dataclass, field
 from datetime import datetime

@@ -52,7 +56,7 @@ from agent.retry_utils import jittered_backoff
 from hermes_cli.env_loader import load_hermes_dotenv

 _hermes_home = get_hermes_home()
-_project_env = Path(__file__).parent / ".env"
+_project_env = Path(__file__).parent.parent / ".env"
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)


@@ -75,7 +79,7 @@ def _effective_temperature_for_model(
    if fixed_temperature is OMIT_TEMPERATURE:
        return None  # caller must omit temperature
    if fixed_temperature is not None:
-        return fixed_temperature
+        return cast(float, fixed_temperature)
    return requested_temperature


@@ -607,11 +611,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                
                if getattr(self, '_use_call_llm', False):
                    from agent.auxiliary_client import call_llm
+                    _call_llm_kwargs: dict = {}
+                    if summary_temperature is not None:
+                        _call_llm_kwargs["temperature"] = summary_temperature
                    response = call_llm(
                        provider=self._llm_provider,
                        model=self.config.summarization_model,
                        messages=[{"role": "user", "content": prompt}],
-                        temperature=summary_temperature,
+                        **_call_llm_kwargs,
                        max_tokens=self.config.summary_target_tokens * 2,
                    )
                else:
@@ -623,20 +630,21 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                    if summary_temperature is not None:
                        _create_kwargs["temperature"] = summary_temperature
                    response = self.client.chat.completions.create(**_create_kwargs)
-                
+
                summary = self._coerce_summary_content(response.choices[0].message.content)
                return self._ensure_summary_prefix(summary)
-                
+
            except Exception as e:
                metrics.summarization_errors += 1
                self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}")
-                
+
                if attempt < self.config.max_retries - 1:
                    time.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0))
                else:
                    # Fallback: create a basic summary
                    return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]"
-    
+        raise AssertionError("unreachable: retry loop exhausted")
+
    async def _generate_summary_async(self, content: str, metrics: TrajectoryMetrics) -> str:
        """
        Generate a summary of the compressed turns using OpenRouter (async version).
@@ -676,11 +684,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                
                if getattr(self, '_use_call_llm', False):
                    from agent.auxiliary_client import async_call_llm
+                    _async_llm_kwargs: dict = {}
+                    if summary_temperature is not None:
+                        _async_llm_kwargs["temperature"] = summary_temperature
                    response = await async_call_llm(
                        provider=self._llm_provider,
                        model=self.config.summarization_model,
                        messages=[{"role": "user", "content": prompt}],
-                        temperature=summary_temperature,
+                        **_async_llm_kwargs,
                        max_tokens=self.config.summary_target_tokens * 2,
                    )
                else:
@@ -692,20 +703,21 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                    if summary_temperature is not None:
                        _create_kwargs["temperature"] = summary_temperature
                    response = await self._get_async_client().chat.completions.create(**_create_kwargs)
-                
+
                summary = self._coerce_summary_content(response.choices[0].message.content)
                return self._ensure_summary_prefix(summary)
-                
+
            except Exception as e:
                metrics.summarization_errors += 1
                self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}")
-                
+
                if attempt < self.config.max_retries - 1:
                    await asyncio.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0))
                else:
                    # Fallback: create a basic summary
                    return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]"
-    
+        raise AssertionError("unreachable: retry loop exhausted")
+
    def compress_trajectory(
        self,
        trajectory: List[Dict[str, str]]
@@ -8,7 +8,7 @@ metadata:
  hermes:
    tags: [wiki, knowledge-base, research, notes, markdown, rag-alternative]
    category: research
-    related_skills: [obsidian, arxiv, agentic-research-ideas]
+    related_skills: [obsidian, arxiv]
 ---

 # Karpathy's LLM Wiki
@@ -18,12 +18,12 @@ from agent.anthropic_adapter import (
    convert_messages_to_anthropic,
    convert_tools_to_anthropic,
    is_claude_code_token_valid,
-    normalize_anthropic_response,
    normalize_model_name,
    read_claude_code_credentials,
    resolve_anthropic_token,
    run_oauth_setup_token,
 )
+from agent.transports import get_transport


 # ---------------------------------------------------------------------------
@@ -1242,10 +1242,10 @@ class TestNormalizeResponse:

    def test_text_response(self):
        block = SimpleNamespace(type="text", text="Hello world")
-        msg, reason = normalize_anthropic_response(self._make_response([block]))
-        assert msg.content == "Hello world"
-        assert reason == "stop"
-        assert msg.tool_calls is None
+        nr = get_transport("anthropic_messages").normalize_response(self._make_response([block]))
+        assert nr.content == "Hello world"
+        assert nr.finish_reason == "stop"
+        assert nr.tool_calls is None

    def test_tool_use_response(self):
        blocks = [
@@ -1257,24 +1257,24 @@ class TestNormalizeResponse:
                input={"query": "test"},
            ),
        ]
-        msg, reason = normalize_anthropic_response(
+        nr = get_transport("anthropic_messages").normalize_response(
            self._make_response(blocks, "tool_use")
        )
-        assert msg.content == "Searching..."
-        assert reason == "tool_calls"
-        assert len(msg.tool_calls) == 1
-        assert msg.tool_calls[0].function.name == "search"
-        assert json.loads(msg.tool_calls[0].function.arguments) == {"query": "test"}
+        assert nr.content == "Searching..."
+        assert nr.finish_reason == "tool_calls"
+        assert len(nr.tool_calls) == 1
+        assert nr.tool_calls[0].name == "search"
+        assert json.loads(nr.tool_calls[0].arguments) == {"query": "test"}

    def test_thinking_response(self):
        blocks = [
            SimpleNamespace(type="thinking", thinking="Let me reason about this..."),
            SimpleNamespace(type="text", text="The answer is 42."),
        ]
-        msg, reason = normalize_anthropic_response(self._make_response(blocks))
-        assert msg.content == "The answer is 42."
-        assert msg.reasoning == "Let me reason about this..."
-        assert msg.reasoning_details == [{"type": "thinking", "thinking": "Let me reason about this..."}]
+        nr = get_transport("anthropic_messages").normalize_response(self._make_response(blocks))
+        assert nr.content == "The answer is 42."
+        assert nr.reasoning == "Let me reason about this..."
+        assert nr.provider_data["reasoning_details"] == [{"type": "thinking", "thinking": "Let me reason about this..."}]

    def test_thinking_response_preserves_signature(self):
        blocks = [
@@ -1285,24 +1285,24 @@ class TestNormalizeResponse:
                redacted=False,
            ),
        ]
-        msg, _ = normalize_anthropic_response(self._make_response(blocks))
-        assert msg.reasoning_details[0]["signature"] == "opaque_signature"
-        assert msg.reasoning_details[0]["thinking"] == "Let me reason about this..."
+        nr = get_transport("anthropic_messages").normalize_response(self._make_response(blocks))
+        assert nr.provider_data["reasoning_details"][0]["signature"] == "opaque_signature"
+        assert nr.provider_data["reasoning_details"][0]["thinking"] == "Let me reason about this..."

    def test_stop_reason_mapping(self):
        block = SimpleNamespace(type="text", text="x")
-        _, r1 = normalize_anthropic_response(
+        nr1 = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "end_turn")
        )
-        _, r2 = normalize_anthropic_response(
+        nr2 = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "tool_use")
        )
-        _, r3 = normalize_anthropic_response(
+        nr3 = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "max_tokens")
        )
-        assert r1 == "stop"
-        assert r2 == "tool_calls"
-        assert r3 == "length"
+        assert nr1.finish_reason == "stop"
+        assert nr2.finish_reason == "tool_calls"
+        assert nr3.finish_reason == "length"

    def test_stop_reason_refusal_and_context_exceeded(self):
        # Claude 4.5+ introduced two new stop_reason values the Messages API
@@ -1310,24 +1310,24 @@ class TestNormalizeResponse:
        # handlers already understand, instead of silently collapsing to
        # "stop" (old behavior).
        block = SimpleNamespace(type="text", text="")
-        _, refusal_reason = normalize_anthropic_response(
+        nr_refusal = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "refusal")
        )
-        _, overflow_reason = normalize_anthropic_response(
+        nr_overflow = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "model_context_window_exceeded")
        )
-        assert refusal_reason == "content_filter"
-        assert overflow_reason == "length"
+        assert nr_refusal.finish_reason == "content_filter"
+        assert nr_overflow.finish_reason == "length"

    def test_no_text_content(self):
        block = SimpleNamespace(
            type="tool_use", id="tc_1", name="search", input={"q": "hi"}
        )
-        msg, reason = normalize_anthropic_response(
+        nr = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "tool_use")
        )
-        assert msg.content is None
-        assert len(msg.tool_calls) == 1
+        assert nr.content is None
+        assert len(nr.tool_calls) == 1


 # ---------------------------------------------------------------------------
@@ -1162,3 +1162,75 @@ def test_load_pool_does_not_seed_qwen_oauth_when_no_token(tmp_path, monkeypatch)

    assert not pool.has_credentials()
    assert pool.entries() == []
+
+
+def _build_pool_with_entries(tmp_path, monkeypatch, provider="openrouter", entries=None):
+    """Helper: write a temp auth store and load_pool(provider) with singleton/env seeding stubbed out."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr("agent.credential_pool._seed_from_singletons", lambda p, e: (False, set()))
+    monkeypatch.setattr("agent.credential_pool._seed_from_env", lambda p, e: (False, set()))
+    if entries is None:  # default: two manual api_key entries with priorities 0 and 1
+        entries = [
+            {
+                "id": "cred-1",
+                "label": "primary",
+                "auth_type": "api_key",
+                "priority": 0,
+                "source": "manual",
+                "access_token": "tok-1",
+            },
+            {
+                "id": "cred-2",
+                "label": "secondary",
+                "auth_type": "api_key",
+                "priority": 1,
+                "source": "manual",
+                "access_token": "tok-2",
+            },
+        ]
+    _write_auth_store(tmp_path, {"version": 1, "credential_pool": {provider: entries}})
+    from agent.credential_pool import load_pool  # imported after HERMES_HOME and the stubs are set
+    return load_pool(provider)
+
+
+def test_remove_entry_removes_by_id(tmp_path, monkeypatch):
+    """remove_entry returns the entry whose id matches and drops only that entry from the pool."""
+    pool = _build_pool_with_entries(tmp_path, monkeypatch)
+
+    removed = pool.remove_entry("cred-1")
+
+    assert removed is not None
+    assert removed.id == "cred-1"
+    remaining_ids = [e.id for e in pool.entries()]
+    assert "cred-1" not in remaining_ids
+    assert "cred-2" in remaining_ids
+
+
+def test_remove_entry_returns_none_for_unknown_id(tmp_path, monkeypatch):
+    """remove_entry returns None for an id that matches nothing, and the pool keeps both entries."""
+    pool = _build_pool_with_entries(tmp_path, monkeypatch)
+
+    result = pool.remove_entry("nonexistent-id")
+
+    assert result is None
+    # Only the entry count is checked here, not the identity of the survivors
+    assert len(pool.entries()) == 2
+
+
+def test_remove_entry_renumbers_priorities(tmp_path, monkeypatch):
+    """Removing the middle of three entries leaves cred-1 and cred-3 with priorities 0 and 1."""
+    pool = _build_pool_with_entries(
+        tmp_path,
+        monkeypatch,
+        entries=[
+            {"id": "cred-1", "label": "a", "auth_type": "api_key", "priority": 0, "source": "manual", "access_token": "tok-1"},
+            {"id": "cred-2", "label": "b", "auth_type": "api_key", "priority": 1, "source": "manual", "access_token": "tok-2"},
+            {"id": "cred-3", "label": "c", "auth_type": "api_key", "priority": 2, "source": "manual", "access_token": "tok-3"},
+        ],
+    )
+
+    pool.remove_entry("cred-2")
+
+    remaining = sorted(pool.entries(), key=lambda e: e.priority)  # order by priority before comparing
+    assert [e.priority for e in remaining] == [0, 1]
+    assert [e.id for e in remaining] == ["cred-1", "cred-3"]
@@ -807,6 +807,24 @@ class TestPromptBuilderConstants:
        # check that this test is calibrated correctly).
        assert "include MEDIA:" in PLATFORM_HINTS["telegram"]

+    def test_platform_hints_mattermost(self):
+        hint = PLATFORM_HINTS["mattermost"]  # hint must mention the platform name, "MEDIA:" and "Markdown"
+        assert "Mattermost" in hint
+        assert "MEDIA:" in hint
+        assert "Markdown" in hint
+
+    def test_platform_hints_matrix(self):
+        hint = PLATFORM_HINTS["matrix"]  # hint must mention the platform name, "MEDIA:" and "Markdown"
+        assert "Matrix" in hint
+        assert "MEDIA:" in hint
+        assert "Markdown" in hint
+
+    def test_platform_hints_feishu(self):
+        hint = PLATFORM_HINTS["feishu"]  # hint must mention the platform name, "MEDIA:" and "Markdown"
+        assert "Feishu" in hint
+        assert "MEDIA:" in hint
+        assert "Markdown" in hint
+

 # =========================================================================
 # Environment hints
@@ -149,3 +149,95 @@ class TestMapFinishReason:

    def test_none_reason(self):
        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"
+
+
+# ---------------------------------------------------------------------------
+# Backward-compat property tests
+# ---------------------------------------------------------------------------
+
+class TestToolCallBackwardCompat:
+    """Check the compatibility attributes on ToolCall (type, function, call_id, response_item_id)
+    that mimic the old SimpleNamespace(id, type, function=SimpleNamespace(name, arguments)) shape."""
+
+    def test_type_is_function(self):
+        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
+        assert tc.type == "function"
+
+    def test_function_returns_self(self):
+        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
+        assert tc.function is tc  # .function is the ToolCall itself, so .function.name is .name
+
+    def test_function_name_matches(self):
+        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
+        assert tc.function.name == "search"
+        assert tc.function.name == tc.name
+
+    def test_function_arguments_matches(self):
+        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
+        assert tc.function.arguments == '{"q":"test"}'
+        assert tc.function.arguments == tc.arguments
+
+    def test_call_id_from_provider_data(self):
+        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
+        assert tc.call_id == "c1"
+
+    def test_call_id_none_when_no_provider_data(self):
+        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data=None)
+        assert tc.call_id is None
+
+    def test_response_item_id_from_provider_data(self):
+        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"response_item_id": "r1"})
+        assert tc.response_item_id == "r1"
+
+    def test_response_item_id_none_when_missing(self):
+        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
+        assert tc.response_item_id is None
+
+    def test_getattr_pattern_matches_agent_loop(self):
+        """getattr(tool_call, 'call_id', None), the form run_agent.py is said to use, must work."""
+        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
+        assert getattr(tc, "call_id", None) == "c1"
+        tc_no_pd = ToolCall(id="1", name="fn", arguments="{}")  # provider_data omitted entirely
+        assert getattr(tc_no_pd, "call_id", None) is None
+
+
+class TestNormalizedResponseBackwardCompat:
+    """Test the NormalizedResponse properties that replaced the _nr_to_assistant_message() shim."""
+
+    def test_reasoning_content_from_provider_data(self):
+        nr = NormalizedResponse(
+            content="hi", tool_calls=None, finish_reason="stop",
+            provider_data={"reasoning_content": "thought process"},
+        )
+        assert nr.reasoning_content == "thought process"
+
+    def test_reasoning_content_none_when_absent(self):
+        nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
+        assert nr.reasoning_content is None
+
+    def test_reasoning_details_from_provider_data(self):
+        details = [{"type": "thinking", "thinking": "hmm"}]
+        nr = NormalizedResponse(
+            content="hi", tool_calls=None, finish_reason="stop",
+            provider_data={"reasoning_details": details},
+        )
+        assert nr.reasoning_details == details
+
+    def test_reasoning_details_none_when_no_provider_data(self):
+        nr = NormalizedResponse(
+            content="hi", tool_calls=None, finish_reason="stop",
+            provider_data=None,
+        )
+        assert nr.reasoning_details is None
+
+    def test_codex_reasoning_items_from_provider_data(self):
+        items = ["item1", "item2"]
+        nr = NormalizedResponse(
+            content="hi", tool_calls=None, finish_reason="stop",
+            provider_data={"codex_reasoning_items": items},
+        )
+        assert nr.codex_reasoning_items == items
+
+    def test_codex_reasoning_items_none_when_absent(self):
+        nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
+        assert nr.codex_reasoning_items is None
@@ -164,7 +164,7 @@ class TestArceeURLMapping:
        assert "arceeai" in _PROVIDER_PREFIXES

    def test_trajectory_compressor_detects_arcee(self):
-        import trajectory_compressor as tc
+        import scripts.trajectory_compressor as tc
        comp = tc.TrajectoryCompressor.__new__(tc.TrajectoryCompressor)
        comp.config = types.SimpleNamespace(base_url="https://api.arcee.ai/api/v1")
        assert comp._detect_provider() == "arcee"
@@ -5,6 +5,8 @@ import pwd
 from pathlib import Path
 from types import SimpleNamespace

+import pytest
+
 import hermes_cli.gateway as gateway_cli
 from gateway.restart import (
    DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
@@ -1083,6 +1085,116 @@ class TestEnsureUserSystemdEnv:
        assert calls == []


+class TestPreflightUserSystemd:
+    """Tests for _preflight_user_systemd() — D-Bus reachability before systemctl --user.
+
+    Covers issue #5130 / Rick's RHEL 9.6 SSH scenario: setup tries to start the
+    gateway via ``systemctl --user start`` in a shell with no user D-Bus session,
+    which previously failed with a raw ``CalledProcessError`` and no remediation.
+    """
+
+    def test_noop_when_bus_socket_exists(self, monkeypatch):
+        """Socket already there (desktop / linger + prior login) → no-op."""
+        monkeypatch.setattr(
+            gateway_cli, "_user_dbus_socket_path",
+            lambda: type("P", (), {"exists": lambda self: True})(),
+        )
+        # Should not raise. subprocess is not stubbed here, so this path must not shell out.
+        gateway_cli._preflight_user_systemd()
+
+    def test_raises_when_linger_disabled_and_loginctl_denied(self, monkeypatch):
+        """Rick's scenario: no D-Bus, no linger, non-root SSH → clear error."""
+        monkeypatch.setattr(
+            gateway_cli, "_user_dbus_socket_path",
+            lambda: type("P", (), {"exists": lambda self: False})(),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "get_systemd_linger_status", lambda: (False, ""),
+        )
+        monkeypatch.setattr(gateway_cli.shutil, "which", lambda _: "/usr/bin/loginctl")
+
+        class _Result:
+            returncode = 1
+            stdout = ""
+            stderr = "Interactive authentication required."
+
+        monkeypatch.setattr(
+            gateway_cli.subprocess, "run", lambda *a, **kw: _Result(),
+        )
+
+        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
+            gateway_cli._preflight_user_systemd()
+
+        msg = str(exc_info.value)
+        assert "sudo loginctl enable-linger" in msg
+        assert "hermes gateway run" in msg  # foreground fallback mentioned
+        assert "Interactive authentication required" in msg
+
+    def test_raises_when_loginctl_missing(self, monkeypatch):
+        """No loginctl binary at all → error still names the manual enable-linger fix."""
+        monkeypatch.setattr(
+            gateway_cli, "_user_dbus_socket_path",
+            lambda: type("P", (), {"exists": lambda self: False})(),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "get_systemd_linger_status",
+            lambda: (None, "loginctl not found"),
+        )
+        monkeypatch.setattr(gateway_cli.shutil, "which", lambda _: None)
+
+        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
+            gateway_cli._preflight_user_systemd()
+
+        assert "sudo loginctl enable-linger" in str(exc_info.value)
+
+    def test_linger_enabled_but_socket_still_missing(self, monkeypatch):
+        """Edge case: linger says yes but the bus socket never came up."""
+        monkeypatch.setattr(
+            gateway_cli, "_user_dbus_socket_path",
+            lambda: type("P", (), {"exists": lambda self: False})(),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "get_systemd_linger_status", lambda: (True, ""),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "_wait_for_user_dbus_socket", lambda timeout=3.0: False,
+        )
+
+        with pytest.raises(gateway_cli.UserSystemdUnavailableError) as exc_info:
+            gateway_cli._preflight_user_systemd()
+
+        assert "linger is enabled" in str(exc_info.value)
+
+    def test_enable_linger_succeeds_and_socket_appears(self, monkeypatch, capsys):
+        """Happy remediation path: polkit allows enable-linger, socket spawns."""
+        monkeypatch.setattr(
+            gateway_cli, "_user_dbus_socket_path",
+            lambda: type("P", (), {"exists": lambda self: False})(),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "get_systemd_linger_status", lambda: (False, ""),
+        )
+        monkeypatch.setattr(gateway_cli.shutil, "which", lambda _: "/usr/bin/loginctl")
+
+        class _OkResult:
+            returncode = 0
+            stdout = ""
+            stderr = ""
+
+        monkeypatch.setattr(
+            gateway_cli.subprocess, "run", lambda *a, **kw: _OkResult(),
+        )
+        monkeypatch.setattr(
+            gateway_cli, "_wait_for_user_dbus_socket",
+            lambda timeout=5.0: True,
+        )
+
+        # Should not raise; the success message is checked on captured stdout.
+        gateway_cli._preflight_user_systemd()
+        out = capsys.readouterr().out
+        assert "Enabled linger" in out
+
+
 class TestProfileArg:
    """Tests for _profile_arg — returns '--profile <name>' for named profiles."""

@@ -0,0 +1,245 @@
+"""Tests for --ignore-user-config and --ignore-rules flags on `hermes chat`.
+
+Ported from openai/codex#18646 (`feat: add --ignore-user-config and --ignore-rules`).
+Codex's flags fully isolate a run from user-level config and exec-policy .rules
+files. In Hermes the equivalent isolation is:
+
+* ``--ignore-user-config`` → skip ``~/.hermes/config.yaml`` in ``load_cli_config()``
+  (credentials in ``.env`` are still loaded).
+* ``--ignore-rules`` → skip AGENTS.md / SOUL.md / .cursorrules auto-injection
+  and persistent memory (maps to ``AIAgent(skip_context_files=True,
+  skip_memory=True)``).
+
+Both flags are wired via env vars so they work cleanly across the
+argparse → cmd_chat → cli.main() → HermesCLI → AIAgent call chain.
+"""
+
+from __future__ import annotations
+
+import os
+import textwrap
+import importlib
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _clean_env(monkeypatch):
+    """Ensure the two env-var gates start AND end each test in a known state.
+
+    Some tests here write directly to ``os.environ`` (mirroring the real
+    ``cmd_chat`` logic), so ``monkeypatch.delenv`` alone isn't enough —
+    those writes aren't tracked by monkeypatch and won't be undone by it.
+    The teardown after ``yield`` pops both vars to prevent cross-test pollution.
+    """
+    for var in ("HERMES_IGNORE_USER_CONFIG", "HERMES_IGNORE_RULES"):
+        monkeypatch.delenv(var, raising=False)
+    yield
+    for var in ("HERMES_IGNORE_USER_CONFIG", "HERMES_IGNORE_RULES"):
+        os.environ.pop(var, None)
+
+
+class TestIgnoreUserConfigEnvGate:
+    """``load_cli_config()`` must honour ``HERMES_IGNORE_USER_CONFIG=1``.
+
+    When the env var is set, user config at ``<hermes_home>/config.yaml`` is
+    skipped even if present — the function returns only the built-in defaults
+    (merged with the project-level ``cli-config.yaml`` fallback).
+    """
+
+    def _write_user_config(self, tmp_path, model_default):
+        config_yaml = textwrap.dedent(
+            f"""
+            model:
+              default: {model_default}
+              provider: openrouter
+            agent:
+              system_prompt: "from user config"
+            """
+        ).lstrip()
+        (tmp_path / "config.yaml").write_text(config_yaml)
+
+    def _reload_cli(self, monkeypatch, tmp_path):
+        """Point cli._hermes_home at tmp_path and return cli.load_cli_config (no module reload)."""
+        import cli
+        monkeypatch.setattr(cli, "_hermes_home", tmp_path)
+        return cli.load_cli_config
+
+    def test_user_config_loaded_when_flag_unset(self, tmp_path, monkeypatch):
+        self._write_user_config(tmp_path, "anthropic/claude-sonnet-4.6")
+        load_cli_config = self._reload_cli(monkeypatch, tmp_path)
+
+        cfg = load_cli_config()
+
+        # User config value wins
+        assert cfg["model"]["default"] == "anthropic/claude-sonnet-4.6"
+        assert cfg["agent"]["system_prompt"] == "from user config"
+
+    def test_user_config_skipped_when_flag_set(self, tmp_path, monkeypatch):
+        """With HERMES_IGNORE_USER_CONFIG=1, user config.yaml is ignored.
+
+        ``model.default`` must not be the user's value (a built-in default or
+        project-level fallback is fine), and ``agent.system_prompt`` is not seen.
+        """
+        self._write_user_config(tmp_path, "anthropic/claude-sonnet-4.6")
+        monkeypatch.setenv("HERMES_IGNORE_USER_CONFIG", "1")
+
+        load_cli_config = self._reload_cli(monkeypatch, tmp_path)
+        cfg = load_cli_config()
+
+        # User-set "system_prompt: from user config" MUST NOT leak through
+        assert cfg["agent"].get("system_prompt", "") != "from user config"
+
+        # User-set model.default MUST NOT leak through — either the built-in
+        # default ("" or unset) or a project-level fallback, but never the
+        # user's value
+        assert cfg["model"].get("default", "") != "anthropic/claude-sonnet-4.6"
+
+    def test_flag_ignored_when_set_to_other_value(self, tmp_path, monkeypatch):
+        """Only the literal value "1" activates the bypass, matching the yolo pattern."""
+        self._write_user_config(tmp_path, "anthropic/claude-sonnet-4.6")
+        monkeypatch.setenv("HERMES_IGNORE_USER_CONFIG", "true")  # not "1"
+
+        load_cli_config = self._reload_cli(monkeypatch, tmp_path)
+        cfg = load_cli_config()
+
+        # "true" != "1", so user config IS loaded
+        assert cfg["model"]["default"] == "anthropic/claude-sonnet-4.6"
+
+
+class TestIgnoreRulesEnvGate:
+    """The constructor / env var must propagate to ``HermesCLI.ignore_rules``
+    so ``AIAgent`` is built with ``skip_context_files=True`` and
+    ``skip_memory=True``.
+    """
+
+    def test_env_var_enables_ignore_rules(self, monkeypatch):
+        """Setting HERMES_IGNORE_RULES=1 flips HermesCLI.ignore_rules True."""
+        monkeypatch.setenv("HERMES_IGNORE_RULES", "1")
+
+        # cli is imported inside the test rather than at module top, then
+        # reloaded so its module-level code re-runs with the env var set.
+        import cli
+        importlib.reload(cli)
+
+        # The full __init__ pulls in provider/auth/session DB, so it is bypassed:
+        # the object comes from object.__new__ and the assignment is repeated by
+        # hand. NOTE(review): this checks the copied expression below, not the
+        # real constructor — it will not notice if HermesCLI.__init__ drifts.
+        obj = object.__new__(cli.HermesCLI)
+        # Intended to mirror HermesCLI.__init__ in cli.py — TODO confirm it stays in sync:
+        ignore_rules = False  # constructor default
+        obj.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
+
+        assert obj.ignore_rules is True
+
+    def test_constructor_flag_alone_enables_ignore_rules(self, monkeypatch):
+        monkeypatch.delenv("HERMES_IGNORE_RULES", raising=False)
+        import cli
+        obj = object.__new__(cli.HermesCLI)
+        ignore_rules = True  # constructor argument
+        obj.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
+        assert obj.ignore_rules is True
+
+    def test_neither_flag_nor_env_leaves_rules_enabled(self, monkeypatch):
+        monkeypatch.delenv("HERMES_IGNORE_RULES", raising=False)
+        import cli
+        obj = object.__new__(cli.HermesCLI)
+        ignore_rules = False
+        obj.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
+        assert obj.ignore_rules is False
+
+
+class TestCmdChatWiring:
+    """``cmd_chat()`` in ``hermes_cli/main.py`` must set both env vars before
+    importing ``cli`` (``load_cli_config()`` runs at module import). Env
+    writes go through ``monkeypatch`` so they cannot leak into other tests.
+    """
+
+    def _simulate_cmd_chat_env_setup(self, args, monkeypatch):
+        """Mirror the cmd_chat snippet, via monkeypatch so teardown undoes it."""
+        if getattr(args, "ignore_user_config", False):
+            monkeypatch.setenv("HERMES_IGNORE_USER_CONFIG", "1")
+        if getattr(args, "ignore_rules", False):
+            monkeypatch.setenv("HERMES_IGNORE_RULES", "1")
+
+    def test_both_flags_set_both_env_vars(self, monkeypatch):
+        monkeypatch.delenv("HERMES_IGNORE_USER_CONFIG", raising=False)
+        monkeypatch.delenv("HERMES_IGNORE_RULES", raising=False)
+
+        class FakeArgs:
+            ignore_user_config = True
+            ignore_rules = True
+
+        self._simulate_cmd_chat_env_setup(FakeArgs(), monkeypatch)
+
+        assert os.environ.get("HERMES_IGNORE_USER_CONFIG") == "1"
+        assert os.environ.get("HERMES_IGNORE_RULES") == "1"
+
+    def test_only_ignore_user_config(self, monkeypatch):
+        monkeypatch.delenv("HERMES_IGNORE_USER_CONFIG", raising=False)
+        monkeypatch.delenv("HERMES_IGNORE_RULES", raising=False)
+
+        class FakeArgs:
+            ignore_user_config = True
+            ignore_rules = False
+
+        self._simulate_cmd_chat_env_setup(FakeArgs(), monkeypatch)
+
+        assert os.environ.get("HERMES_IGNORE_USER_CONFIG") == "1"
+        assert "HERMES_IGNORE_RULES" not in os.environ
+
+    def test_flags_absent_sets_nothing(self, monkeypatch):
+        monkeypatch.delenv("HERMES_IGNORE_USER_CONFIG", raising=False)
+        monkeypatch.delenv("HERMES_IGNORE_RULES", raising=False)
+
+        class FakeArgs:
+            pass  # no attributes at all — getattr fallback must handle
+
+        self._simulate_cmd_chat_env_setup(FakeArgs(), monkeypatch)
+
+        assert "HERMES_IGNORE_USER_CONFIG" not in os.environ
+        assert "HERMES_IGNORE_RULES" not in os.environ
+
+
+class TestArgparseFlagsRegistered:
+    """Verify the `chat` subparser actually exposes --ignore-user-config
+    and --ignore-rules. This is the contract test for the CLI surface.
+    """
+
+    def test_flags_present_in_chat_parser(self):
+        """Parse a synthetic chat invocation and check both attributes exist."""
+        # Minimal argparse tree matching the real chat subparser shape for the
+        # two flags under test. If someone removes the flag from main.py, this
+        # test keeps passing in isolation — but the source check below catches it.
+        import argparse
+        parser = argparse.ArgumentParser(prog="hermes")
+        subs = parser.add_subparsers(dest="command")
+        chat = subs.add_parser("chat")
+        chat.add_argument("--ignore-user-config", action="store_true", default=False)
+        chat.add_argument("--ignore-rules", action="store_true", default=False)
+
+        args = parser.parse_args(["chat", "--ignore-user-config", "--ignore-rules"])
+        assert args.ignore_user_config is True
+        assert args.ignore_rules is True
+
+    def test_main_py_registers_both_flags(self):
+        """Source check: hermes_cli/main.py mentions both flags and env vars.
+
+        The parser is not built or invoked; only the module source is searched.
+        """
+        import hermes_cli.main as hm
+
+        # The real parser could be reached by driving main() and catching the
+        # SystemExit raised by --help. Simpler: grep the module source for the
+        # flag strings. This is brittle (the quote style must match) and does
+        # not prove the flags are registered on the chat subparser itself.
+        import inspect
+        src = inspect.getsource(hm)
+        assert '"--ignore-user-config"' in src, \
+            "chat subparser must register --ignore-user-config"
+        assert '"--ignore-rules"' in src, \
+            "chat subparser must register --ignore-rules"
+        # And the cmd_chat env-var wiring must be present
+        assert "HERMES_IGNORE_USER_CONFIG" in src
+        assert "HERMES_IGNORE_RULES" in src
@@ -6,6 +6,8 @@ Covers `_plugin_image_gen_providers`, `_visible_providers`, and

 from __future__ import annotations

+from types import SimpleNamespace
+
 import pytest

 from agent import image_gen_registry
@@ -172,3 +174,78 @@ class TestConfigWriting:

        assert config["image_gen"]["provider"] == "noenv"
        assert config["image_gen"]["model"] == "noenv-model-v1"
+
+    def test_reconfiguring_plugin_provider_writes_provider_and_model(self, monkeypatch, tmp_path):
+        """The reconfigure path should switch image_gen away from managed FAL
+        and onto the selected plugin provider."""
+        from hermes_cli import tools_config
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        image_gen_registry.register_provider(_FakeProvider("testopenai"))
+        monkeypatch.setattr(tools_config, "_prompt_choice", lambda *a, **kw: 0)
+        monkeypatch.setattr(tools_config, "_prompt", lambda *a, **kw: "")
+        monkeypatch.setattr(
+            tools_config,
+            "get_env_value",
+            lambda key: "sk-test" if key == "OPENAI_API_KEY" else "",
+        )
+
+        config = {"image_gen": {"use_gateway": True}}
+        provider_row = {
+            "name": "OpenAI",
+            "env_vars": [{"key": "OPENAI_API_KEY", "prompt": "OpenAI API key"}],
+            "image_gen_plugin_name": "testopenai",
+        }
+
+        tools_config._reconfigure_provider(provider_row, config)
+
+        assert config["image_gen"]["provider"] == "testopenai"
+        assert config["image_gen"]["model"] == "testopenai-model-v1"
+        assert config["image_gen"]["use_gateway"] is False
+
+    def test_plugin_provider_active_overrides_managed_nous_active_label(self, monkeypatch):
+        from hermes_cli import tools_config
+
+        monkeypatch.setattr(
+            tools_config,
+            "get_nous_subscription_features",
+            lambda config: SimpleNamespace(
+                features={"image_gen": SimpleNamespace(managed_by_nous=True)}
+            ),
+        )
+
+        config = {"image_gen": {"provider": "openai", "use_gateway": False}}
+        nous_row = {
+            "name": "Nous Subscription",
+            "managed_nous_feature": "image_gen",
+        }
+        openai_row = {
+            "name": "OpenAI",
+            "image_gen_plugin_name": "openai",
+        }
+
+        assert tools_config._is_provider_active(openai_row, config) is True
+        assert tools_config._is_provider_active(nous_row, config) is False
+
+    def test_reconfiguring_fal_clears_plugin_provider(self, monkeypatch):
+        from hermes_cli import tools_config
+
+        monkeypatch.setattr(tools_config, "_prompt_choice", lambda *a, **kw: 0)
+        monkeypatch.setattr(tools_config, "_prompt", lambda *a, **kw: "")
+        monkeypatch.setattr(
+            tools_config,
+            "get_env_value",
+            lambda key: "fal-key" if key == "FAL_KEY" else "",
+        )
+
+        config = {"image_gen": {"provider": "openai", "use_gateway": False}}
+        provider_row = {
+            "name": "FAL.ai",
+            "env_vars": [{"key": "FAL_KEY", "prompt": "FAL API key"}],
+            "imagegen_backend": "fal",
+        }
+
+        tools_config._reconfigure_provider(provider_row, config)
+
+        assert config["image_gen"]["provider"] == "fal"
+        assert config["image_gen"]["use_gateway"] is False
@@ -253,3 +253,148 @@ def test_list_dedupes_dict_model_matching_singular_default(monkeypatch):
    ds_rows = [p for p in providers if p["name"] == "DeepSeek"]
    assert ds_rows[0]["models"].count("deepseek-chat") == 1
    assert ds_rows[0]["models"] == ["deepseek-chat", "deepseek-reasoner"]
+
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# #9210: group custom_providers by (base_url, api_key) in /model picker
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_list_authenticated_providers_groups_same_endpoint(monkeypatch):
+    """Multiple custom_providers entries sharing a base_url+api_key must be
+    returned as a single picker row with all their models merged."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="custom",
+        current_base_url="http://localhost:11434/v1",
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — MiniMax M2.7", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "minimax-m2.7"},
+            {"name": "Ollama — GLM 5.1",      "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+            {"name": "Ollama — Qwen3-coder", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "qwen3-coder"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 1, (
+        "Expected 1 group for shared endpoint, got "
+        f"{[p['slug'] for p in custom_groups]}"
+    )
+    group = custom_groups[0]
+    assert set(group["models"]) == {"minimax-m2.7", "glm-5.1", "qwen3-coder"}
+    assert group["total_models"] == 3
+    # Per-model suffix stripped from display name
+    assert group["name"] == "Ollama"
+
+
+def test_list_authenticated_providers_current_endpoint_uses_current_slug(monkeypatch):
+    """When current_base_url matches the grouped endpoint, the slug must
+    equal current_provider so picker selection routes through the live
+    credential pipeline."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="custom",
+        current_base_url="http://localhost:11434/v1",
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — GLM 5.1", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+        ],
+        max_models=50,
+    )
+
+    matches = [p for p in providers if p.get("is_user_defined")]
+    assert len(matches) == 1
+    group = matches[0]
+    assert group["slug"] == "custom"
+    assert group["is_current"] is True
+
+
+def test_list_authenticated_providers_distinct_endpoints_stay_separate(monkeypatch):
+    """Entries with different base_urls must produce separate picker rows
+    even if some display names happen to be similar."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama — GLM 5.1", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "glm-5.1"},
+            {"name": "Moonshot", "base_url": "https://api.moonshot.cn/v1",
+             "api_key": "sk-m", "model": "moonshot-v1"},
+            {"name": "Ollama — Qwen3-coder", "base_url": "http://localhost:11434/v1",
+             "api_key": "ollama", "model": "qwen3-coder"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 2
+    # Ollama endpoint collapses to one row with both models
+    ollama = next(p for p in custom_groups if p["name"] == "Ollama")
+    assert set(ollama["models"]) == {"glm-5.1", "qwen3-coder"}
+    moonshot = next(p for p in custom_groups if p["name"] == "Moonshot")
+    assert moonshot["models"] == ["moonshot-v1"]
+
+
+def test_list_authenticated_providers_same_url_different_keys_disambiguated(monkeypatch):
+    """Two custom_providers entries with the same base_url but different
+    api_keys (and identical cleaned names) must both stay visible in the
+    picker — slug is suffixed to disambiguate."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=[
+            {"name": "OpenAI — key A", "base_url": "https://api.openai.com/v1",
+             "api_key": "sk-AAA", "model": "gpt-5.4"},
+            {"name": "OpenAI — key B", "base_url": "https://api.openai.com/v1",
+             "api_key": "sk-BBB", "model": "gpt-4.6"},
+        ],
+        max_models=50,
+    )
+
+    custom_groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(custom_groups) == 2
+    slugs = sorted(p["slug"] for p in custom_groups)
+    # First group keeps the base slug, second gets a numeric suffix
+    assert slugs == ["custom:openai", "custom:openai-2"]
+    # Each row has a distinct model
+    models = {p["slug"]: p["models"] for p in custom_groups}
+    assert models["custom:openai"] == ["gpt-5.4"]
+    assert models["custom:openai-2"] == ["gpt-4.6"]
+
+
+def test_list_authenticated_providers_total_models_reflects_grouped_count(monkeypatch):
+    """After grouping six entries into one row, total_models must reflect
+    the full count, and every grouped model appears in the list."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    entries = [
+        {"name": f"Ollama \u2014 Model {i}", "base_url": "http://localhost:11434/v1",
+         "api_key": "ollama", "model": f"model-{i}"}
+        for i in range(6)
+    ]
+    providers = list_authenticated_providers(
+        user_providers={},
+        custom_providers=entries,
+        max_models=4,
+    )
+
+    groups = [p for p in providers if p.get("is_user_defined")]
+    assert len(groups) == 1
+    group = groups[0]
+    assert group["total_models"] == 6
+    # All six models are preserved in the grouped row.
+    assert sorted(group["models"]) == sorted(f"model-{i}" for i in range(6))
@@ -455,6 +455,47 @@ class TestExportImport:
        with pytest.raises(FileExistsError):
            import_profile(str(archive_path), name="coder")

+    def test_import_with_explicit_name_does_not_mutate_existing_archive_root_profile(
+        self, profile_env, tmp_path
+    ):
+        create_profile("victim", no_alias=True)
+        victim_dir = get_profile_dir("victim")
+        (victim_dir / "marker.txt").write_text("original")
+
+        archive_path = tmp_path / "export" / "victim.tar.gz"
+        archive_path.parent.mkdir(parents=True, exist_ok=True)
+        with tarfile.open(archive_path, "w:gz") as tf:
+            data = b"imported"
+            info = tarfile.TarInfo("victim/marker.txt")
+            info.size = len(data)
+            tf.addfile(info, io.BytesIO(data))
+
+        imported = import_profile(str(archive_path), name="renamed")
+
+        assert imported == get_profile_dir("renamed")
+        assert (imported / "marker.txt").read_text() == "imported"
+        assert (victim_dir / "marker.txt").read_text() == "original"
+
+    def test_import_rejects_archive_with_multiple_top_level_directories(
+        self, profile_env, tmp_path
+    ):
+        archive_path = tmp_path / "export" / "multi-root.tar.gz"
+        archive_path.parent.mkdir(parents=True, exist_ok=True)
+
+        with tarfile.open(archive_path, "w:gz") as tf:
+            for member_name, data in (
+                ("alpha/marker.txt", b"a"),
+                ("beta/marker.txt", b"b"),
+            ):
+                info = tarfile.TarInfo(member_name)
+                info.size = len(data)
+                tf.addfile(info, io.BytesIO(data))
+
+        with pytest.raises(ValueError, match="exactly one top-level directory"):
+            import_profile(str(archive_path), name="coder")
+
+        assert not get_profile_dir("coder").exists()
+
    def test_import_rejects_traversal_archive_member(self, profile_env, tmp_path):
        archive_path = tmp_path / "export" / "evil.tar.gz"
        archive_path.parent.mkdir(parents=True, exist_ok=True)
@@ -135,3 +135,48 @@ class TestNormalizeCustomProviderEntry:
        }
        result = _normalize_custom_provider_entry(entry, provider_key="")
        assert result is None
+
+    def test_models_list_converted_to_dict(self):
+        """List-format models should be preserved as an empty-value dict so
+        /model picks them up instead of showing the provider with (0) models."""
+        entry = {
+            "name": "tencent-coding-plan",
+            "base_url": "https://api.lkeap.cloud.tencent.com/coding/v3",
+            "models": ["glm-5", "kimi-k2.5", "minimax-m2.5"],
+        }
+        result = _normalize_custom_provider_entry(entry)
+        assert result is not None
+        assert result["models"] == {"glm-5": {}, "kimi-k2.5": {}, "minimax-m2.5": {}}
+
+    def test_models_dict_preserved(self):
+        """Dict-format models should pass through unchanged."""
+        entry = {
+            "name": "acme",
+            "base_url": "https://api.example.com/v1",
+            "models": {"gpt-foo": {"context_length": 32000}},
+        }
+        result = _normalize_custom_provider_entry(entry)
+        assert result is not None
+        assert result["models"] == {"gpt-foo": {"context_length": 32000}}
+
+    def test_models_list_filters_empty_and_non_string(self):
+        """List entries that are empty strings or non-strings are skipped."""
+        entry = {
+            "name": "acme",
+            "base_url": "https://api.example.com/v1",
+            "models": ["valid", "", None, 42, "  ", "also-valid"],
+        }
+        result = _normalize_custom_provider_entry(entry)
+        assert result is not None
+        assert result["models"] == {"valid": {}, "also-valid": {}}
+
+    def test_models_empty_list_omitted(self):
+        """Empty list (falsy) should not produce a models key."""
+        entry = {
+            "name": "acme",
+            "base_url": "https://api.example.com/v1",
+            "models": [],
+        }
+        result = _normalize_custom_provider_entry(entry)
+        assert result is not None
+        assert "models" not in result
@@ -1,172 +0,0 @@
-"""Unit tests for hermes_cli.pty_bridge — PTY spawning + byte forwarding.
-
-These tests drive the bridge with minimal POSIX processes (echo, env, sleep,
-printf) to verify it behaves like a PTY you can read/write/resize/close.
-"""
-
-from __future__ import annotations
-
-import os
-import sys
-import time
-
-import pytest
-
-pytest.importorskip("ptyprocess", reason="ptyprocess not installed")
-
-from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
-
-
-skip_on_windows = pytest.mark.skipif(
-    sys.platform.startswith("win"), reason="PTY bridge is POSIX-only"
-)
-
-
-def _read_until(bridge: PtyBridge, needle: bytes, timeout: float = 5.0) -> bytes:
-    """Accumulate PTY output until we see `needle` or time out."""
-    deadline = time.monotonic() + timeout
-    buf = bytearray()
-    while time.monotonic() < deadline:
-        chunk = bridge.read(timeout=0.2)
-        if chunk is None:
-            break
-        buf.extend(chunk)
-        if needle in buf:
-            return bytes(buf)
-    return bytes(buf)
-
-
-@skip_on_windows
-class TestPtyBridgeSpawn:
-    def test_is_available_on_posix(self):
-        assert PtyBridge.is_available() is True
-
-    def test_spawn_returns_bridge_with_pid(self):
-        bridge = PtyBridge.spawn(["true"])
-        try:
-            assert bridge.pid > 0
-        finally:
-            bridge.close()
-
-    def test_spawn_raises_on_missing_argv0(self, tmp_path):
-        with pytest.raises((FileNotFoundError, OSError)):
-            PtyBridge.spawn([str(tmp_path / "definitely-not-a-real-binary")])
-
-
-@skip_on_windows
-class TestPtyBridgeIO:
-    def test_reads_child_stdout(self):
-        bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf hermes-ok"])
-        try:
-            output = _read_until(bridge, b"hermes-ok")
-            assert b"hermes-ok" in output
-        finally:
-            bridge.close()
-
-    def test_write_sends_to_child_stdin(self):
-        # `cat` with no args echoes stdin back to stdout.  We write a line,
-        # read it back, then signal EOF to let cat exit cleanly.
-        bridge = PtyBridge.spawn(["/bin/cat"])
-        try:
-            bridge.write(b"hello-pty\n")
-            output = _read_until(bridge, b"hello-pty")
-            assert b"hello-pty" in output
-        finally:
-            bridge.close()
-
-    def test_read_returns_none_after_child_exits(self):
-        bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf done"])
-        try:
-            _read_until(bridge, b"done")
-            # Give the child a beat to exit cleanly, then drain until EOF.
-            deadline = time.monotonic() + 3.0
-            while bridge.is_alive() and time.monotonic() < deadline:
-                bridge.read(timeout=0.1)
-            # Next reads after exit should return None (EOF), not raise.
-            got_none = False
-            for _ in range(10):
-                if bridge.read(timeout=0.1) is None:
-                    got_none = True
-                    break
-            assert got_none, "PtyBridge.read did not return None after child EOF"
-        finally:
-            bridge.close()
-
-
-@skip_on_windows
-class TestPtyBridgeResize:
-    def test_resize_updates_child_winsize(self):
-        # tput reads COLUMNS/LINES from the TTY ioctl (TIOCGWINSZ).
-        # Spawn a shell, resize, then ask tput for the dimensions.
-        bridge = PtyBridge.spawn(
-            ["/bin/sh", "-c", "sleep 0.1; tput cols; tput lines"],
-            cols=80,
-            rows=24,
-        )
-        try:
-            bridge.resize(cols=123, rows=45)
-            output = _read_until(bridge, b"45", timeout=5.0)
-            # tput prints just the numbers, one per line
-            assert b"123" in output
-            assert b"45" in output
-        finally:
-            bridge.close()
-
-
-@skip_on_windows
-class TestPtyBridgeClose:
-    def test_close_is_idempotent(self):
-        bridge = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
-        bridge.close()
-        bridge.close()  # must not raise
-        assert not bridge.is_alive()
-
-    def test_close_terminates_long_running_child(self):
-        bridge = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
-        pid = bridge.pid
-        bridge.close()
-        # Give the kernel a moment to reap
-        deadline = time.monotonic() + 3.0
-        reaped = False
-        while time.monotonic() < deadline:
-            try:
-                os.kill(pid, 0)
-                time.sleep(0.05)
-            except ProcessLookupError:
-                reaped = True
-                break
-        assert reaped, f"pid {pid} still running after close()"
-
-
-@skip_on_windows
-class TestPtyBridgeEnv:
-    def test_cwd_is_respected(self, tmp_path):
-        bridge = PtyBridge.spawn(
-            ["/bin/sh", "-c", "pwd"],
-            cwd=str(tmp_path),
-        )
-        try:
-            output = _read_until(bridge, str(tmp_path).encode())
-            assert str(tmp_path).encode() in output
-        finally:
-            bridge.close()
-
-    def test_env_is_forwarded(self):
-        bridge = PtyBridge.spawn(
-            ["/bin/sh", "-c", "printf %s \"$HERMES_PTY_TEST\""],
-            env={**os.environ, "HERMES_PTY_TEST": "pty-env-works"},
-        )
-        try:
-            output = _read_until(bridge, b"pty-env-works")
-            assert b"pty-env-works" in output
-        finally:
-            bridge.close()
-
-
-class TestPtyBridgeUnavailable:
-    """Platform fallback semantics — PtyUnavailableError is importable and
-    carries a user-readable message."""
-
-    def test_error_carries_user_message(self):
-        err = PtyUnavailableError("platform not supported")
-        assert "platform" in str(err)
@@ -1256,186 +1256,3 @@ class TestStatusRemoteGateway:
        assert data["gateway_running"] is True
        assert data["gateway_pid"] is None
        assert data["gateway_state"] == "running"
-
-
-# ---------------------------------------------------------------------------
-# /api/pty WebSocket — terminal bridge for the dashboard "Chat" tab.
-#
-# These tests drive the endpoint with a tiny fake command (typically ``cat``
-# or ``sh -c 'printf …'``) instead of the real ``hermes --tui`` binary.  The
-# endpoint resolves its argv through ``_resolve_chat_argv``, so tests
-# monkeypatch that hook.
-# ---------------------------------------------------------------------------
-
-import sys
-
-
-skip_on_windows = pytest.mark.skipif(
-    sys.platform.startswith("win"), reason="PTY bridge is POSIX-only"
-)
-
-
-@skip_on_windows
-class TestPtyWebSocket:
-    @pytest.fixture(autouse=True)
-    def _setup(self, monkeypatch, _isolate_hermes_home):
-        from starlette.testclient import TestClient
-
-        import hermes_cli.web_server as ws
-
-        # Avoid exec'ing the actual TUI in tests: every test below installs
-        # its own fake argv via ``ws._resolve_chat_argv``.
-        self.ws_module = ws
-        self.token = ws._SESSION_TOKEN
-        self.client = TestClient(ws.app)
-
-    def _url(self, token: str | None = None, **params: str) -> str:
-        tok = token if token is not None else self.token
-        # TestClient.websocket_connect takes the path; it reconstructs the
-        # query string, so we pass it inline.
-        from urllib.parse import urlencode
-
-        q = {"token": tok, **params}
-        return f"/api/pty?{urlencode(q)}"
-
-    def test_rejects_missing_token(self, monkeypatch):
-        monkeypatch.setattr(
-            self.ws_module,
-            "_resolve_chat_argv",
-            lambda resume=None: (["/bin/cat"], None, None),
-        )
-        from starlette.websockets import WebSocketDisconnect
-
-        with pytest.raises(WebSocketDisconnect) as exc:
-            with self.client.websocket_connect("/api/pty"):
-                pass
-        assert exc.value.code == 4401
-
-    def test_rejects_bad_token(self, monkeypatch):
-        monkeypatch.setattr(
-            self.ws_module,
-            "_resolve_chat_argv",
-            lambda resume=None: (["/bin/cat"], None, None),
-        )
-        from starlette.websockets import WebSocketDisconnect
-
-        with pytest.raises(WebSocketDisconnect) as exc:
-            with self.client.websocket_connect(self._url(token="wrong")):
-                pass
-        assert exc.value.code == 4401
-
-    def test_streams_child_stdout_to_client(self, monkeypatch):
-        monkeypatch.setattr(
-            self.ws_module,
-            "_resolve_chat_argv",
-            lambda resume=None: (
-                ["/bin/sh", "-c", "printf hermes-ws-ok"],
-                None,
-                None,
-            ),
-        )
-        with self.client.websocket_connect(self._url()) as conn:
-            # Drain frames until we see the needle or time out.  TestClient's
-            # recv_bytes blocks; loop until we have the signal byte string.
-            buf = b""
-            import time
-
-            deadline = time.monotonic() + 5.0
-            while time.monotonic() < deadline:
-                try:
-                    frame = conn.receive_bytes()
-                except Exception:
-                    break
-                if frame:
-                    buf += frame
-                if b"hermes-ws-ok" in buf:
-                    break
-            assert b"hermes-ws-ok" in buf
-
-    def test_client_input_reaches_child_stdin(self, monkeypatch):
-        # ``cat`` echoes stdin back, so a write → read round-trip proves
-        # the full duplex path.
-        monkeypatch.setattr(
-            self.ws_module,
-            "_resolve_chat_argv",
-            lambda resume=None: (["/bin/cat"], None, None),
-        )
-        with self.client.websocket_connect(self._url()) as conn:
-            conn.send_bytes(b"round-trip-payload\n")
-            buf = b""
-            import time
-
-            deadline = time.monotonic() + 5.0
-            while time.monotonic() < deadline:
-                frame = conn.receive_bytes()
-                if frame:
-                    buf += frame
-                if b"round-trip-payload" in buf:
-                    break
-            assert b"round-trip-payload" in buf
-
-    def test_resize_escape_is_forwarded(self, monkeypatch):
-        # Resize escape gets intercepted and applied via TIOCSWINSZ,
-        # then ``tput cols/lines`` reports the new dimensions back.
-        monkeypatch.setattr(
-            self.ws_module,
-            "_resolve_chat_argv",
-            # sleep gives the test time to push the resize before tput runs
-            lambda resume=None: (
-                ["/bin/sh", "-c", "sleep 0.15; tput cols; tput lines"],
-                None,
-                None,
-            ),
-        )
-        with self.client.websocket_connect(self._url()) as conn:
-            conn.send_text("\x1b[RESIZE:99;41]")
-            buf = b""
-            import time
-
-            deadline = time.monotonic() + 5.0
-            while time.monotonic() < deadline:
-                frame = conn.receive_bytes()
-                if frame:
-                    buf += frame
-                if b"99" in buf and b"41" in buf:
-                    break
-            assert b"99" in buf and b"41" in buf
-
-    def test_unavailable_platform_closes_with_message(self, monkeypatch):
-        from hermes_cli.pty_bridge import PtyUnavailableError
-
-        def _raise(argv, **kwargs):
-            raise PtyUnavailableError("pty missing for tests")
-
-        monkeypatch.setattr(
-            self.ws_module,
-            "_resolve_chat_argv",
-            lambda resume=None: (["/bin/cat"], None, None),
-        )
-        # Patch PtyBridge.spawn at the web_server module's binding.
-        import hermes_cli.web_server as ws_mod
-
-        monkeypatch.setattr(ws_mod.PtyBridge, "spawn", classmethod(lambda cls, *a, **k: _raise(*a, **k)))
-
-        with self.client.websocket_connect(self._url()) as conn:
-            # Expect a final text frame with the error message, then close.
-            msg = conn.receive_text()
-            assert "pty missing" in msg or "unavailable" in msg.lower() or "pty" in msg.lower()
-
-    def test_resume_parameter_is_forwarded_to_argv(self, monkeypatch):
-        captured: dict = {}
-
-        def fake_resolve(resume=None):
-            captured["resume"] = resume
-            return (["/bin/sh", "-c", "printf resume-arg-ok"], None, None)
-
-        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve)
-
-        with self.client.websocket_connect(self._url(resume="sess-42")) as conn:
-            # Drain briefly so the handler actually invokes the resolver.
-            try:
-                conn.receive_bytes()
-            except Exception:
-                pass
-        assert captured.get("resume") == "sess-42"
-
@@ -104,7 +104,7 @@ def main():
    test_file = create_test_dataset()
    
    print(f"\n📝 To run the test manually:")
-    print(f"   python batch_runner.py \\")
+    print(f"   python scripts/batch_runner.py \\")
    print(f"       --dataset_file={test_file} \\")
    print(f"       --batch_size=2 \\")
    print(f"       --run_name={run_name} \\")
@@ -112,7 +112,7 @@ def main():
    print(f"       --num_workers=2")
    
    print(f"\n💡 Or test with different distributions:")
-    print(f"   python batch_runner.py --list_distributions")
+    print(f"   python scripts/batch_runner.py --list_distributions")
    
    print(f"\n🔍 After running, you can verify output with:")
    print(f"   python tests/test_batch_runner.py --verify")
@@ -30,7 +30,7 @@ from pathlib import Path
 from typing import List, Dict, Any
 import traceback

-# Add project root to path to import batch_runner
+# Add project root to path to import scripts.batch_runner
 sys.path.insert(0, str(Path(__file__).parent.parent.parent))


@@ -135,7 +135,7 @@ def test_current_implementation():
        shutil.rmtree(output_dir)
    
    # Import here to avoid issues if module changes
-    from batch_runner import BatchRunner
+    from scripts.batch_runner import BatchRunner
    
    checkpoint_file = output_dir / "checkpoint.json"
    
@@ -229,7 +229,7 @@ def test_interruption_and_resume():
    if output_dir.exists():
        shutil.rmtree(output_dir)
    
-    from batch_runner import BatchRunner
+    from scripts.batch_runner import BatchRunner
    
    checkpoint_file = output_dir / "checkpoint.json"
    
@@ -0,0 +1,299 @@
+"""Tests for the bundled ``openai-codex`` image_gen plugin.
+
+Mirrors ``test_openai_provider.py`` but targets the standalone
+Codex/ChatGPT-OAuth-backed provider that uses the Responses
+``image_generation`` tool path instead of the ``images.generate`` REST
+endpoint.
+"""
+
+from __future__ import annotations
+
+import importlib
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+# The plugin directory uses a hyphen, which is not a valid Python identifier
+# for the dotted-import form. Load it via importlib so tests don't need to
+# touch sys.path or rename the directory.
+codex_plugin = importlib.import_module("plugins.image_gen.openai-codex")
+
+
+# 1×1 transparent PNG — valid bytes for save_b64_image()
+_PNG_HEX = (
+    "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
+    "890000000d49444154789c6300010000000500010d0a2db40000000049454e44"
+    "ae426082"
+)
+
+
+def _b64_png() -> str:
+    import base64
+    return base64.b64encode(bytes.fromhex(_PNG_HEX)).decode()
+
+
+class _FakeStream:
+    def __init__(self, events, final_response):
+        self._events = list(events)
+        self._final = final_response
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False
+
+    def __iter__(self):
+        return iter(self._events)
+
+    def get_final_response(self):
+        return self._final
+
+
+@pytest.fixture(autouse=True)
+def _tmp_hermes_home(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    yield tmp_path
+
+
+@pytest.fixture
+def provider(monkeypatch):
+    # Codex plugin is API-key-independent; clear it to make the test honest.
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    return codex_plugin.OpenAICodexImageGenProvider()
+
+
+# ── Metadata ────────────────────────────────────────────────────────────────
+
+
+class TestMetadata:
+    def test_name(self, provider):
+        assert provider.name == "openai-codex"
+
+    def test_display_name(self, provider):
+        assert provider.display_name == "OpenAI (Codex auth)"
+
+    def test_default_model(self, provider):
+        assert provider.default_model() == "gpt-image-2-medium"
+
+    def test_list_models_three_tiers(self, provider):
+        ids = [m["id"] for m in provider.list_models()]
+        assert ids == ["gpt-image-2-low", "gpt-image-2-medium", "gpt-image-2-high"]
+
+    def test_setup_schema_has_no_required_env_vars(self, provider):
+        schema = provider.get_setup_schema()
+        assert schema["env_vars"] == []
+        assert schema["badge"] == "free"
+
+
+# ── Availability ────────────────────────────────────────────────────────────
+
+
+class TestAvailability:
+    def test_unavailable_without_codex_token(self, monkeypatch):
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
+        assert codex_plugin.OpenAICodexImageGenProvider().is_available() is False
+
+    def test_available_with_codex_token(self, monkeypatch):
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+        assert codex_plugin.OpenAICodexImageGenProvider().is_available() is True
+
+    def test_openai_api_key_alone_is_not_enough(self, monkeypatch):
+        # Codex plugin is intentionally orthogonal to the API-key plugin —
+        # the API key alone must NOT make it appear available.
+        monkeypatch.setenv("OPENAI_API_KEY", "sk-test")
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
+        assert codex_plugin.OpenAICodexImageGenProvider().is_available() is False
+
+
+# ── Generate ────────────────────────────────────────────────────────────────
+
+
+class TestGenerate:
+    def test_returns_auth_error_without_codex_token(self, provider, monkeypatch):
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: None)
+        result = provider.generate("a cat")
+        assert result["success"] is False
+        assert result["error_type"] == "auth_required"
+
+    def test_returns_invalid_argument_for_empty_prompt(self, provider, monkeypatch):
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+        result = provider.generate("   ")
+        assert result["success"] is False
+        assert result["error_type"] == "invalid_argument"
+
+    def test_generate_uses_codex_stream_path(self, provider, monkeypatch, tmp_path):
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+
+        output_item = SimpleNamespace(
+            type="image_generation_call",
+            status="generating",
+            id="ig_test",
+            result=_b64_png(),
+        )
+        done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
+        final_response = SimpleNamespace(output=[], status="completed", output_text="")
+
+        fake_client = SimpleNamespace(
+            responses=SimpleNamespace(
+                stream=lambda **kwargs: _FakeStream([done_event], final_response)
+            )
+        )
+        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
+
+        result = provider.generate("a cat", aspect_ratio="landscape")
+
+        assert result["success"] is True
+        assert result["model"] == "gpt-image-2-medium"
+        assert result["provider"] == "openai-codex"
+        assert result["quality"] == "medium"
+
+        saved = Path(result["image"])
+        assert saved.exists()
+        assert saved.parent == tmp_path / "cache" / "images"
+        # Filename prefix differs from the API-key plugin so cache audits can
+        # tell the two backends apart.
+        assert saved.name.startswith("openai_codex_")
+
+    def test_codex_stream_request_shape(self, provider, monkeypatch):
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+
+        captured = {}
+
+        def _stream(**kwargs):
+            captured.update(kwargs)
+            output_item = SimpleNamespace(
+                type="image_generation_call",
+                status="generating",
+                id="ig_test",
+                result=_b64_png(),
+            )
+            done_event = SimpleNamespace(type="response.output_item.done", item=output_item)
+            final_response = SimpleNamespace(output=[], status="completed", output_text="")
+            return _FakeStream([done_event], final_response)
+
+        fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_stream))
+        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
+
+        result = provider.generate("a cat", aspect_ratio="portrait")
+        assert result["success"] is True
+
+        assert captured["model"] == "gpt-5.4"
+        assert captured["store"] is False
+        assert captured["input"][0]["type"] == "message"
+        assert captured["input"][0]["role"] == "user"
+        assert captured["input"][0]["content"][0]["type"] == "input_text"
+        assert captured["tool_choice"]["type"] == "allowed_tools"
+        assert captured["tool_choice"]["mode"] == "required"
+        assert captured["tool_choice"]["tools"] == [{"type": "image_generation"}]
+
+        tool = captured["tools"][0]
+        assert tool["type"] == "image_generation"
+        assert tool["model"] == "gpt-image-2"
+        assert tool["quality"] == "medium"
+        assert tool["size"] == "1024x1536"
+        assert tool["output_format"] == "png"
+        assert tool["background"] == "opaque"
+        assert tool["partial_images"] == 1
+
+    def test_partial_image_event_used_when_done_missing(self, provider, monkeypatch):
+        """If the stream never emits output_item.done, fall back to the
+        partial_image event so users at least get the latest preview frame."""
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+
+        partial_event = SimpleNamespace(
+            type="response.image_generation_call.partial_image",
+            partial_image_b64=_b64_png(),
+        )
+        final_response = SimpleNamespace(output=[], status="completed", output_text="")
+
+        fake_client = SimpleNamespace(
+            responses=SimpleNamespace(
+                stream=lambda **kwargs: _FakeStream([partial_event], final_response)
+            )
+        )
+        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
+
+        result = provider.generate("a cat")
+        assert result["success"] is True
+        assert Path(result["image"]).exists()
+
+    def test_final_response_sweep_recovers_image(self, provider, monkeypatch):
+        """If no image_generation_call event arrives mid-stream, the
+        post-stream final-response sweep should still find the image."""
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+
+        final_item = SimpleNamespace(
+            type="image_generation_call",
+            status="completed",
+            id="ig_final",
+            result=_b64_png(),
+        )
+        final_response = SimpleNamespace(output=[final_item], status="completed", output_text="")
+
+        fake_client = SimpleNamespace(
+            responses=SimpleNamespace(
+                stream=lambda **kwargs: _FakeStream([], final_response)
+            )
+        )
+        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
+
+        result = provider.generate("a cat")
+        assert result["success"] is True
+        assert Path(result["image"]).exists()
+
+    def test_empty_response_returns_error(self, provider, monkeypatch):
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+
+        final_response = SimpleNamespace(output=[], status="completed", output_text="")
+        fake_client = SimpleNamespace(
+            responses=SimpleNamespace(
+                stream=lambda **kwargs: _FakeStream([], final_response)
+            )
+        )
+        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
+
+        result = provider.generate("a cat")
+        assert result["success"] is False
+        assert result["error_type"] == "empty_response"
+
+    def test_client_init_failure_returns_auth_error(self, provider, monkeypatch):
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: None)
+
+        result = provider.generate("a cat")
+        assert result["success"] is False
+        assert result["error_type"] == "auth_required"
+
+    def test_stream_exception_returns_api_error(self, provider, monkeypatch):
+        monkeypatch.setattr(codex_plugin, "_read_codex_access_token", lambda: "codex-token")
+
+        def _boom(**kwargs):
+            raise RuntimeError("cloudflare 403")
+
+        fake_client = SimpleNamespace(responses=SimpleNamespace(stream=_boom))
+        monkeypatch.setattr(codex_plugin, "_build_codex_client", lambda: fake_client)
+
+        result = provider.generate("a cat")
+        assert result["success"] is False
+        assert result["error_type"] == "api_error"
+        assert "cloudflare 403" in result["error"]
+
+
+# ── Plugin entry point ──────────────────────────────────────────────────────
+
+
+class TestRegistration:
+    def test_register_calls_register_image_gen_provider(self):
+        registered = []
+
+        class _Ctx:
+            def register_image_gen_provider(self, prov):
+                registered.append(prov)
+
+        codex_plugin.register(_Ctx())
+        assert len(registered) == 1
+        assert registered[0].name == "openai-codex"
@@ -47,31 +47,31 @@ def _make_anthropic_response(blocks, stop_reason: str = "max_tokens"):


 class TestTruncatedAnthropicResponseNormalization:
-    """normalize_anthropic_response() gives us the shape _build_assistant_message expects."""
+    """AnthropicTransport.normalize_response() gives us the shape _build_assistant_message expects."""

    def test_text_only_truncation_produces_text_content_no_tool_calls(self):
        """Pure-text Anthropic truncation → continuation path should fire."""
-        from agent.anthropic_adapter import normalize_anthropic_response
+        from agent.transports import get_transport

        response = _make_anthropic_response(
            [_make_anthropic_text_block("partial response that was cut off")]
        )
-        msg, finish = normalize_anthropic_response(response)
+        nr = get_transport("anthropic_messages").normalize_response(response)

        # The continuation block checks these two attributes:
        #   assistant_message.content  → appended to truncated_response_prefix
        #   assistant_message.tool_calls → guards the text-retry branch
-        assert msg.content is not None
-        assert "partial response" in msg.content
-        assert not msg.tool_calls, (
+        assert nr.content is not None
+        assert "partial response" in nr.content
+        assert not nr.tool_calls, (
            "Pure-text truncation must have no tool_calls so the text-continuation "
            "branch (not the tool-retry branch) fires"
        )
-        assert finish == "length", "max_tokens stop_reason must map to OpenAI-style 'length'"
+        assert nr.finish_reason == "length", "max_tokens stop_reason must map to OpenAI-style 'length'"

    def test_truncated_tool_call_produces_tool_calls(self):
        """Tool-use truncation → tool-call retry path should fire."""
-        from agent.anthropic_adapter import normalize_anthropic_response
+        from agent.transports import get_transport

        response = _make_anthropic_response(
            [
@@ -79,24 +79,24 @@ class TestTruncatedAnthropicResponseNormalization:
                _make_anthropic_tool_use_block(),
            ]
        )
-        msg, finish = normalize_anthropic_response(response)
+        nr = get_transport("anthropic_messages").normalize_response(response)

-        assert bool(msg.tool_calls), (
+        assert bool(nr.tool_calls), (
            "Truncation mid-tool_use must expose tool_calls so the "
            "tool-call retry branch fires instead of text continuation"
        )
-        assert finish == "length"
+        assert nr.finish_reason == "length"

    def test_empty_content_does_not_crash(self):
        """Empty response.content — defensive: treat as a truncation with no text."""
-        from agent.anthropic_adapter import normalize_anthropic_response
+        from agent.transports import get_transport

        response = _make_anthropic_response([])
-        msg, finish = normalize_anthropic_response(response)
+        nr = get_transport("anthropic_messages").normalize_response(response)
        # Depending on the adapter, content may be "" or None — both are
        # acceptable; what matters is no exception.
-        assert msg is not None
-        assert not msg.tool_calls
+        assert nr is not None
+        assert not nr.tool_calls


 class TestContinuationLogicBranching:
@@ -8,11 +8,7 @@ from unittest.mock import patch, MagicMock

 import pytest

-# batch_runner uses relative imports, ensure project root is on path
-import sys
-sys.path.insert(0, str(Path(__file__).parent.parent))
-
-from batch_runner import BatchRunner, _process_batch_worker
+from scripts.batch_runner import BatchRunner, _process_batch_worker


@pytest.fixture
@@ -173,7 +169,7 @@ class TestBatchWorkerResumeBehavior:
            "toolsets_used": [],
        }

-        monkeypatch.setattr("batch_runner._process_single_prompt", lambda *args, **kwargs: prompt_result)
+        monkeypatch.setattr("scripts.batch_runner._process_single_prompt", lambda *args, **kwargs: prompt_result)

        result = _process_batch_worker((
            1,
@@ -14,7 +14,7 @@ def test_run_task_kimi_omits_temperature():
        )
        mock_openai.return_value = client

-        from mini_swe_runner import MiniSWERunner
+        from scripts.mini_swe_runner import MiniSWERunner

        runner = MiniSWERunner(
            model="kimi-for-coding",
@@ -42,7 +42,7 @@ def test_run_task_public_moonshot_kimi_k2_5_omits_temperature():
        )
        mock_openai.return_value = client

-        from mini_swe_runner import MiniSWERunner
+        from scripts.mini_swe_runner import MiniSWERunner

        runner = MiniSWERunner(
            model="kimi-k2.5",
@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, patch, MagicMock

 import pytest

-from trajectory_compressor import (
+from scripts.trajectory_compressor import (
    CompressionConfig,
    TrajectoryMetrics,
    AggregateMetrics,
@@ -25,8 +25,8 @@ def test_import_loads_env_from_hermes_home(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)

-    sys.modules.pop("trajectory_compressor", None)
-    importlib.import_module("trajectory_compressor")
+    sys.modules.pop("scripts.trajectory_compressor", None)
+    importlib.import_module("scripts.trajectory_compressor")

    assert os.getenv("OPENROUTER_API_KEY") == "from-hermes-home"

@@ -22,7 +22,7 @@ class TestAsyncClientLazyCreation:

    def test_async_client_none_after_init(self):
        """async_client should be None after __init__ (not eagerly created)."""
-        from trajectory_compressor import TrajectoryCompressor
+        from scripts.trajectory_compressor import TrajectoryCompressor

        comp = TrajectoryCompressor.__new__(TrajectoryCompressor)
        comp.config = MagicMock()
@@ -36,7 +36,7 @@ class TestAsyncClientLazyCreation:

    def test_get_async_client_creates_new_client(self):
        """_get_async_client() should create a fresh AsyncOpenAI instance."""
-        from trajectory_compressor import TrajectoryCompressor
+        from scripts.trajectory_compressor import TrajectoryCompressor

        comp = TrajectoryCompressor.__new__(TrajectoryCompressor)
        comp.config = MagicMock()
@@ -57,7 +57,7 @@ class TestAsyncClientLazyCreation:
    def test_get_async_client_creates_fresh_each_call(self):
        """Each call to _get_async_client() creates a NEW client instance,
        so it binds to the current event loop."""
-        from trajectory_compressor import TrajectoryCompressor
+        from scripts.trajectory_compressor import TrajectoryCompressor

        comp = TrajectoryCompressor.__new__(TrajectoryCompressor)
        comp.config = MagicMock()
@@ -91,7 +91,7 @@ class TestSourceLineVerification:
    def _read_file() -> str:
        import os
        base = os.path.dirname(os.path.dirname(__file__))
-        with open(os.path.join(base, "trajectory_compressor.py")) as f:
+        with open(os.path.join(base, "scripts", "trajectory_compressor.py")) as f:
            return f.read()

    def test_no_eager_async_openai_in_init(self):
@@ -119,7 +119,7 @@ class TestSourceLineVerification:
@pytest.mark.asyncio
 async def test_generate_summary_async_kimi_omits_temperature():
    """Kimi models should have temperature omitted — server manages it."""
-    from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
+    from scripts.trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics

    config = CompressionConfig(
        summarization_model="kimi-for-coding",
@@ -147,7 +147,7 @@ async def test_generate_summary_async_kimi_omits_temperature():
@pytest.mark.asyncio
 async def test_generate_summary_async_public_moonshot_kimi_k2_5_omits_temperature():
    """kimi-k2.5 on the public Moonshot API should not get a forced temperature."""
-    from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
+    from scripts.trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics

    config = CompressionConfig(
        summarization_model="kimi-k2.5",
@@ -176,7 +176,7 @@ async def test_generate_summary_async_public_moonshot_kimi_k2_5_omits_temperatur
@pytest.mark.asyncio
 async def test_generate_summary_async_public_moonshot_cn_kimi_k2_5_omits_temperature():
    """kimi-k2.5 on api.moonshot.cn should not get a forced temperature."""
-    from trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics
+    from scripts.trajectory_compressor import CompressionConfig, TrajectoryCompressor, TrajectoryMetrics

    config = CompressionConfig(
        summarization_model="kimi-k2.5",
@@ -87,7 +87,7 @@ class TestTrajectoryCompressorNullGuard:

    def test_null_base_url_does_not_crash(self):
        """base_url=None should not crash _detect_provider()."""
-        from trajectory_compressor import CompressionConfig, TrajectoryCompressor
+        from scripts.trajectory_compressor import CompressionConfig, TrajectoryCompressor

        config = CompressionConfig()
        config.base_url = None
@@ -101,7 +101,7 @@ class TestTrajectoryCompressorNullGuard:

    def test_config_loading_null_base_url_keeps_default(self):
        """YAML ``summarization: {base_url: null}`` should keep default."""
-        from trajectory_compressor import CompressionConfig
+        from scripts.trajectory_compressor import CompressionConfig
        from hermes_constants import OPENROUTER_BASE_URL

        config = CompressionConfig()
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+import json
+import pytest
+
+from agent import image_gen_registry
+from agent.image_gen_provider import ImageGenProvider
+
+
+@pytest.fixture(autouse=True)
+def _reset_registry():
+    image_gen_registry._reset_for_tests()
+    yield
+    image_gen_registry._reset_for_tests()
+
+
+class _FakeCodexProvider(ImageGenProvider):
+    @property
+    def name(self) -> str:
+        return "codex"
+
+    def generate(self, prompt, aspect_ratio="landscape", **kwargs):
+        return {
+            "success": True,
+            "image": "/tmp/codex-test.png",
+            "model": "gpt-5.2-codex",
+            "prompt": prompt,
+            "aspect_ratio": aspect_ratio,
+            "provider": "codex",
+        }
+
+
+class TestPluginDispatch:
+    def test_dispatch_routes_to_codex_provider(self, monkeypatch, tmp_path):
+        from tools import image_generation_tool
+        from agent import image_gen_registry as registry_module
+        from hermes_cli import plugins as plugins_module
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "config.yaml").write_text("image_gen:\n  provider: codex\n")
+        image_gen_registry.register_provider(_FakeCodexProvider())
+
+        monkeypatch.setattr(image_generation_tool, "_read_configured_image_provider", lambda: "codex")
+        monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda: None)
+        monkeypatch.setattr(registry_module, "get_provider", lambda name: _FakeCodexProvider() if name == "codex" else None)
+
+        dispatched = image_generation_tool._dispatch_to_plugin_provider("draw cat", "square")
+        payload = json.loads(dispatched)
+
+        assert payload["success"] is True
+        assert payload["provider"] == "codex"
+        assert payload["image"] == "/tmp/codex-test.png"
+        assert payload["aspect_ratio"] == "square"
+
+    def test_dispatch_reports_missing_registered_provider(self, monkeypatch, tmp_path):
+        from tools import image_generation_tool
+        from hermes_cli import plugins as plugins_module
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "config.yaml").write_text("image_gen:\n  provider: missing-codex\n")
+
+        monkeypatch.setattr(image_generation_tool, "_read_configured_image_provider", lambda: "missing-codex")
+        monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", lambda: None)
+
+        dispatched = image_generation_tool._dispatch_to_plugin_provider("draw cat", "landscape")
+        payload = json.loads(dispatched)
+
+        assert payload["success"] is False
+        assert payload["error_type"] == "provider_not_registered"
+        assert "image_gen.provider='missing-codex'" in payload["error"]
+
+    def test_dispatch_force_refreshes_plugins_when_provider_initially_missing(self, monkeypatch, tmp_path):
+        from tools import image_generation_tool
+        from hermes_cli import plugins as plugins_module
+        from agent import image_gen_registry as registry_module
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "config.yaml").write_text("image_gen:\n  provider: codex\n")
+
+        monkeypatch.setattr(image_generation_tool, "_read_configured_image_provider", lambda: "codex")
+
+        calls = []
+        provider_state = {"provider": None}
+
+        def fake_ensure_plugins_discovered(force=False):
+            calls.append(force)
+            if force:
+                provider_state["provider"] = _FakeCodexProvider()
+
+        monkeypatch.setattr(plugins_module, "_ensure_plugins_discovered", fake_ensure_plugins_discovered)
+        monkeypatch.setattr(registry_module, "get_provider", lambda name: provider_state["provider"])
+
+        dispatched = image_generation_tool._dispatch_to_plugin_provider("draw hammy", "portrait")
+        payload = json.loads(dispatched)
+
+        assert calls == [False, True]
+        assert payload["success"] is True
+        assert payload["provider"] == "codex"
+        assert payload["aspect_ratio"] == "portrait"
@@ -5,6 +5,8 @@ terminates processes, and handles edge cases on failure paths.
 Inspired by PR #715 (0xbyt4).
 """

+import dataclasses
+import io
 from unittest.mock import MagicMock

 import pytest
@@ -118,6 +120,29 @@ class TestStopTrainingRunProcesses:
        trainer.terminate.assert_not_called()


+class TestRunStateLogFileFields:
+
+    def test_log_file_fields_default_none(self):
+        """All three log_file fields should default to None."""
+        state = _make_run_state()
+        assert state.api_log_file is None
+        assert state.trainer_log_file is None
+        assert state.env_log_file is None
+
+    def test_accepts_file_handle_for_api_log(self):
+        """api_log_file should accept an open file-like object."""
+        api_log = io.StringIO()
+        state = _make_run_state(api_log_file=api_log)
+        assert state.api_log_file is api_log
+
+    def test_log_file_fields_present_in_dataclass(self):
+        """All three field names must be declared on the RunState dataclass."""
+        field_names = {f.name for f in dataclasses.fields(RunState)}
+        assert "api_log_file" in field_names
+        assert "trainer_log_file" in field_names
+        assert "env_log_file" in field_names
+
+
 class TestStopTrainingRunStatus:
    """Verify status transitions in _stop_training_run."""

@@ -402,6 +402,86 @@ class TestSyncSkills:

        assert (user_skill / "SKILL.md").read_text() == "# User modified"

+    def test_collision_does_not_poison_manifest(self, tmp_path):
+        """Collision with an unmanifested user skill must NOT record bundled_hash.
+
+        Otherwise the next sync compares user_hash against the recorded
+        bundled_hash, finds a mismatch, and permanently flags the skill as
+        'user-modified' — even though the user never touched a bundled copy.
+        """
+        bundled = self._setup_bundled(tmp_path)
+        skills_dir = tmp_path / "user_skills"
+        manifest_file = skills_dir / ".bundled_manifest"
+
+        # Pre-existing user skill (e.g. from hub, custom, or leftover) that
+        # happens to share a name with a newly bundled skill.
+        user_skill = skills_dir / "category" / "new-skill"
+        user_skill.mkdir(parents=True)
+        (user_skill / "SKILL.md").write_text("# From hub — unrelated to bundled")
+
+        with self._patches(bundled, skills_dir, manifest_file):
+            sync_skills(quiet=True)
+
+        # User file must survive (existing invariant).
+        assert (user_skill / "SKILL.md").read_text() == (
+            "# From hub — unrelated to bundled"
+        )
+
+        # Manifest must NOT contain the skill — it was never synced from bundled.
+        with patch("tools.skills_sync.MANIFEST_FILE", manifest_file):
+            manifest = _read_manifest()
+        assert "new-skill" not in manifest, (
+            "Collision path wrote bundled_hash to the manifest even though "
+            "the on-disk copy is unrelated to bundled. This poisons update "
+            "detection: the next sync will mark the skill as 'user-modified'."
+        )
+
+    def test_collision_does_not_trigger_false_user_modified_on_resync(self, tmp_path):
+        """End-to-end: after a collision, a second sync must not flag user_modified.
+
+        Pre-fix bug: first sync wrote bundled_hash to the manifest; second
+        sync then diffed user_hash vs bundled_hash, mismatched, and shoved
+        the skill into the user_modified bucket forever.
+        """
+        bundled = self._setup_bundled(tmp_path)
+        skills_dir = tmp_path / "user_skills"
+        manifest_file = skills_dir / ".bundled_manifest"
+
+        user_skill = skills_dir / "category" / "new-skill"
+        user_skill.mkdir(parents=True)
+        (user_skill / "SKILL.md").write_text("# From hub — unrelated to bundled")
+
+        with self._patches(bundled, skills_dir, manifest_file):
+            sync_skills(quiet=True)  # first sync: collision path
+            result2 = sync_skills(quiet=True)  # second sync: must not flag
+
+        assert "new-skill" not in result2["user_modified"], (
+            "Second sync after a collision falsely flagged the user's skill "
+            "as 'user-modified' — the manifest was poisoned on the first sync."
+        )
+
+    def test_collision_prints_reset_hint(self, tmp_path, capsys):
+        """Non-quiet sync must print a reset hint when a collision is skipped.
+
+        Silent skip hides the fact that a bundled skill shipped but was
+        shadowed by the user's local copy. The hint tells the user the
+        exact command to take the bundled version instead.
+        """
+        bundled = self._setup_bundled(tmp_path)
+        skills_dir = tmp_path / "user_skills"
+        manifest_file = skills_dir / ".bundled_manifest"
+
+        user_skill = skills_dir / "category" / "new-skill"
+        user_skill.mkdir(parents=True)
+        (user_skill / "SKILL.md").write_text("# From hub — unrelated to bundled")
+
+        with self._patches(bundled, skills_dir, manifest_file):
+            sync_skills(quiet=False)
+
+        captured = capsys.readouterr().out
+        assert "new-skill" in captured
+        assert "hermes skills reset new-skill" in captured
+
    def test_nonexistent_bundled_dir(self, tmp_path):
        with patch("tools.skills_sync._get_bundled_dir", return_value=tmp_path / "nope"):
            result = sync_skills(quiet=True)
@@ -995,3 +995,262 @@ class TestTranscribeAudioMistralDispatch:
            transcribe_audio(sample_ogg, model="voxtral-mini-2602")

        assert mock_mistral.call_args[0][1] == "voxtral-mini-2602"
+
+
+# ============================================================================
+# _transcribe_xai
+# ============================================================================
+
+
+@pytest.fixture
+def mock_xai_http_module():
+    """Yield a stand-in ``tools.xai_http`` module registered in ``sys.modules``.
+
+    The stub's ``hermes_xai_user_agent()`` returns ``"hermes-xai/test"``.
+    ``patch.dict`` restores ``sys.modules`` when the fixture is torn down.
+    """
+    stub = MagicMock()
+    stub.hermes_xai_user_agent.return_value = "hermes-xai/test"
+    modules_patch = patch.dict("sys.modules", {"tools.xai_http": stub})
+    with modules_patch:
+        yield stub
+
+
+class TestTranscribeXAI:
+    """Tests for ``_transcribe_xai`` with STT config loading and HTTP mocked out."""
+
+    @staticmethod
+    def _fake_response(payload, status_code=200):
+        """Return a mock HTTP response whose ``.json()`` yields *payload*."""
+        response = MagicMock()
+        response.status_code = status_code
+        response.json.return_value = payload
+        return response
+
+    @staticmethod
+    def _call_xai(audio_path, config=None, **post_mock_kwargs):
+        """Call ``_transcribe_xai`` with STT config and ``requests.post`` patched.
+
+        ``config`` is what the patched ``_load_stt_config`` returns (``{}``
+        when omitted). ``post_mock_kwargs`` configure the ``requests.post``
+        mock, e.g. ``return_value=`` or ``side_effect=``.
+
+        Returns ``(result, mock_post)``.
+        """
+        stt_config = {} if config is None else config
+        with patch("tools.transcription_tools._load_stt_config", return_value=stt_config), \
+             patch("requests.post", **post_mock_kwargs) as mock_post:
+            from tools.transcription_tools import _transcribe_xai
+            result = _transcribe_xai(audio_path, "grok-stt")
+        return result, mock_post
+
+    def test_no_key(self, monkeypatch):
+        """Without XAI_API_KEY the call fails and the error names the variable."""
+        monkeypatch.delenv("XAI_API_KEY", raising=False)
+        from tools.transcription_tools import _transcribe_xai
+        result = _transcribe_xai("/tmp/test.ogg", "grok-stt")
+        assert result["success"] is False
+        assert "XAI_API_KEY" in result["error"]
+
+    def test_successful_transcription(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """A 200 response with text yields a successful xAI result."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+        mock_response = self._fake_response({
+            "text": "bonjour le monde",
+            "language": "fr",
+            "duration": 3.2,
+        })
+        result, _ = self._call_xai(sample_ogg, return_value=mock_response)
+
+        assert result["success"] is True
+        assert result["transcript"] == "bonjour le monde"
+        assert result["provider"] == "xai"
+
+    def test_whitespace_stripped(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """Leading/trailing whitespace is removed from the transcript."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+        mock_response = self._fake_response({"text": "  hello world  \n"})
+        result, _ = self._call_xai(sample_ogg, return_value=mock_response)
+
+        assert result["transcript"] == "hello world"
+
+    def test_api_error_returns_failure(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """An HTTP 400 is reported with its status code and API error message."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+        mock_response = self._fake_response(
+            {"error": {"message": "Invalid audio format"}},
+            status_code=400,
+        )
+        mock_response.text = '{"error": {"message": "Invalid audio format"}}'
+        result, _ = self._call_xai(sample_ogg, return_value=mock_response)
+
+        assert result["success"] is False
+        assert "HTTP 400" in result["error"]
+        assert "Invalid audio format" in result["error"]
+
+    def test_empty_transcript_returns_failure(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """A whitespace-only transcript counts as a failure."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+        mock_response = self._fake_response({"text": "   "})
+        result, _ = self._call_xai(sample_ogg, return_value=mock_response)
+
+        assert result["success"] is False
+        assert "empty transcript" in result["error"]
+
+    def test_permission_error(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """A PermissionError from ``open`` surfaces as a 'Permission denied' error."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+        # This test patches ``builtins.open`` rather than ``requests.post``,
+        # so it does not go through ``_call_xai``.
+        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
+             patch("builtins.open", side_effect=PermissionError("denied")):
+            from tools.transcription_tools import _transcribe_xai
+            result = _transcribe_xai(sample_ogg, "grok-stt")
+
+        assert result["success"] is False
+        assert "Permission denied" in result["error"]
+
+    def test_network_error_returns_failure(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """An exception raised by ``requests.post`` is reported, not propagated."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+        result, _ = self._call_xai(sample_ogg, side_effect=ConnectionError("timeout"))
+
+        assert result["success"] is False
+        assert "timeout" in result["error"]
+
+    def test_sends_language_and_format(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """The request form data carries the language and ``format=true``."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+        # Explicitly set language via env to exercise the override chain
+        # (config > env > DEFAULT_LOCAL_STT_LANGUAGE)
+        monkeypatch.setenv("HERMES_LOCAL_STT_LANGUAGE", "fr")
+
+        mock_response = self._fake_response({"text": "test", "language": "fr", "duration": 1.0})
+        _, mock_post = self._call_xai(sample_ogg, return_value=mock_response)
+
+        # ``call_args.kwargs`` and ``call_args[1]`` are the same mapping, so a
+        # single lookup is enough.
+        data = mock_post.call_args.kwargs.get("data", {})
+        assert data.get("language") == "fr"
+        assert data.get("format") == "true"
+
+    def test_custom_base_url(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """XAI_STT_BASE_URL overrides the host the request is sent to."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+        monkeypatch.setenv("XAI_STT_BASE_URL", "https://custom.x.ai/v1")
+
+        mock_response = self._fake_response({"text": "test", "language": "en", "duration": 1.0})
+        _, mock_post = self._call_xai(sample_ogg, return_value=mock_response)
+
+        # Accept the URL whether it was passed positionally or as ``url=``.
+        call_args = mock_post.call_args
+        url = call_args.args[0] if call_args.args else call_args.kwargs.get("url", "")
+        assert "custom.x.ai" in url
+
+    def test_diarize_sent_when_configured(self, monkeypatch, sample_ogg, mock_xai_http_module):
+        """``xai.diarize: True`` in the STT config is sent as ``diarize=true``."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test-key")
+
+        mock_response = self._fake_response({"text": "test", "language": "fr", "duration": 1.0})
+        config = {"xai": {"diarize": True}}
+        _, mock_post = self._call_xai(sample_ogg, config=config, return_value=mock_response)
+
+        data = mock_post.call_args.kwargs.get("data", {})
+        assert data.get("diarize") == "true"
+
+
+# ============================================================================
+# _get_provider — xAI
+# ============================================================================
+
+class TestGetProviderXAI:
+    """xAI-specific provider selection tests."""
+
+    @staticmethod
+    def _clear_other_provider_keys(monkeypatch):
+        """Remove the non-xAI provider API keys from the environment.
+
+        Keeps auto-detect tests independent of whatever keys happen to be
+        exported in the developer's or CI environment.
+        """
+        for name in (
+            "GROQ_API_KEY",
+            "VOICE_TOOLS_OPENAI_KEY",
+            "OPENAI_API_KEY",
+            "MISTRAL_API_KEY",
+        ):
+            monkeypatch.delenv(name, raising=False)
+
+    def test_xai_when_key_set(self, monkeypatch):
+        """Explicit xai with a key set selects xai."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test")
+        from tools.transcription_tools import _get_provider
+        assert _get_provider({"provider": "xai"}) == "xai"
+
+    def test_xai_explicit_no_key_returns_none(self, monkeypatch):
+        """Explicit xai with no key returns none — no cross-provider fallback."""
+        monkeypatch.delenv("XAI_API_KEY", raising=False)
+        from tools.transcription_tools import _get_provider
+        assert _get_provider({"provider": "xai"}) == "none"
+
+    def test_auto_detect_xai_after_mistral(self, monkeypatch):
+        """Auto-detect: xai is tried after mistral when all above are unavailable."""
+        self._clear_other_provider_keys(monkeypatch)
+        monkeypatch.setenv("XAI_API_KEY", "xai-test")
+        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
+             patch("tools.transcription_tools._has_local_command", return_value=False), \
+             patch("tools.transcription_tools._HAS_OPENAI", False), \
+             patch("tools.transcription_tools._HAS_MISTRAL", False):
+            from tools.transcription_tools import _get_provider
+            assert _get_provider({}) == "xai"
+
+    def test_auto_detect_mistral_preferred_over_xai(self, monkeypatch):
+        """Auto-detect: mistral is preferred over xai."""
+        monkeypatch.setenv("MISTRAL_API_KEY", "test-key")
+        monkeypatch.setenv("XAI_API_KEY", "xai-test")
+        monkeypatch.delenv("GROQ_API_KEY", raising=False)
+        monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
+             patch("tools.transcription_tools._has_local_command", return_value=False), \
+             patch("tools.transcription_tools._HAS_OPENAI", False), \
+             patch("tools.transcription_tools._HAS_MISTRAL", True):
+            from tools.transcription_tools import _get_provider
+            assert _get_provider({}) == "mistral"
+
+    def test_auto_detect_no_key_returns_none(self, monkeypatch):
+        """Auto-detect: xai skipped when no key is set."""
+        monkeypatch.delenv("XAI_API_KEY", raising=False)
+        # Clear the other provider keys too, as the sibling auto-detect tests
+        # do, so an ambient key cannot be picked up instead of "none".
+        self._clear_other_provider_keys(monkeypatch)
+        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
+             patch("tools.transcription_tools._has_local_command", return_value=False), \
+             patch("tools.transcription_tools._HAS_OPENAI", False), \
+             patch("tools.transcription_tools._HAS_MISTRAL", False):
+            from tools.transcription_tools import _get_provider
+            assert _get_provider({}) == "none"
+
+
+# ============================================================================
+# transcribe_audio — xAI dispatch
+# ============================================================================
+
+class TestTranscribeAudioXAIDispatch:
+    """Routing of ``transcribe_audio`` to the xAI backend."""
+
+    def test_dispatches_to_xai(self, sample_ogg):
+        """With provider xai, the xAI backend is called exactly once."""
+        backend_result = {"success": True, "transcript": "hi", "provider": "xai"}
+        with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "xai"}), \
+             patch("tools.transcription_tools._get_provider", return_value="xai"), \
+             patch("tools.transcription_tools._transcribe_xai",
+                   return_value=backend_result) as xai_backend:
+            from tools.transcription_tools import transcribe_audio
+            outcome = transcribe_audio(sample_ogg)
+
+        assert outcome["success"] is True
+        assert outcome["provider"] == "xai"
+        xai_backend.assert_called_once()
+
+    def test_model_default_is_grok_stt(self, sample_ogg):
+        """``model=None`` reaches the backend as ``"grok-stt"``."""
+        backend_result = {"success": True, "transcript": "hi"}
+        with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "xai"}), \
+             patch("tools.transcription_tools._get_provider", return_value="xai"), \
+             patch("tools.transcription_tools._transcribe_xai",
+                   return_value=backend_result) as xai_backend:
+            from tools.transcription_tools import transcribe_audio
+            transcribe_audio(sample_ogg, model=None)
+
+        positional = xai_backend.call_args[0]
+        assert positional[1] == "grok-stt"
+
+    def test_model_override_passed_to_xai(self, sample_ogg):
+        """An explicit ``model`` is forwarded to the backend unchanged."""
+        backend_result = {"success": True, "transcript": "hi"}
+        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
+             patch("tools.transcription_tools._get_provider", return_value="xai"), \
+             patch("tools.transcription_tools._transcribe_xai",
+                   return_value=backend_result) as xai_backend:
+            from tools.transcription_tools import transcribe_audio
+            transcribe_audio(sample_ogg, model="custom-stt")
+
+        positional = xai_backend.call_args[0]
+        assert positional[1] == "custom-stt"
@@ -16,7 +16,7 @@ import sys
 import threading
 import time
 import unicodedata
-from typing import Optional
+from typing import Any, Callable, Dict, Optional

 logger = logging.getLogger(__name__)

@@ -228,10 +228,10 @@ class _ApprovalEntry:


 _gateway_queues: dict[str, list] = {}        # session_key → [_ApprovalEntry, …]
-_gateway_notify_cbs: dict[str, object] = {}  # session_key → callable(approval_data)
+_gateway_notify_cbs: Dict[str, Callable[[Dict[str, Any]], None]] = {}


-def register_gateway_notify(session_key: str, cb) -> None:
+def register_gateway_notify(session_key: str, cb: Callable[[Dict[str, Any]], None]) -> None:
    """Register a per-session callback for sending approval requests to the user.

    The callback signature is ``cb(approval_data: dict) -> None`` where
@@ -891,7 +891,7 @@ BROWSER_TOOL_SCHEMAS = [
 # Utility Functions
 # ============================================================================

-def _create_local_session(task_id: str) -> Dict[str, str]:
+def _create_local_session(task_id: str) -> Dict[str, Any]:
    import uuid
    session_name = f"h_{uuid.uuid4().hex[:10]}"
    logger.info("Created local browser session %s for task %s",
@@ -904,7 +904,7 @@ def _create_local_session(task_id: str) -> Dict[str, str]:
    }


-def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]:
+def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, Any]:
    """Create a session that connects to a user-supplied CDP endpoint."""
    import uuid
    session_name = f"cdp_{uuid.uuid4().hex[:10]}"
@@ -918,7 +918,7 @@ def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]:
    }


-def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
+def _get_session_info(task_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Get or create session info for the given task.
    
@@ -1687,7 +1687,7 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
        from tools.browser_camofox import camofox_scroll
        # Camofox REST API doesn't support pixel args; use repeated calls
        _SCROLL_REPEATS = 5
-        result = None
+        result: str = ""
        for _ in range(_SCROLL_REPEATS):
            result = camofox_scroll(direction, task_id)
        return result
@@ -68,7 +68,7 @@ def _scan_cron_prompt(prompt: str) -> str:
    return ""


-def _origin_from_env() -> Optional[Dict[str, str]]:
+def _origin_from_env() -> Optional[Dict[str, Optional[str]]]:
    from gateway.session_context import get_session_env
    origin_platform = get_session_env("HERMES_SESSION_PLATFORM")
    origin_chat_id = get_session_env("HERMES_SESSION_CHAT_ID")
@@ -29,7 +29,7 @@ from concurrent.futures import (
    TimeoutError as FuturesTimeoutError,
    as_completed,
 )
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional

 from toolsets import TOOLSETS
 from tools import file_state
@@ -584,7 +584,7 @@ def _build_child_progress_callback(
    depth: Optional[int] = None,
    model: Optional[str] = None,
    toolsets: Optional[List[str]] = None,
-) -> Optional[callable]:
+) -> Optional[Callable[..., Any]]:
    """Build a callback that relays child agent tool calls to the parent display.

    Two display paths:
@@ -922,6 +922,12 @@ def _build_child_agent(
        else (getattr(parent_agent, "acp_args", []) or [])
    )

+    if override_acp_command:
+        # If explicitly forcing an ACP transport override, the provider MUST be copilot-acp
+        # so run_agent.py initializes the CopilotACPClient.
+        effective_provider = "copilot-acp"
+        effective_api_mode = "chat_completions"
+
    # Resolve reasoning config: delegation override > parent inherit
    parent_reasoning = getattr(parent_agent, "reasoning_config", None)
    child_reasoning = parent_reasoning
@@ -1596,7 +1602,7 @@ def delegate_task(

    n_tasks = len(task_list)
    # Track goal labels for progress display (truncated for readability)
-    task_labels = [t["goal"][:40] for t in task_list]
+    task_labels = [str(t["goal"] or "")[:40] for t in task_list]

    # Save parent tool names BEFORE any child construction mutates the global.
    # _build_child_agent() calls AIAgent() which calls get_tool_definitions(),
@@ -245,7 +245,7 @@ class _ThreadedProcessHandle:
            except Exception:
                pass

-    def wait(self, timeout: float | None = None) -> int:
+    def wait(self, timeout: float | None = None) -> int | None:
        self._done.wait(timeout=timeout)
        return self._returncode

@@ -755,7 +755,7 @@ class BaseEnvironment(ABC):
        except Exception:
            pass

-    def _prepare_command(self, command: str) -> tuple[str, str | None]:
+    def _prepare_command(self, command: str) -> tuple[str | None, str | None]:
        """Transform sudo commands if SUDO_PASSWORD is available."""
        from tools.terminal_tool import _transform_sudo_command

@@ -26,10 +26,11 @@ import os
 import datetime
 import threading
 import uuid
-from typing import Any, Dict, Optional, Union
+from typing import Any, Callable, Dict, Optional, Type, Union
 from urllib.parse import urlencode

 import fal_client
+import httpx

 from tools.debug_helpers import DebugSession
 from tools.managed_tool_gateway import resolve_managed_tool_gateway
@@ -348,21 +349,27 @@ class _ManagedFalSyncClient:

        self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
        self._sync_client = sync_client_class(key=key)
-        self._http_client = getattr(self._sync_client, "_client", None)
-        self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
-        self._raise_for_status = getattr(client_module, "_raise_for_status", None)
-        self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
-        self._add_hint_header = getattr(client_module, "add_hint_header", None)
-        self._add_priority_header = getattr(client_module, "add_priority_header", None)
-        self._add_timeout_header = getattr(client_module, "add_timeout_header", None)

-        if self._http_client is None:
+        http_client: Optional[httpx.Client] = getattr(self._sync_client, "_client", None)
+        maybe_retry: Optional[Callable[..., httpx.Response]] = getattr(client_module, "_maybe_retry_request", None)
+        raise_for_status: Optional[Callable[[httpx.Response], None]] = getattr(client_module, "_raise_for_status", None)
+        request_handle_class: Optional[Type[Any]] = getattr(client_module, "SyncRequestHandle", None)
+
+        if http_client is None:
            raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
-        if self._maybe_retry_request is None or self._raise_for_status is None:
+        if maybe_retry is None or raise_for_status is None:
            raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
-        if self._request_handle_class is None:
+        if request_handle_class is None:
            raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")

+        self._http_client: httpx.Client = http_client
+        self._maybe_retry_request: Callable[..., httpx.Response] = maybe_retry
+        self._raise_for_status: Callable[[httpx.Response], None] = raise_for_status
+        self._request_handle_class: Type[Any] = request_handle_class
+        self._add_hint_header: Optional[Callable[..., Any]] = getattr(client_module, "add_hint_header", None)
+        self._add_priority_header: Optional[Callable[..., Any]] = getattr(client_module, "add_priority_header", None)
+        self._add_timeout_header: Optional[Callable[..., Any]] = getattr(client_module, "add_timeout_header", None)
+
    def submit(
        self,
        application: str,
@@ -927,6 +934,16 @@ def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
        logger.debug("image_gen plugin dispatch skipped: %s", exc)
        return None

+    if provider is None:
+        try:
+            # Long-lived sessions may have discovered plugins before a bundled
+            # backend was patched in or before config changed. Retry once with
+            # a forced refresh before surfacing a missing-provider error.
+            _ensure_plugins_discovered(force=True)
+            provider = get_provider(configured)
+        except Exception as exc:
+            logger.debug("image_gen plugin force-refresh skipped: %s", exc)
+
    if provider is None:
        return json.dumps({
            "success": False,
@@ -994,6 +994,7 @@ class MCPServerTask:
        url = config["url"]
        headers = dict(config.get("headers") or {})
        connect_timeout = config.get("connect_timeout", _DEFAULT_CONNECT_TIMEOUT)
+        ssl_verify = config.get("ssl_verify", True)

        # OAuth 2.1 PKCE: route through the central MCPOAuthManager so the
        # same provider instance is reused across reconnects, pre-flow
@@ -1024,6 +1025,7 @@ class MCPServerTask:
            client_kwargs: dict = {
                "follow_redirects": True,
                "timeout": httpx.Timeout(float(connect_timeout), read=300.0),
+                "verify": ssl_verify,
            }
            if headers:
                client_kwargs["headers"] = headers
@@ -1052,6 +1054,7 @@ class MCPServerTask:
            _http_kwargs: dict = {
                "headers": headers,
                "timeout": float(connect_timeout),
+                "verify": ssl_verify,
            }
            if _oauth_auth is not None:
                _http_kwargs["auth"] = _oauth_auth
@@ -1503,11 +1506,15 @@ def _snapshot_child_pids() -> set:
    # Fallback: psutil
    try:
        import psutil
+    except ImportError:
+        raise ImportError(
+            "psutil is required for MCP child process tracking. "
+            "Install with: pip install hermes-agent[mcp]"
+        ) from None
+    try:
        return {c.pid for c in psutil.Process(my_pid).children()}
-    except Exception:
-        pass
-
-    return set()
+    except psutil.Error:
+        return set()


 def _mcp_loop_exception_handler(loop, context):
@@ -174,6 +174,7 @@ async def _run_reference_model_safe(
                error_msg = f"{model} failed after {max_retries} attempts: {error_str}"
                logger.error("%s", error_msg, exc_info=True)
                return model, error_msg, False
+    raise AssertionError("unreachable: retry loop exhausted")


 async def _run_aggregator_model(
@@ -71,12 +71,13 @@ def main():

    ref_text = ref_text_path.read_text(encoding="utf-8").strip()

-    # Import and run NeuTTS
    try:
        from neutts import NeuTTS
    except ImportError:
-        print("Error: neutts not installed. Run: python -m pip install -U neutts[all]", file=sys.stderr)
-        sys.exit(1)
+        raise ImportError(
+            "neutts is required for local TTS synthesis. "
+            "Install with: pip install hermes-agent[tts-local]"
+        ) from None

    tts = NeuTTS(
        backbone_repo=args.model,
@@ -93,9 +94,12 @@ def main():

    try:
        import soundfile as sf
-        sf.write(str(out_path), wav, 24000)
    except ImportError:
-        _write_wav(str(out_path), wav, 24000)
+        raise ImportError(
+            "soundfile is required for audio output. "
+            "Install with: pip install hermes-agent[tts-local]"
+        ) from None
+    sf.write(str(out_path), wav, 24000)

    print(f"OK: {out_path}", file=sys.stderr)

@@ -31,7 +31,10 @@ Usage:
 import difflib
 import re
 from dataclasses import dataclass, field
-from typing import List, Optional, Tuple, Any
+from typing import List, Optional, Tuple, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from tools.file_operations import PatchResult
 from enum import Enum


--- a/Show More
+++ b/Show More